diff --git "a/med-Llava3-instruct/trainer_state.json" "b/med-Llava3-instruct/trainer_state.json" --- "a/med-Llava3-instruct/trainer_state.json" +++ "b/med-Llava3-instruct/trainer_state.json" @@ -3,100172 +3,148745 @@ "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, - "global_step": 14307, + "global_step": 21246, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00020968756552736424, - "grad_norm": 44.581535249025066, - "learning_rate": 4.6511627906976744e-08, - "loss": 5.2578, + "epoch": 0.0001412030499858797, + "grad_norm": 11.016289456388648, + "learning_rate": 3.134796238244514e-08, + "loss": 1.2185, "step": 1 }, { - "epoch": 0.0004193751310547285, - "grad_norm": 55.97575030060379, - "learning_rate": 9.302325581395349e-08, - "loss": 5.1115, + "epoch": 0.0002824060999717594, + "grad_norm": 8.120133111158005, + "learning_rate": 6.269592476489028e-08, + "loss": 1.1331, "step": 2 }, { - "epoch": 0.0006290626965820927, - "grad_norm": 45.6807323797568, - "learning_rate": 1.3953488372093024e-07, - "loss": 5.5506, + "epoch": 0.0004236091499576391, + "grad_norm": 7.250787374374918, + "learning_rate": 9.404388714733543e-08, + "loss": 1.0514, "step": 3 }, { - "epoch": 0.000838750262109457, - "grad_norm": 44.58814275824773, - "learning_rate": 1.8604651162790698e-07, - "loss": 5.3761, + "epoch": 0.0005648121999435188, + "grad_norm": 7.695053669816028, + "learning_rate": 1.2539184952978057e-07, + "loss": 1.0558, "step": 4 }, { - "epoch": 0.0010484378276368212, - "grad_norm": 45.86331102678746, - "learning_rate": 2.3255813953488374e-07, - "loss": 5.4749, + "epoch": 0.0007060152499293985, + "grad_norm": 9.360748970299243, + "learning_rate": 1.567398119122257e-07, + "loss": 1.1812, "step": 5 }, { - "epoch": 0.0012581253931641854, - "grad_norm": 50.1396833416372, - "learning_rate": 2.790697674418605e-07, - "loss": 5.4041, + "epoch": 0.0008472182999152782, + "grad_norm": 8.227430059936243, + "learning_rate": 1.8808777429467086e-07, + "loss": 1.0638, "step": 6 }, { - "epoch": 0.0014678129586915495, - "grad_norm": 43.36356048315389, - "learning_rate": 3.2558139534883724e-07, - "loss": 5.3444, + "epoch": 0.000988421349901158, + "grad_norm": 11.252963008349193, + "learning_rate": 2.19435736677116e-07, + "loss": 1.5209, "step": 7 }, { - "epoch": 0.001677500524218914, - "grad_norm": 41.237362194380275, - "learning_rate": 3.7209302325581396e-07, - "loss": 5.0789, + "epoch": 0.0011296243998870376, + "grad_norm": 9.05785886340715, + "learning_rate": 2.5078369905956113e-07, + "loss": 1.3215, "step": 8 }, { - "epoch": 0.001887188089746278, - "grad_norm": 44.6268570337259, - "learning_rate": 4.186046511627907e-07, - "loss": 5.225, + "epoch": 0.0012708274498729173, + "grad_norm": 7.12566384828119, + "learning_rate": 2.821316614420063e-07, + "loss": 1.1655, "step": 9 }, { - "epoch": 0.0020968756552736424, - "grad_norm": 41.93516500698109, - "learning_rate": 4.651162790697675e-07, - "loss": 5.251, + "epoch": 0.001412030499858797, + "grad_norm": 9.534474543883421, + "learning_rate": 3.134796238244514e-07, + "loss": 1.2507, "step": 10 }, { - "epoch": 0.0023065632208010066, - "grad_norm": 45.66797322674655, - "learning_rate": 5.116279069767442e-07, - "loss": 5.2581, + "epoch": 0.0015532335498446767, + "grad_norm": 8.249516296143845, + "learning_rate": 3.4482758620689656e-07, + "loss": 1.311, "step": 11 }, { - "epoch": 0.0025162507863283707, - "grad_norm": 45.92240921350971, - "learning_rate": 5.58139534883721e-07, - "loss": 5.2577, + "epoch": 0.0016944365998305564, + "grad_norm": 7.883106644572163, + "learning_rate": 3.761755485893417e-07, + "loss": 1.0434, "step": 12 }, { - "epoch": 0.002725938351855735, - "grad_norm": 40.551544063218365, - "learning_rate": 6.046511627906977e-07, - "loss": 5.4346, + "epoch": 0.0018356396498164361, + "grad_norm": 7.800790851628648, + "learning_rate": 4.0752351097178683e-07, + "loss": 1.1142, "step": 13 }, { - "epoch": 0.002935625917383099, - "grad_norm": 38.95477715120408, - "learning_rate": 6.511627906976745e-07, - "loss": 4.9173, + "epoch": 0.001976842699802316, + "grad_norm": 8.734356264599777, + "learning_rate": 4.38871473354232e-07, + "loss": 1.1967, "step": 14 }, { - "epoch": 0.003145313482910463, - "grad_norm": 42.505450995173696, - "learning_rate": 6.976744186046513e-07, - "loss": 5.184, + "epoch": 0.0021180457497881953, + "grad_norm": 9.706526880499672, + "learning_rate": 4.7021943573667715e-07, + "loss": 1.1889, "step": 15 }, { - "epoch": 0.003355001048437828, - "grad_norm": 46.532900317909046, - "learning_rate": 7.441860465116279e-07, - "loss": 5.206, + "epoch": 0.0022592487997740753, + "grad_norm": 9.839295917755758, + "learning_rate": 5.015673981191223e-07, + "loss": 1.1732, "step": 16 }, { - "epoch": 0.003564688613965192, - "grad_norm": 35.2464767755706, - "learning_rate": 7.906976744186047e-07, - "loss": 4.9951, + "epoch": 0.0024004518497599547, + "grad_norm": 8.631626705724988, + "learning_rate": 5.329153605015674e-07, + "loss": 1.1017, "step": 17 }, { - "epoch": 0.003774376179492556, - "grad_norm": 38.354300316731944, - "learning_rate": 8.372093023255814e-07, - "loss": 5.0014, + "epoch": 0.0025416548997458347, + "grad_norm": 10.755118935271149, + "learning_rate": 5.642633228840126e-07, + "loss": 1.4075, "step": 18 }, { - "epoch": 0.00398406374501992, - "grad_norm": 39.309026236411896, - "learning_rate": 8.837209302325582e-07, - "loss": 4.9677, + "epoch": 0.002682857949731714, + "grad_norm": 10.148848929833456, + "learning_rate": 5.956112852664577e-07, + "loss": 1.2353, "step": 19 }, { - "epoch": 0.004193751310547285, - "grad_norm": 30.962161376619925, - "learning_rate": 9.30232558139535e-07, - "loss": 4.6954, + "epoch": 0.002824060999717594, + "grad_norm": 7.610801905336504, + "learning_rate": 6.269592476489028e-07, + "loss": 1.0546, "step": 20 }, { - "epoch": 0.0044034388760746486, - "grad_norm": 31.716183882767574, - "learning_rate": 9.767441860465117e-07, - "loss": 4.9276, + "epoch": 0.0029652640497034735, + "grad_norm": 8.24765505083955, + "learning_rate": 6.58307210031348e-07, + "loss": 1.056, "step": 21 }, { - "epoch": 0.004613126441602013, - "grad_norm": 28.029713344025122, - "learning_rate": 1.0232558139534884e-06, - "loss": 4.4202, + "epoch": 0.0031064670996893535, + "grad_norm": 9.498512915102472, + "learning_rate": 6.896551724137931e-07, + "loss": 1.4084, "step": 22 }, { - "epoch": 0.004822814007129377, - "grad_norm": 29.411298964471545, - "learning_rate": 1.0697674418604653e-06, - "loss": 4.2975, + "epoch": 0.003247670149675233, + "grad_norm": 8.45574073987412, + "learning_rate": 7.210031347962383e-07, + "loss": 1.2365, "step": 23 }, { - "epoch": 0.0050325015726567415, - "grad_norm": 31.397717047382393, - "learning_rate": 1.116279069767442e-06, - "loss": 4.7745, + "epoch": 0.003388873199661113, + "grad_norm": 7.381880650658492, + "learning_rate": 7.523510971786834e-07, + "loss": 1.1363, "step": 24 }, { - "epoch": 0.005242189138184106, - "grad_norm": 30.17266535106565, - "learning_rate": 1.1627906976744188e-06, - "loss": 4.4112, + "epoch": 0.0035300762496469924, + "grad_norm": 9.82656867976583, + "learning_rate": 7.836990595611286e-07, + "loss": 1.185, "step": 25 }, { - "epoch": 0.00545187670371147, - "grad_norm": 28.864912852486626, - "learning_rate": 1.2093023255813954e-06, - "loss": 4.6884, + "epoch": 0.0036712792996328723, + "grad_norm": 7.8957415859238775, + "learning_rate": 8.150470219435737e-07, + "loss": 1.0926, "step": 26 }, { - "epoch": 0.005661564269238834, - "grad_norm": 27.584213780426506, - "learning_rate": 1.2558139534883723e-06, - "loss": 4.287, + "epoch": 0.0038124823496187518, + "grad_norm": 9.804516056540306, + "learning_rate": 8.463949843260188e-07, + "loss": 1.0909, "step": 27 }, { - "epoch": 0.005871251834766198, - "grad_norm": 32.516567618012104, - "learning_rate": 1.302325581395349e-06, - "loss": 4.7153, + "epoch": 0.003953685399604632, + "grad_norm": 10.38514489323193, + "learning_rate": 8.77742946708464e-07, + "loss": 1.2265, "step": 28 }, { - "epoch": 0.006080939400293563, - "grad_norm": 28.518458947751625, - "learning_rate": 1.3488372093023258e-06, - "loss": 4.2179, + "epoch": 0.004094888449590511, + "grad_norm": 9.613709288614304, + "learning_rate": 9.090909090909091e-07, + "loss": 1.0937, "step": 29 }, { - "epoch": 0.006290626965820926, - "grad_norm": 32.01356840067489, - "learning_rate": 1.3953488372093025e-06, - "loss": 4.0782, + "epoch": 0.004236091499576391, + "grad_norm": 8.52739686979812, + "learning_rate": 9.404388714733543e-07, + "loss": 1.2375, "step": 30 }, { - "epoch": 0.006500314531348291, - "grad_norm": 30.306645519671417, - "learning_rate": 1.4418604651162794e-06, - "loss": 4.2055, + "epoch": 0.00437729454956227, + "grad_norm": 8.407880490682919, + "learning_rate": 9.717868338557995e-07, + "loss": 0.9995, "step": 31 }, { - "epoch": 0.006710002096875656, - "grad_norm": 21.555136777198957, - "learning_rate": 1.4883720930232558e-06, - "loss": 3.7999, + "epoch": 0.0045184975995481505, + "grad_norm": 8.818852133609342, + "learning_rate": 1.0031347962382445e-06, + "loss": 0.9853, "step": 32 }, { - "epoch": 0.006919689662403019, - "grad_norm": 16.01301527346332, - "learning_rate": 1.534883720930233e-06, - "loss": 3.8126, + "epoch": 0.00465970064953403, + "grad_norm": 7.6608076447519124, + "learning_rate": 1.0344827586206898e-06, + "loss": 1.027, "step": 33 }, { - "epoch": 0.007129377227930384, - "grad_norm": 17.677733048222027, - "learning_rate": 1.5813953488372093e-06, - "loss": 3.6181, + "epoch": 0.0048009036995199095, + "grad_norm": 7.045474268264931, + "learning_rate": 1.0658307210031348e-06, + "loss": 0.9492, "step": 34 }, { - "epoch": 0.007339064793457748, - "grad_norm": 22.56973075579131, - "learning_rate": 1.6279069767441862e-06, - "loss": 4.0219, + "epoch": 0.004942106749505789, + "grad_norm": 7.997538865617388, + "learning_rate": 1.0971786833855801e-06, + "loss": 1.1267, "step": 35 }, { - "epoch": 0.007548752358985112, - "grad_norm": 19.22863542781956, - "learning_rate": 1.6744186046511629e-06, - "loss": 3.684, + "epoch": 0.005083309799491669, + "grad_norm": 7.878590188779293, + "learning_rate": 1.1285266457680252e-06, + "loss": 1.2463, "step": 36 }, { - "epoch": 0.007758439924512477, - "grad_norm": 41.136305365288855, - "learning_rate": 1.7209302325581397e-06, - "loss": 3.8011, + "epoch": 0.005224512849477549, + "grad_norm": 8.243444524822978, + "learning_rate": 1.1598746081504702e-06, + "loss": 1.1125, "step": 37 }, { - "epoch": 0.00796812749003984, - "grad_norm": 26.64658586090878, - "learning_rate": 1.7674418604651164e-06, - "loss": 3.7584, + "epoch": 0.005365715899463428, + "grad_norm": 7.498789580843632, + "learning_rate": 1.1912225705329155e-06, + "loss": 1.1791, "step": 38 }, { - "epoch": 0.008177815055567205, - "grad_norm": 18.393267879776605, - "learning_rate": 1.8139534883720933e-06, - "loss": 3.9381, + "epoch": 0.005506918949449308, + "grad_norm": 8.711680777445437, + "learning_rate": 1.2225705329153605e-06, + "loss": 1.1524, "step": 39 }, { - "epoch": 0.00838750262109457, - "grad_norm": 14.15050940717148, - "learning_rate": 1.86046511627907e-06, - "loss": 3.3514, + "epoch": 0.005648121999435188, + "grad_norm": 9.095212085184661, + "learning_rate": 1.2539184952978056e-06, + "loss": 1.1614, "step": 40 }, { - "epoch": 0.008597190186621933, - "grad_norm": 17.425435117299486, - "learning_rate": 1.9069767441860468e-06, - "loss": 3.4709, + "epoch": 0.005789325049421068, + "grad_norm": 8.076909928890133, + "learning_rate": 1.2852664576802509e-06, + "loss": 1.0627, "step": 41 }, { - "epoch": 0.008806877752149297, - "grad_norm": 20.13210149604207, - "learning_rate": 1.9534883720930235e-06, - "loss": 3.5707, + "epoch": 0.005930528099406947, + "grad_norm": 7.877567616556983, + "learning_rate": 1.316614420062696e-06, + "loss": 1.1967, "step": 42 }, { - "epoch": 0.009016565317676662, - "grad_norm": 14.29131526927446, - "learning_rate": 2.0000000000000003e-06, - "loss": 3.4287, + "epoch": 0.006071731149392827, + "grad_norm": 7.811444399828281, + "learning_rate": 1.3479623824451412e-06, + "loss": 1.1811, "step": 43 }, { - "epoch": 0.009226252883204026, - "grad_norm": 17.350463360046255, - "learning_rate": 2.0465116279069768e-06, - "loss": 3.601, + "epoch": 0.006212934199378707, + "grad_norm": 10.697303496487786, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1331, "step": 44 }, { - "epoch": 0.009435940448731391, - "grad_norm": 12.955524208006738, - "learning_rate": 2.0930232558139536e-06, - "loss": 3.6587, + "epoch": 0.006354137249364586, + "grad_norm": 8.077993683082106, + "learning_rate": 1.4106583072100313e-06, + "loss": 0.9725, "step": 45 }, { - "epoch": 0.009645628014258754, - "grad_norm": 11.725416839586316, - "learning_rate": 2.1395348837209305e-06, - "loss": 3.5439, + "epoch": 0.006495340299350466, + "grad_norm": 6.865107269829995, + "learning_rate": 1.4420062695924766e-06, + "loss": 1.2191, "step": 46 }, { - "epoch": 0.009855315579786118, - "grad_norm": 12.251306517358946, - "learning_rate": 2.1860465116279074e-06, - "loss": 3.5524, + "epoch": 0.006636543349336345, + "grad_norm": 7.078868194913755, + "learning_rate": 1.4733542319749216e-06, + "loss": 1.1047, "step": 47 }, { - "epoch": 0.010065003145313483, - "grad_norm": 12.012362016650496, - "learning_rate": 2.232558139534884e-06, - "loss": 3.4082, + "epoch": 0.006777746399322226, + "grad_norm": 7.673040623741436, + "learning_rate": 1.5047021943573669e-06, + "loss": 1.0038, "step": 48 }, { - "epoch": 0.010274690710840848, - "grad_norm": 13.29171390002502, - "learning_rate": 2.2790697674418607e-06, - "loss": 3.5815, + "epoch": 0.006918949449308105, + "grad_norm": 7.583881587623022, + "learning_rate": 1.536050156739812e-06, + "loss": 1.0244, "step": 49 }, { - "epoch": 0.010484378276368212, - "grad_norm": 9.96477777092122, - "learning_rate": 2.3255813953488376e-06, - "loss": 3.3192, + "epoch": 0.007060152499293985, + "grad_norm": 9.38341982167418, + "learning_rate": 1.5673981191222572e-06, + "loss": 0.9941, "step": 50 }, { - "epoch": 0.010694065841895575, - "grad_norm": 9.602017188577209, - "learning_rate": 2.3720930232558144e-06, - "loss": 3.4421, + "epoch": 0.007201355549279864, + "grad_norm": 8.912233805021561, + "learning_rate": 1.5987460815047023e-06, + "loss": 1.0193, "step": 51 }, { - "epoch": 0.01090375340742294, - "grad_norm": 10.66111999130771, - "learning_rate": 2.418604651162791e-06, - "loss": 3.5158, + "epoch": 0.0073425585992657446, + "grad_norm": 7.778349403085027, + "learning_rate": 1.6300940438871473e-06, + "loss": 1.0961, "step": 52 }, { - "epoch": 0.011113440972950304, - "grad_norm": 10.848278733863214, - "learning_rate": 2.4651162790697678e-06, - "loss": 3.4538, + "epoch": 0.007483761649251624, + "grad_norm": 6.973044642406554, + "learning_rate": 1.6614420062695926e-06, + "loss": 0.9429, "step": 53 }, { - "epoch": 0.011323128538477669, - "grad_norm": 10.385904100498394, - "learning_rate": 2.5116279069767446e-06, - "loss": 3.4165, + "epoch": 0.0076249646992375035, + "grad_norm": 7.273640449409624, + "learning_rate": 1.6927899686520376e-06, + "loss": 0.9659, "step": 54 }, { - "epoch": 0.011532816104005033, - "grad_norm": 9.79849245706584, - "learning_rate": 2.558139534883721e-06, - "loss": 3.3661, + "epoch": 0.007766167749223383, + "grad_norm": 8.45866624072379, + "learning_rate": 1.724137931034483e-06, + "loss": 1.0845, "step": 55 }, { - "epoch": 0.011742503669532396, - "grad_norm": 10.477143016621993, - "learning_rate": 2.604651162790698e-06, - "loss": 3.5001, + "epoch": 0.007907370799209263, + "grad_norm": 7.933551688799783, + "learning_rate": 1.755485893416928e-06, + "loss": 0.9936, "step": 56 }, { - "epoch": 0.01195219123505976, - "grad_norm": 11.313800543376221, - "learning_rate": 2.6511627906976744e-06, - "loss": 3.3892, + "epoch": 0.008048573849195143, + "grad_norm": 7.3752957818911975, + "learning_rate": 1.786833855799373e-06, + "loss": 1.2214, "step": 57 }, { - "epoch": 0.012161878800587125, - "grad_norm": 10.142488883977713, - "learning_rate": 2.6976744186046517e-06, - "loss": 3.5476, + "epoch": 0.008189776899181022, + "grad_norm": 7.759633648039131, + "learning_rate": 1.8181818181818183e-06, + "loss": 1.1288, "step": 58 }, { - "epoch": 0.01237156636611449, - "grad_norm": 9.658386187848318, - "learning_rate": 2.744186046511628e-06, - "loss": 3.6256, + "epoch": 0.008330979949166902, + "grad_norm": 8.297481290277021, + "learning_rate": 1.8495297805642633e-06, + "loss": 1.0711, "step": 59 }, { - "epoch": 0.012581253931641853, - "grad_norm": 9.716817971019752, - "learning_rate": 2.790697674418605e-06, - "loss": 3.1563, + "epoch": 0.008472182999152781, + "grad_norm": 7.707348532551644, + "learning_rate": 1.8808777429467086e-06, + "loss": 1.1045, "step": 60 }, { - "epoch": 0.012790941497169217, - "grad_norm": 10.080634586272732, - "learning_rate": 2.8372093023255815e-06, - "loss": 3.0472, + "epoch": 0.00861338604913866, + "grad_norm": 8.440734580994208, + "learning_rate": 1.9122257053291537e-06, + "loss": 1.2053, "step": 61 }, { - "epoch": 0.013000629062696582, - "grad_norm": 8.278465166902485, - "learning_rate": 2.8837209302325587e-06, - "loss": 3.3681, + "epoch": 0.00875458909912454, + "grad_norm": 9.379713463539574, + "learning_rate": 1.943573667711599e-06, + "loss": 1.0985, "step": 62 }, { - "epoch": 0.013210316628223947, - "grad_norm": 9.487536575067205, - "learning_rate": 2.930232558139535e-06, - "loss": 3.0451, + "epoch": 0.008895792149110422, + "grad_norm": 8.393582865795228, + "learning_rate": 1.974921630094044e-06, + "loss": 1.1986, "step": 63 }, { - "epoch": 0.013420004193751311, - "grad_norm": 8.906592766331341, - "learning_rate": 2.9767441860465116e-06, - "loss": 2.9602, + "epoch": 0.009036995199096301, + "grad_norm": 8.07011113417986, + "learning_rate": 2.006269592476489e-06, + "loss": 1.0335, "step": 64 }, { - "epoch": 0.013629691759278674, - "grad_norm": 10.169538227681718, - "learning_rate": 3.0232558139534885e-06, - "loss": 3.1515, + "epoch": 0.00917819824908218, + "grad_norm": 7.694745925151725, + "learning_rate": 2.0376175548589343e-06, + "loss": 1.0446, "step": 65 }, { - "epoch": 0.013839379324806039, - "grad_norm": 9.69295874549237, - "learning_rate": 3.069767441860466e-06, - "loss": 3.0275, + "epoch": 0.00931940129906806, + "grad_norm": 7.214117451209332, + "learning_rate": 2.0689655172413796e-06, + "loss": 0.9772, "step": 66 }, { - "epoch": 0.014049066890333403, - "grad_norm": 8.417351385432722, - "learning_rate": 3.1162790697674423e-06, - "loss": 3.1228, + "epoch": 0.00946060434905394, + "grad_norm": 8.105960770628542, + "learning_rate": 2.100313479623825e-06, + "loss": 1.0872, "step": 67 }, { - "epoch": 0.014258754455860768, - "grad_norm": 8.359946297125934, - "learning_rate": 3.1627906976744187e-06, - "loss": 3.1764, + "epoch": 0.009601807399039819, + "grad_norm": 6.543049973253576, + "learning_rate": 2.1316614420062697e-06, + "loss": 0.8508, "step": 68 }, { - "epoch": 0.014468442021388132, - "grad_norm": 8.586224181948893, - "learning_rate": 3.2093023255813956e-06, - "loss": 3.35, + "epoch": 0.009743010449025698, + "grad_norm": 6.828975668478929, + "learning_rate": 2.163009404388715e-06, + "loss": 0.7794, "step": 69 }, { - "epoch": 0.014678129586915495, - "grad_norm": 7.775800455437223, - "learning_rate": 3.2558139534883724e-06, - "loss": 3.3618, + "epoch": 0.009884213499011578, + "grad_norm": 8.179384371223852, + "learning_rate": 2.1943573667711602e-06, + "loss": 0.9591, "step": 70 }, { - "epoch": 0.01488781715244286, - "grad_norm": 8.009191898773206, - "learning_rate": 3.3023255813953493e-06, - "loss": 3.3176, + "epoch": 0.01002541654899746, + "grad_norm": 8.219708247974477, + "learning_rate": 2.225705329153605e-06, + "loss": 1.162, "step": 71 }, { - "epoch": 0.015097504717970224, - "grad_norm": 8.690797306545317, - "learning_rate": 3.3488372093023258e-06, - "loss": 3.4012, + "epoch": 0.010166619598983339, + "grad_norm": 9.237842698268448, + "learning_rate": 2.2570532915360503e-06, + "loss": 1.1298, "step": 72 }, { - "epoch": 0.015307192283497589, - "grad_norm": 6.759160640370161, - "learning_rate": 3.3953488372093026e-06, - "loss": 3.379, + "epoch": 0.010307822648969218, + "grad_norm": 7.538555016516361, + "learning_rate": 2.2884012539184956e-06, + "loss": 0.9263, "step": 73 }, { - "epoch": 0.015516879849024954, - "grad_norm": 8.063201572053002, - "learning_rate": 3.4418604651162795e-06, - "loss": 3.1205, + "epoch": 0.010449025698955098, + "grad_norm": 7.797504601380168, + "learning_rate": 2.3197492163009404e-06, + "loss": 0.9031, "step": 74 }, { - "epoch": 0.015726567414552316, - "grad_norm": 8.382017181883159, - "learning_rate": 3.4883720930232564e-06, - "loss": 3.0159, + "epoch": 0.010590228748940977, + "grad_norm": 7.452472518693939, + "learning_rate": 2.3510971786833857e-06, + "loss": 1.0156, "step": 75 }, { - "epoch": 0.01593625498007968, - "grad_norm": 7.381364777071937, - "learning_rate": 3.534883720930233e-06, - "loss": 3.0113, + "epoch": 0.010731431798926857, + "grad_norm": 6.490277329807399, + "learning_rate": 2.382445141065831e-06, + "loss": 0.9288, "step": 76 }, { - "epoch": 0.016145942545607046, - "grad_norm": 7.181591548601563, - "learning_rate": 3.5813953488372093e-06, - "loss": 3.2055, + "epoch": 0.010872634848912736, + "grad_norm": 9.092323356828192, + "learning_rate": 2.4137931034482762e-06, + "loss": 0.9758, "step": 77 }, { - "epoch": 0.01635563011113441, - "grad_norm": 8.067824850658068, - "learning_rate": 3.6279069767441866e-06, - "loss": 3.1375, + "epoch": 0.011013837898898616, + "grad_norm": 7.399506935121757, + "learning_rate": 2.445141065830721e-06, + "loss": 1.0961, "step": 78 }, { - "epoch": 0.016565317676661775, - "grad_norm": 8.90045917292892, - "learning_rate": 3.674418604651163e-06, - "loss": 3.143, + "epoch": 0.011155040948884497, + "grad_norm": 8.763661517197843, + "learning_rate": 2.4764890282131664e-06, + "loss": 0.9974, "step": 79 }, { - "epoch": 0.01677500524218914, - "grad_norm": 7.428311648268624, - "learning_rate": 3.72093023255814e-06, - "loss": 3.0114, + "epoch": 0.011296243998870376, + "grad_norm": 8.823673726434475, + "learning_rate": 2.507836990595611e-06, + "loss": 1.0356, "step": 80 }, { - "epoch": 0.016984692807716504, - "grad_norm": 8.18283591778806, - "learning_rate": 3.7674418604651163e-06, - "loss": 3.2888, + "epoch": 0.011437447048856256, + "grad_norm": 9.43989059848618, + "learning_rate": 2.5391849529780565e-06, + "loss": 1.206, "step": 81 }, { - "epoch": 0.017194380373243865, - "grad_norm": 7.144937554022569, - "learning_rate": 3.8139534883720936e-06, - "loss": 3.0265, + "epoch": 0.011578650098842135, + "grad_norm": 8.819174428360991, + "learning_rate": 2.5705329153605017e-06, + "loss": 1.033, "step": 82 }, { - "epoch": 0.01740406793877123, - "grad_norm": 7.809265858472188, - "learning_rate": 3.86046511627907e-06, - "loss": 2.8489, + "epoch": 0.011719853148828015, + "grad_norm": 8.598032169347169, + "learning_rate": 2.601880877742947e-06, + "loss": 0.9142, "step": 83 }, { - "epoch": 0.017613755504298594, - "grad_norm": 7.86043319583929, - "learning_rate": 3.906976744186047e-06, - "loss": 3.0097, + "epoch": 0.011861056198813894, + "grad_norm": 7.929972401192863, + "learning_rate": 2.633228840125392e-06, + "loss": 1.0614, "step": 84 }, { - "epoch": 0.01782344306982596, - "grad_norm": 8.358068065440838, - "learning_rate": 3.953488372093024e-06, - "loss": 3.1544, + "epoch": 0.012002259248799774, + "grad_norm": 6.9944529439088665, + "learning_rate": 2.664576802507837e-06, + "loss": 0.9095, "step": 85 }, { - "epoch": 0.018033130635353323, - "grad_norm": 8.32027460266491, - "learning_rate": 4.000000000000001e-06, - "loss": 3.0539, + "epoch": 0.012143462298785653, + "grad_norm": 7.736966107250294, + "learning_rate": 2.6959247648902824e-06, + "loss": 0.9155, "step": 86 }, { - "epoch": 0.018242818200880688, - "grad_norm": 8.361733270607763, - "learning_rate": 4.0465116279069775e-06, - "loss": 3.0993, + "epoch": 0.012284665348771533, + "grad_norm": 7.0155854768729755, + "learning_rate": 2.7272727272727272e-06, + "loss": 0.8631, "step": 87 }, { - "epoch": 0.018452505766408053, - "grad_norm": 8.244772580707666, - "learning_rate": 4.0930232558139536e-06, - "loss": 2.9546, + "epoch": 0.012425868398757414, + "grad_norm": 7.412422751254611, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9165, "step": 88 }, { - "epoch": 0.018662193331935417, - "grad_norm": 7.606218973040087, - "learning_rate": 4.1395348837209304e-06, - "loss": 3.1352, + "epoch": 0.012567071448743293, + "grad_norm": 8.143433946568083, + "learning_rate": 2.7899686520376178e-06, + "loss": 0.8537, "step": 89 }, { - "epoch": 0.018871880897462782, - "grad_norm": 7.1326650064224895, - "learning_rate": 4.186046511627907e-06, - "loss": 3.0812, + "epoch": 0.012708274498729173, + "grad_norm": 8.150750830633141, + "learning_rate": 2.8213166144200626e-06, + "loss": 0.9352, "step": 90 }, { - "epoch": 0.019081568462990146, - "grad_norm": 6.9993119864944635, - "learning_rate": 4.232558139534884e-06, - "loss": 3.0158, + "epoch": 0.012849477548715052, + "grad_norm": 7.508385509036284, + "learning_rate": 2.852664576802508e-06, + "loss": 0.9612, "step": 91 }, { - "epoch": 0.019291256028517507, - "grad_norm": 6.679637342259109, - "learning_rate": 4.279069767441861e-06, - "loss": 3.0143, + "epoch": 0.012990680598700932, + "grad_norm": 7.447685254764651, + "learning_rate": 2.884012539184953e-06, + "loss": 0.9174, "step": 92 }, { - "epoch": 0.019500943594044872, - "grad_norm": 7.0219081176472145, - "learning_rate": 4.325581395348837e-06, - "loss": 3.1109, + "epoch": 0.013131883648686811, + "grad_norm": 6.938010829660382, + "learning_rate": 2.9153605015673984e-06, + "loss": 0.8738, "step": 93 }, { - "epoch": 0.019710631159572237, - "grad_norm": 7.399432103121332, - "learning_rate": 4.372093023255815e-06, - "loss": 3.1689, + "epoch": 0.01327308669867269, + "grad_norm": 6.2478036866826265, + "learning_rate": 2.9467084639498432e-06, + "loss": 0.883, "step": 94 }, { - "epoch": 0.0199203187250996, - "grad_norm": 7.67344627391521, - "learning_rate": 4.418604651162791e-06, - "loss": 3.1581, + "epoch": 0.01341428974865857, + "grad_norm": 8.713346272613183, + "learning_rate": 2.9780564263322885e-06, + "loss": 0.9038, "step": 95 }, { - "epoch": 0.020130006290626966, - "grad_norm": 7.451527879617144, - "learning_rate": 4.465116279069768e-06, - "loss": 2.7311, + "epoch": 0.013555492798644452, + "grad_norm": 7.405605657939075, + "learning_rate": 3.0094043887147338e-06, + "loss": 0.8549, "step": 96 }, { - "epoch": 0.02033969385615433, - "grad_norm": 8.827861596076998, - "learning_rate": 4.5116279069767445e-06, - "loss": 3.1583, + "epoch": 0.013696695848630331, + "grad_norm": 6.889707950820388, + "learning_rate": 3.0407523510971786e-06, + "loss": 0.9198, "step": 97 }, { - "epoch": 0.020549381421681695, - "grad_norm": 8.341343814476492, - "learning_rate": 4.558139534883721e-06, - "loss": 3.0021, + "epoch": 0.01383789889861621, + "grad_norm": 8.49207098940456, + "learning_rate": 3.072100313479624e-06, + "loss": 1.0015, "step": 98 }, { - "epoch": 0.02075906898720906, - "grad_norm": 7.259040254403374, - "learning_rate": 4.604651162790698e-06, - "loss": 2.7542, + "epoch": 0.01397910194860209, + "grad_norm": 7.938481581936447, + "learning_rate": 3.103448275862069e-06, + "loss": 1.0189, "step": 99 }, { - "epoch": 0.020968756552736424, - "grad_norm": 7.265031203493301, - "learning_rate": 4.651162790697675e-06, - "loss": 3.175, + "epoch": 0.01412030499858797, + "grad_norm": 6.809269885595204, + "learning_rate": 3.1347962382445144e-06, + "loss": 0.836, "step": 100 }, { - "epoch": 0.021178444118263785, - "grad_norm": 6.62490543346344, - "learning_rate": 4.697674418604651e-06, - "loss": 2.9096, + "epoch": 0.014261508048573849, + "grad_norm": 8.36461382411018, + "learning_rate": 3.1661442006269593e-06, + "loss": 0.8465, "step": 101 }, { - "epoch": 0.02138813168379115, - "grad_norm": 6.665691734747577, - "learning_rate": 4.744186046511629e-06, - "loss": 3.0775, + "epoch": 0.014402711098559728, + "grad_norm": 9.138606230146802, + "learning_rate": 3.1974921630094045e-06, + "loss": 0.9167, "step": 102 }, { - "epoch": 0.021597819249318514, - "grad_norm": 7.073363576835186, - "learning_rate": 4.790697674418605e-06, - "loss": 3.0203, + "epoch": 0.014543914148545608, + "grad_norm": 8.378076403772889, + "learning_rate": 3.22884012539185e-06, + "loss": 1.1506, "step": 103 }, { - "epoch": 0.02180750681484588, - "grad_norm": 7.337835231721183, - "learning_rate": 4.837209302325582e-06, - "loss": 3.1326, + "epoch": 0.014685117198531489, + "grad_norm": 6.717403458248077, + "learning_rate": 3.2601880877742946e-06, + "loss": 0.7493, "step": 104 }, { - "epoch": 0.022017194380373244, - "grad_norm": 9.292910347987931, - "learning_rate": 4.883720930232559e-06, - "loss": 2.8739, + "epoch": 0.014826320248517369, + "grad_norm": 8.688997715121792, + "learning_rate": 3.29153605015674e-06, + "loss": 1.2732, "step": 105 }, { - "epoch": 0.022226881945900608, - "grad_norm": 6.884255701484647, - "learning_rate": 4.9302325581395355e-06, - "loss": 2.9463, + "epoch": 0.014967523298503248, + "grad_norm": 7.5651936824346775, + "learning_rate": 3.322884012539185e-06, + "loss": 0.8016, "step": 106 }, { - "epoch": 0.022436569511427973, - "grad_norm": 6.8751041618497695, - "learning_rate": 4.976744186046512e-06, - "loss": 2.9997, + "epoch": 0.015108726348489128, + "grad_norm": 7.775727264010563, + "learning_rate": 3.35423197492163e-06, + "loss": 0.8038, "step": 107 }, { - "epoch": 0.022646257076955337, - "grad_norm": 6.371828526115297, - "learning_rate": 5.023255813953489e-06, - "loss": 2.8357, + "epoch": 0.015249929398475007, + "grad_norm": 9.076632987472834, + "learning_rate": 3.3855799373040753e-06, + "loss": 1.1514, "step": 108 }, { - "epoch": 0.022855944642482702, - "grad_norm": 7.441845262648583, - "learning_rate": 5.069767441860466e-06, - "loss": 3.0136, + "epoch": 0.015391132448460887, + "grad_norm": 6.713356329803989, + "learning_rate": 3.4169278996865206e-06, + "loss": 0.7853, "step": 109 }, { - "epoch": 0.023065632208010067, - "grad_norm": 6.973505599652898, - "learning_rate": 5.116279069767442e-06, - "loss": 3.0189, + "epoch": 0.015532335498446766, + "grad_norm": 7.327696700434162, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9567, "step": 110 }, { - "epoch": 0.023275319773537428, - "grad_norm": 8.031376102956665, - "learning_rate": 5.162790697674419e-06, - "loss": 3.1962, + "epoch": 0.015673538548432647, + "grad_norm": 7.793066393172524, + "learning_rate": 3.4796238244514107e-06, + "loss": 0.9615, "step": 111 }, { - "epoch": 0.023485007339064792, - "grad_norm": 6.405612779391031, - "learning_rate": 5.209302325581396e-06, - "loss": 2.7497, + "epoch": 0.015814741598418527, + "grad_norm": 7.799467546951294, + "learning_rate": 3.510971786833856e-06, + "loss": 0.956, "step": 112 }, { - "epoch": 0.023694694904592157, - "grad_norm": 7.762453896092673, - "learning_rate": 5.255813953488372e-06, - "loss": 2.836, + "epoch": 0.015955944648404406, + "grad_norm": 9.153361234737188, + "learning_rate": 3.542319749216301e-06, + "loss": 1.1614, "step": 113 }, { - "epoch": 0.02390438247011952, - "grad_norm": 6.567603453861468, - "learning_rate": 5.302325581395349e-06, - "loss": 3.0374, + "epoch": 0.016097147698390286, + "grad_norm": 7.816487563568254, + "learning_rate": 3.573667711598746e-06, + "loss": 0.9252, "step": 114 }, { - "epoch": 0.024114070035646886, - "grad_norm": 6.363662318269975, - "learning_rate": 5.348837209302326e-06, - "loss": 2.8409, + "epoch": 0.016238350748376165, + "grad_norm": 6.991350952981636, + "learning_rate": 3.6050156739811913e-06, + "loss": 0.9202, "step": 115 }, { - "epoch": 0.02432375760117425, - "grad_norm": 6.344329633093975, - "learning_rate": 5.395348837209303e-06, - "loss": 2.954, + "epoch": 0.016379553798362045, + "grad_norm": 8.046741425650566, + "learning_rate": 3.6363636363636366e-06, + "loss": 1.0653, "step": 116 }, { - "epoch": 0.024533445166701615, - "grad_norm": 7.114931878155349, - "learning_rate": 5.44186046511628e-06, - "loss": 2.7483, + "epoch": 0.016520756848347924, + "grad_norm": 8.206035829344017, + "learning_rate": 3.6677115987460823e-06, + "loss": 0.9128, "step": 117 }, { - "epoch": 0.02474313273222898, - "grad_norm": 6.97592248549356, - "learning_rate": 5.488372093023256e-06, - "loss": 2.8308, + "epoch": 0.016661959898333804, + "grad_norm": 7.45351581410239, + "learning_rate": 3.6990595611285267e-06, + "loss": 1.0055, "step": 118 }, { - "epoch": 0.024952820297756344, - "grad_norm": 6.749563673827139, - "learning_rate": 5.534883720930233e-06, - "loss": 3.0565, + "epoch": 0.016803162948319683, + "grad_norm": 8.422218495495787, + "learning_rate": 3.730407523510972e-06, + "loss": 0.8757, "step": 119 }, { - "epoch": 0.025162507863283706, - "grad_norm": 6.753963206841443, - "learning_rate": 5.58139534883721e-06, - "loss": 2.9406, + "epoch": 0.016944365998305563, + "grad_norm": 8.19658619309238, + "learning_rate": 3.7617554858934172e-06, + "loss": 0.876, "step": 120 }, { - "epoch": 0.02537219542881107, - "grad_norm": 7.029681435834868, - "learning_rate": 5.627906976744186e-06, - "loss": 3.0193, + "epoch": 0.017085569048291442, + "grad_norm": 8.632174035744185, + "learning_rate": 3.793103448275862e-06, + "loss": 0.9799, "step": 121 }, { - "epoch": 0.025581882994338435, - "grad_norm": 7.302060631425022, - "learning_rate": 5.674418604651163e-06, - "loss": 2.9422, + "epoch": 0.01722677209827732, + "grad_norm": 7.201975064478882, + "learning_rate": 3.824451410658307e-06, + "loss": 0.7682, "step": 122 }, { - "epoch": 0.0257915705598658, - "grad_norm": 6.279412677284418, - "learning_rate": 5.72093023255814e-06, - "loss": 3.1161, + "epoch": 0.0173679751482632, + "grad_norm": 7.41939564199568, + "learning_rate": 3.855799373040753e-06, + "loss": 0.9128, "step": 123 }, { - "epoch": 0.026001258125393164, - "grad_norm": 6.705253568776916, - "learning_rate": 5.7674418604651175e-06, - "loss": 2.968, + "epoch": 0.01750917819824908, + "grad_norm": 8.227459935092366, + "learning_rate": 3.887147335423198e-06, + "loss": 1.1737, "step": 124 }, { - "epoch": 0.02621094569092053, - "grad_norm": 7.307523262657048, - "learning_rate": 5.8139534883720935e-06, - "loss": 2.9159, + "epoch": 0.017650381248234964, + "grad_norm": 7.574187825540355, + "learning_rate": 3.918495297805643e-06, + "loss": 0.9508, "step": 125 }, { - "epoch": 0.026420633256447893, - "grad_norm": 7.026743804667868, - "learning_rate": 5.86046511627907e-06, - "loss": 3.0181, + "epoch": 0.017791584298220843, + "grad_norm": 7.630234001220519, + "learning_rate": 3.949843260188088e-06, + "loss": 0.9079, "step": 126 }, { - "epoch": 0.026630320821975258, - "grad_norm": 6.2928015872009935, - "learning_rate": 5.906976744186047e-06, - "loss": 2.7893, + "epoch": 0.017932787348206723, + "grad_norm": 8.535128471785614, + "learning_rate": 3.981191222570533e-06, + "loss": 1.0725, "step": 127 }, { - "epoch": 0.026840008387502622, - "grad_norm": 6.924752440313978, - "learning_rate": 5.953488372093023e-06, - "loss": 2.8785, + "epoch": 0.018073990398192602, + "grad_norm": 7.9385808624909275, + "learning_rate": 4.012539184952978e-06, + "loss": 0.9533, "step": 128 }, { - "epoch": 0.027049695953029987, - "grad_norm": 7.591356882506217, - "learning_rate": 6e-06, - "loss": 2.7213, + "epoch": 0.01821519344817848, + "grad_norm": 8.064381624825716, + "learning_rate": 4.043887147335424e-06, + "loss": 0.9964, "step": 129 }, { - "epoch": 0.027259383518557348, - "grad_norm": 7.315022847584364, - "learning_rate": 6.046511627906977e-06, - "loss": 2.955, + "epoch": 0.01835639649816436, + "grad_norm": 8.223493251464122, + "learning_rate": 4.075235109717869e-06, + "loss": 1.1347, "step": 130 }, { - "epoch": 0.027469071084084713, - "grad_norm": 7.933064282967163, - "learning_rate": 6.093023255813954e-06, - "loss": 3.1728, + "epoch": 0.01849759954815024, + "grad_norm": 7.183138344316719, + "learning_rate": 4.1065830721003135e-06, + "loss": 0.9797, "step": 131 }, { - "epoch": 0.027678758649612077, - "grad_norm": 6.795893029140771, - "learning_rate": 6.139534883720932e-06, - "loss": 3.042, + "epoch": 0.01863880259813612, + "grad_norm": 7.5344265624001725, + "learning_rate": 4.137931034482759e-06, + "loss": 0.897, "step": 132 }, { - "epoch": 0.027888446215139442, - "grad_norm": 7.206132100003922, - "learning_rate": 6.186046511627908e-06, - "loss": 2.9944, + "epoch": 0.018780005648122, + "grad_norm": 7.369089939683868, + "learning_rate": 4.169278996865204e-06, + "loss": 0.9249, "step": 133 }, { - "epoch": 0.028098133780666806, - "grad_norm": 6.8474156142385745, - "learning_rate": 6.2325581395348845e-06, - "loss": 2.8163, + "epoch": 0.01892120869810788, + "grad_norm": 7.1172427648489505, + "learning_rate": 4.20062695924765e-06, + "loss": 0.8232, "step": 134 }, { - "epoch": 0.02830782134619417, - "grad_norm": 7.976594233426404, - "learning_rate": 6.279069767441861e-06, - "loss": 2.8262, + "epoch": 0.01906241174809376, + "grad_norm": 7.548084196420549, + "learning_rate": 4.2319749216300945e-06, + "loss": 1.034, "step": 135 }, { - "epoch": 0.028517508911721536, - "grad_norm": 7.832725676171021, - "learning_rate": 6.325581395348837e-06, - "loss": 2.9799, + "epoch": 0.019203614798079638, + "grad_norm": 8.735116825365756, + "learning_rate": 4.263322884012539e-06, + "loss": 1.0003, "step": 136 }, { - "epoch": 0.0287271964772489, - "grad_norm": 7.5292709231393244, - "learning_rate": 6.372093023255814e-06, - "loss": 3.036, + "epoch": 0.019344817848065517, + "grad_norm": 8.179101551617698, + "learning_rate": 4.294670846394985e-06, + "loss": 0.8194, "step": 137 }, { - "epoch": 0.028936884042776265, - "grad_norm": 6.940805943484372, - "learning_rate": 6.418604651162791e-06, - "loss": 2.8853, + "epoch": 0.019486020898051397, + "grad_norm": 7.33820082796791, + "learning_rate": 4.32601880877743e-06, + "loss": 0.8458, "step": 138 }, { - "epoch": 0.02914657160830363, - "grad_norm": 6.746971856261738, - "learning_rate": 6.465116279069767e-06, - "loss": 3.0046, + "epoch": 0.019627223948037276, + "grad_norm": 6.577869920045354, + "learning_rate": 4.357366771159875e-06, + "loss": 0.835, "step": 139 }, { - "epoch": 0.02935625917383099, - "grad_norm": 6.2941878059733956, - "learning_rate": 6.511627906976745e-06, - "loss": 3.0217, + "epoch": 0.019768426998023156, + "grad_norm": 8.818016421185366, + "learning_rate": 4.3887147335423205e-06, + "loss": 1.2279, "step": 140 }, { - "epoch": 0.029565946739358355, - "grad_norm": 6.701930124928513, - "learning_rate": 6.558139534883722e-06, - "loss": 3.2442, + "epoch": 0.019909630048009035, + "grad_norm": 8.521624173184197, + "learning_rate": 4.420062695924765e-06, + "loss": 1.0621, "step": 141 }, { - "epoch": 0.02977563430488572, - "grad_norm": 7.288812866234423, - "learning_rate": 6.604651162790699e-06, - "loss": 2.7264, + "epoch": 0.02005083309799492, + "grad_norm": 7.32891975414404, + "learning_rate": 4.45141065830721e-06, + "loss": 1.0236, "step": 142 }, { - "epoch": 0.029985321870413084, - "grad_norm": 8.024095492047232, - "learning_rate": 6.651162790697675e-06, - "loss": 2.9517, + "epoch": 0.020192036147980798, + "grad_norm": 7.525756473002361, + "learning_rate": 4.482758620689656e-06, + "loss": 1.0636, "step": 143 }, { - "epoch": 0.03019500943594045, - "grad_norm": 6.646462356240576, - "learning_rate": 6.6976744186046515e-06, - "loss": 2.8886, + "epoch": 0.020333239197966677, + "grad_norm": 7.719305967369412, + "learning_rate": 4.514106583072101e-06, + "loss": 1.0331, "step": 144 }, { - "epoch": 0.030404697001467813, - "grad_norm": 7.4303408597697835, - "learning_rate": 6.744186046511628e-06, - "loss": 2.6785, + "epoch": 0.020474442247952557, + "grad_norm": 7.399362698492069, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.9871, "step": 145 }, { - "epoch": 0.030614384566995178, - "grad_norm": 7.469936851895788, - "learning_rate": 6.790697674418605e-06, - "loss": 3.0306, + "epoch": 0.020615645297938436, + "grad_norm": 6.679701389904611, + "learning_rate": 4.576802507836991e-06, + "loss": 1.0552, "step": 146 }, { - "epoch": 0.030824072132522543, - "grad_norm": 5.665115038975986, - "learning_rate": 6.837209302325581e-06, - "loss": 2.6079, + "epoch": 0.020756848347924316, + "grad_norm": 8.011343652619376, + "learning_rate": 4.608150470219436e-06, + "loss": 1.0866, "step": 147 }, { - "epoch": 0.031033759698049907, - "grad_norm": 6.743437262628165, - "learning_rate": 6.883720930232559e-06, - "loss": 2.8679, + "epoch": 0.020898051397910195, + "grad_norm": 6.958944421322256, + "learning_rate": 4.639498432601881e-06, + "loss": 1.0084, "step": 148 }, { - "epoch": 0.031243447263577268, - "grad_norm": 6.244858976220253, - "learning_rate": 6.930232558139536e-06, - "loss": 2.7485, + "epoch": 0.021039254447896075, + "grad_norm": 7.886896289928861, + "learning_rate": 4.670846394984327e-06, + "loss": 0.9412, "step": 149 }, { - "epoch": 0.03145313482910463, - "grad_norm": 7.46763190974299, - "learning_rate": 6.976744186046513e-06, - "loss": 2.7331, + "epoch": 0.021180457497881954, + "grad_norm": 6.481507835100823, + "learning_rate": 4.7021943573667714e-06, + "loss": 0.7664, "step": 150 }, { - "epoch": 0.031662822394632, - "grad_norm": 7.4836444766998245, - "learning_rate": 7.023255813953489e-06, - "loss": 2.9845, + "epoch": 0.021321660547867834, + "grad_norm": 7.309729219501239, + "learning_rate": 4.733542319749217e-06, + "loss": 1.0342, "step": 151 }, { - "epoch": 0.03187250996015936, - "grad_norm": 7.568525369731155, - "learning_rate": 7.069767441860466e-06, - "loss": 2.8402, + "epoch": 0.021462863597853713, + "grad_norm": 6.633514384163003, + "learning_rate": 4.764890282131662e-06, + "loss": 0.8934, "step": 152 }, { - "epoch": 0.03208219752568673, - "grad_norm": 6.391957644079795, - "learning_rate": 7.1162790697674425e-06, - "loss": 2.7253, + "epoch": 0.021604066647839593, + "grad_norm": 8.784506623524786, + "learning_rate": 4.796238244514107e-06, + "loss": 0.8059, "step": 153 }, { - "epoch": 0.03229188509121409, - "grad_norm": 6.204025705373054, - "learning_rate": 7.1627906976744185e-06, - "loss": 2.7282, + "epoch": 0.021745269697825472, + "grad_norm": 8.189863490933027, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.9883, "step": 154 }, { - "epoch": 0.03250157265674145, - "grad_norm": 7.185918901911493, - "learning_rate": 7.209302325581395e-06, - "loss": 3.0344, + "epoch": 0.02188647274781135, + "grad_norm": 6.552080517179776, + "learning_rate": 4.858934169278997e-06, + "loss": 1.0269, "step": 155 }, { - "epoch": 0.03271126022226882, - "grad_norm": 6.416435208819019, - "learning_rate": 7.255813953488373e-06, - "loss": 2.8597, + "epoch": 0.02202767579779723, + "grad_norm": 6.8021193253288965, + "learning_rate": 4.890282131661442e-06, + "loss": 0.9354, "step": 156 }, { - "epoch": 0.03292094778779618, - "grad_norm": 6.524052329984861, - "learning_rate": 7.30232558139535e-06, - "loss": 2.8257, + "epoch": 0.02216887884778311, + "grad_norm": 7.453130791464393, + "learning_rate": 4.921630094043888e-06, + "loss": 0.8857, "step": 157 }, { - "epoch": 0.03313063535332355, - "grad_norm": 7.2112682560885775, - "learning_rate": 7.348837209302326e-06, - "loss": 3.0819, + "epoch": 0.022310081897768994, + "grad_norm": 8.595456860827616, + "learning_rate": 4.952978056426333e-06, + "loss": 1.0767, "step": 158 }, { - "epoch": 0.03334032291885091, - "grad_norm": 6.061057043931095, - "learning_rate": 7.395348837209303e-06, - "loss": 2.9576, + "epoch": 0.022451284947754873, + "grad_norm": 7.400038898894051, + "learning_rate": 4.9843260188087776e-06, + "loss": 0.8515, "step": 159 }, { - "epoch": 0.03355001048437828, - "grad_norm": 6.998430546185198, - "learning_rate": 7.44186046511628e-06, - "loss": 2.8175, + "epoch": 0.022592487997740753, + "grad_norm": 6.864972018632919, + "learning_rate": 5.015673981191222e-06, + "loss": 0.8471, "step": 160 }, { - "epoch": 0.03375969804990564, - "grad_norm": 7.069055966691737, - "learning_rate": 7.488372093023256e-06, - "loss": 3.0261, + "epoch": 0.022733691047726632, + "grad_norm": 7.244583785380982, + "learning_rate": 5.047021943573668e-06, + "loss": 0.9584, "step": 161 }, { - "epoch": 0.03396938561543301, - "grad_norm": 6.627895182582551, - "learning_rate": 7.534883720930233e-06, - "loss": 2.8868, + "epoch": 0.02287489409771251, + "grad_norm": 7.812300048995693, + "learning_rate": 5.078369905956113e-06, + "loss": 0.8865, "step": 162 }, { - "epoch": 0.03417907318096037, - "grad_norm": 5.983232879869772, - "learning_rate": 7.5813953488372095e-06, - "loss": 2.7741, + "epoch": 0.02301609714769839, + "grad_norm": 7.004691874730641, + "learning_rate": 5.109717868338559e-06, + "loss": 1.0468, "step": 163 }, { - "epoch": 0.03438876074648773, - "grad_norm": 7.561401934252884, - "learning_rate": 7.627906976744187e-06, - "loss": 2.9465, + "epoch": 0.02315730019768427, + "grad_norm": 6.945133808108309, + "learning_rate": 5.1410658307210035e-06, + "loss": 0.9602, "step": 164 }, { - "epoch": 0.0345984483120151, - "grad_norm": 5.630058872583283, - "learning_rate": 7.674418604651164e-06, - "loss": 2.7352, + "epoch": 0.02329850324767015, + "grad_norm": 7.603837018151768, + "learning_rate": 5.172413793103449e-06, + "loss": 1.136, "step": 165 }, { - "epoch": 0.03480813587754246, - "grad_norm": 5.364526316403802, - "learning_rate": 7.72093023255814e-06, - "loss": 2.838, + "epoch": 0.02343970629765603, + "grad_norm": 8.144944589234624, + "learning_rate": 5.203761755485894e-06, + "loss": 0.877, "step": 166 }, { - "epoch": 0.03501782344306983, - "grad_norm": 5.478025763331473, - "learning_rate": 7.767441860465116e-06, - "loss": 2.7412, + "epoch": 0.02358090934764191, + "grad_norm": 7.050413546355765, + "learning_rate": 5.235109717868339e-06, + "loss": 0.8602, "step": 167 }, { - "epoch": 0.03522751100859719, - "grad_norm": 5.845870353207118, - "learning_rate": 7.813953488372094e-06, - "loss": 2.805, + "epoch": 0.02372211239762779, + "grad_norm": 7.2106434082690924, + "learning_rate": 5.266457680250784e-06, + "loss": 0.9794, "step": 168 }, { - "epoch": 0.03543719857412456, - "grad_norm": 6.813137869162234, - "learning_rate": 7.86046511627907e-06, - "loss": 3.0986, + "epoch": 0.023863315447613668, + "grad_norm": 7.654905889622911, + "learning_rate": 5.297805642633229e-06, + "loss": 1.0512, "step": 169 }, { - "epoch": 0.03564688613965192, - "grad_norm": 6.922835858133902, - "learning_rate": 7.906976744186048e-06, - "loss": 2.6966, + "epoch": 0.024004518497599547, + "grad_norm": 7.218193827248691, + "learning_rate": 5.329153605015674e-06, + "loss": 0.9071, "step": 170 }, { - "epoch": 0.035856573705179286, - "grad_norm": 6.3040691173508145, - "learning_rate": 7.953488372093024e-06, - "loss": 2.7955, + "epoch": 0.024145721547585427, + "grad_norm": 6.34625892925884, + "learning_rate": 5.36050156739812e-06, + "loss": 0.8166, "step": 171 }, { - "epoch": 0.03606626127070665, - "grad_norm": 6.476982121799255, - "learning_rate": 8.000000000000001e-06, - "loss": 2.9843, + "epoch": 0.024286924597571306, + "grad_norm": 6.9293933087403925, + "learning_rate": 5.391849529780565e-06, + "loss": 0.7874, "step": 172 }, { - "epoch": 0.03627594883623401, - "grad_norm": 6.250873244793059, - "learning_rate": 8.046511627906977e-06, - "loss": 2.6941, + "epoch": 0.024428127647557186, + "grad_norm": 7.005121592916882, + "learning_rate": 5.4231974921630105e-06, + "loss": 0.9914, "step": 173 }, { - "epoch": 0.036485636401761376, - "grad_norm": 6.142394880645442, - "learning_rate": 8.093023255813955e-06, - "loss": 3.0228, + "epoch": 0.024569330697543065, + "grad_norm": 6.414056669238483, + "learning_rate": 5.4545454545454545e-06, + "loss": 0.9896, "step": 174 }, { - "epoch": 0.03669532396728874, - "grad_norm": 6.455941279610695, - "learning_rate": 8.139534883720931e-06, - "loss": 3.0294, + "epoch": 0.02471053374752895, + "grad_norm": 6.7530037277615165, + "learning_rate": 5.4858934169279e-06, + "loss": 0.8085, "step": 175 }, { - "epoch": 0.036905011532816105, - "grad_norm": 7.036015991390055, - "learning_rate": 8.186046511627907e-06, - "loss": 2.9305, + "epoch": 0.024851736797514828, + "grad_norm": 8.179725977304141, + "learning_rate": 5.517241379310345e-06, + "loss": 1.112, "step": 176 }, { - "epoch": 0.037114699098343466, - "grad_norm": 6.930087474195064, - "learning_rate": 8.232558139534885e-06, - "loss": 2.8207, + "epoch": 0.024992939847500707, + "grad_norm": 6.53333509105362, + "learning_rate": 5.548589341692791e-06, + "loss": 1.0543, "step": 177 }, { - "epoch": 0.037324386663870834, - "grad_norm": 5.994329531574434, - "learning_rate": 8.279069767441861e-06, - "loss": 2.8105, + "epoch": 0.025134142897486587, + "grad_norm": 7.512819748003157, + "learning_rate": 5.5799373040752355e-06, + "loss": 0.8857, "step": 178 }, { - "epoch": 0.037534074229398195, - "grad_norm": 6.315983955737537, - "learning_rate": 8.325581395348837e-06, - "loss": 2.7665, + "epoch": 0.025275345947472466, + "grad_norm": 7.611626302273766, + "learning_rate": 5.611285266457681e-06, + "loss": 0.9241, "step": 179 }, { - "epoch": 0.037743761794925564, - "grad_norm": 6.264585173042879, - "learning_rate": 8.372093023255815e-06, - "loss": 2.873, + "epoch": 0.025416548997458346, + "grad_norm": 7.1173872377859695, + "learning_rate": 5.642633228840125e-06, + "loss": 1.0062, "step": 180 }, { - "epoch": 0.037953449360452925, - "grad_norm": 6.263543731620491, - "learning_rate": 8.418604651162792e-06, - "loss": 2.7261, + "epoch": 0.025557752047444225, + "grad_norm": 8.03266549777508, + "learning_rate": 5.673981191222571e-06, + "loss": 0.9286, "step": 181 }, { - "epoch": 0.03816313692598029, - "grad_norm": 6.910327208966558, - "learning_rate": 8.465116279069768e-06, - "loss": 2.6568, + "epoch": 0.025698955097430105, + "grad_norm": 7.597371042299757, + "learning_rate": 5.705329153605016e-06, + "loss": 0.908, "step": 182 }, { - "epoch": 0.038372824491507654, - "grad_norm": 7.188497048654426, - "learning_rate": 8.511627906976744e-06, - "loss": 3.0504, + "epoch": 0.025840158147415984, + "grad_norm": 7.409423818262954, + "learning_rate": 5.7366771159874614e-06, + "loss": 1.1088, "step": 183 }, { - "epoch": 0.038582512057035015, - "grad_norm": 5.911916724608364, - "learning_rate": 8.558139534883722e-06, - "loss": 2.7741, + "epoch": 0.025981361197401864, + "grad_norm": 6.864046570601934, + "learning_rate": 5.768025078369906e-06, + "loss": 1.0343, "step": 184 }, { - "epoch": 0.03879219962256238, - "grad_norm": 6.213909048610596, - "learning_rate": 8.604651162790698e-06, - "loss": 2.8767, + "epoch": 0.026122564247387743, + "grad_norm": 6.329092426366271, + "learning_rate": 5.799373040752352e-06, + "loss": 0.8898, "step": 185 }, { - "epoch": 0.039001887188089744, - "grad_norm": 6.601058676711457, - "learning_rate": 8.651162790697674e-06, - "loss": 2.8202, + "epoch": 0.026263767297373623, + "grad_norm": 8.164265970315288, + "learning_rate": 5.830721003134797e-06, + "loss": 1.1265, "step": 186 }, { - "epoch": 0.03921157475361711, - "grad_norm": 6.579091805768915, - "learning_rate": 8.697674418604652e-06, - "loss": 2.7949, + "epoch": 0.026404970347359502, + "grad_norm": 7.024578148497136, + "learning_rate": 5.862068965517242e-06, + "loss": 1.019, "step": 187 }, { - "epoch": 0.03942126231914447, - "grad_norm": 6.225942052043143, - "learning_rate": 8.74418604651163e-06, - "loss": 2.7729, + "epoch": 0.02654617339734538, + "grad_norm": 6.996290405725953, + "learning_rate": 5.8934169278996865e-06, + "loss": 1.0395, "step": 188 }, { - "epoch": 0.03963094988467184, - "grad_norm": 6.645340141686921, - "learning_rate": 8.790697674418606e-06, - "loss": 2.6869, + "epoch": 0.02668737644733126, + "grad_norm": 7.653102387363891, + "learning_rate": 5.924764890282132e-06, + "loss": 0.9276, "step": 189 }, { - "epoch": 0.0398406374501992, - "grad_norm": 6.342066424715, - "learning_rate": 8.837209302325582e-06, - "loss": 2.7648, + "epoch": 0.02682857949731714, + "grad_norm": 7.766088426361559, + "learning_rate": 5.956112852664577e-06, + "loss": 1.0962, "step": 190 }, { - "epoch": 0.04005032501572657, - "grad_norm": 6.942050391424554, - "learning_rate": 8.88372093023256e-06, - "loss": 2.44, + "epoch": 0.02696978254730302, + "grad_norm": 6.701338384563943, + "learning_rate": 5.987460815047023e-06, + "loss": 1.1105, "step": 191 }, { - "epoch": 0.04026001258125393, - "grad_norm": 6.879752276057378, - "learning_rate": 8.930232558139535e-06, - "loss": 2.6724, + "epoch": 0.027110985597288903, + "grad_norm": 5.864439552872452, + "learning_rate": 6.0188087774294676e-06, + "loss": 0.9879, "step": 192 }, { - "epoch": 0.04046970014678129, - "grad_norm": 6.557447416503833, - "learning_rate": 8.976744186046511e-06, - "loss": 2.896, + "epoch": 0.027252188647274782, + "grad_norm": 6.625683545637496, + "learning_rate": 6.050156739811913e-06, + "loss": 1.0501, "step": 193 }, { - "epoch": 0.04067938771230866, - "grad_norm": 6.7668012971340055, - "learning_rate": 9.023255813953489e-06, - "loss": 2.8112, + "epoch": 0.027393391697260662, + "grad_norm": 6.359718266354579, + "learning_rate": 6.081504702194357e-06, + "loss": 0.815, "step": 194 }, { - "epoch": 0.04088907527783602, - "grad_norm": 6.149504065706865, - "learning_rate": 9.069767441860465e-06, - "loss": 2.6075, + "epoch": 0.02753459474724654, + "grad_norm": 6.2709473882940125, + "learning_rate": 6.112852664576803e-06, + "loss": 0.992, "step": 195 }, { - "epoch": 0.04109876284336339, - "grad_norm": 5.771621281957409, - "learning_rate": 9.116279069767443e-06, - "loss": 2.6253, + "epoch": 0.02767579779723242, + "grad_norm": 6.831492474357971, + "learning_rate": 6.144200626959248e-06, + "loss": 1.0035, "step": 196 }, { - "epoch": 0.04130845040889075, - "grad_norm": 6.225253601777948, - "learning_rate": 9.162790697674419e-06, - "loss": 3.0023, + "epoch": 0.0278170008472183, + "grad_norm": 6.62691349429465, + "learning_rate": 6.1755485893416935e-06, + "loss": 0.9039, "step": 197 }, { - "epoch": 0.04151813797441812, - "grad_norm": 6.672574003306967, - "learning_rate": 9.209302325581397e-06, - "loss": 2.6836, + "epoch": 0.02795820389720418, + "grad_norm": 6.490045598857795, + "learning_rate": 6.206896551724138e-06, + "loss": 0.9872, "step": 198 }, { - "epoch": 0.04172782553994548, - "grad_norm": 5.512500309008176, - "learning_rate": 9.255813953488373e-06, - "loss": 2.5928, + "epoch": 0.02809940694719006, + "grad_norm": 7.788972800265241, + "learning_rate": 6.238244514106584e-06, + "loss": 1.2879, "step": 199 }, { - "epoch": 0.04193751310547285, - "grad_norm": 5.5206322292409356, - "learning_rate": 9.30232558139535e-06, - "loss": 2.8166, + "epoch": 0.02824060999717594, + "grad_norm": 7.098781003122065, + "learning_rate": 6.269592476489029e-06, + "loss": 0.9176, "step": 200 }, { - "epoch": 0.04214720067100021, - "grad_norm": 6.493257736910734, - "learning_rate": 9.348837209302326e-06, - "loss": 2.6397, + "epoch": 0.02838181304716182, + "grad_norm": 7.226799120218369, + "learning_rate": 6.300940438871474e-06, + "loss": 1.1116, "step": 201 }, { - "epoch": 0.04235688823652757, - "grad_norm": 5.561370011709336, - "learning_rate": 9.395348837209302e-06, - "loss": 2.7628, + "epoch": 0.028523016097147698, + "grad_norm": 7.66234418755526, + "learning_rate": 6.3322884012539185e-06, + "loss": 1.0238, "step": 202 }, { - "epoch": 0.04256657580205494, - "grad_norm": 6.4883724309013635, - "learning_rate": 9.44186046511628e-06, - "loss": 2.8979, + "epoch": 0.028664219147133577, + "grad_norm": 8.209472071357272, + "learning_rate": 6.363636363636364e-06, + "loss": 1.025, "step": 203 }, { - "epoch": 0.0427762633675823, - "grad_norm": 6.221200515288901, - "learning_rate": 9.488372093023258e-06, - "loss": 3.0369, + "epoch": 0.028805422197119457, + "grad_norm": 6.72047643899154, + "learning_rate": 6.394984326018809e-06, + "loss": 1.0451, "step": 204 }, { - "epoch": 0.04298595093310967, - "grad_norm": 6.039474974421818, - "learning_rate": 9.534883720930234e-06, - "loss": 2.7035, + "epoch": 0.028946625247105336, + "grad_norm": 6.769673768271201, + "learning_rate": 6.426332288401255e-06, + "loss": 0.9877, "step": 205 }, { - "epoch": 0.04319563849863703, - "grad_norm": 5.525237525788932, - "learning_rate": 9.58139534883721e-06, - "loss": 2.8386, + "epoch": 0.029087828297091216, + "grad_norm": 7.123716516659776, + "learning_rate": 6.4576802507837e-06, + "loss": 0.8619, "step": 206 }, { - "epoch": 0.0434053260641644, - "grad_norm": 5.976020120493838, - "learning_rate": 9.627906976744188e-06, - "loss": 2.8688, + "epoch": 0.029229031347077095, + "grad_norm": 7.413501442190635, + "learning_rate": 6.489028213166145e-06, + "loss": 1.148, "step": 207 }, { - "epoch": 0.04361501362969176, - "grad_norm": 6.3776814779411675, - "learning_rate": 9.674418604651164e-06, - "loss": 2.7952, + "epoch": 0.029370234397062978, + "grad_norm": 6.967619248385326, + "learning_rate": 6.520376175548589e-06, + "loss": 0.9802, "step": 208 }, { - "epoch": 0.043824701195219126, - "grad_norm": 6.965685190392773, - "learning_rate": 9.72093023255814e-06, - "loss": 2.7989, + "epoch": 0.029511437447048858, + "grad_norm": 7.951663301564583, + "learning_rate": 6.551724137931035e-06, + "loss": 1.2734, "step": 209 }, { - "epoch": 0.04403438876074649, - "grad_norm": 5.971415176834104, - "learning_rate": 9.767441860465117e-06, - "loss": 2.9307, + "epoch": 0.029652640497034737, + "grad_norm": 6.113622492591106, + "learning_rate": 6.58307210031348e-06, + "loss": 0.9453, "step": 210 }, { - "epoch": 0.044244076326273855, - "grad_norm": 6.085648978675961, - "learning_rate": 9.813953488372093e-06, - "loss": 2.8971, + "epoch": 0.029793843547020617, + "grad_norm": 8.254114737069598, + "learning_rate": 6.6144200626959255e-06, + "loss": 0.9883, "step": 211 }, { - "epoch": 0.044453763891801217, - "grad_norm": 6.762147772482043, - "learning_rate": 9.860465116279071e-06, - "loss": 2.7088, + "epoch": 0.029935046597006496, + "grad_norm": 7.200862338252834, + "learning_rate": 6.64576802507837e-06, + "loss": 0.9004, "step": 212 }, { - "epoch": 0.04466345145732858, - "grad_norm": 6.088741491621783, - "learning_rate": 9.906976744186047e-06, - "loss": 2.6899, + "epoch": 0.030076249646992376, + "grad_norm": 7.732074227436989, + "learning_rate": 6.677115987460816e-06, + "loss": 1.0184, "step": 213 }, { - "epoch": 0.044873139022855946, - "grad_norm": 6.3357003480094285, - "learning_rate": 9.953488372093025e-06, - "loss": 2.9618, + "epoch": 0.030217452696978255, + "grad_norm": 6.643036335751048, + "learning_rate": 6.70846394984326e-06, + "loss": 1.0927, "step": 214 }, { - "epoch": 0.04508282658838331, - "grad_norm": 5.800128149585371, - "learning_rate": 1e-05, - "loss": 2.8261, + "epoch": 0.030358655746964135, + "grad_norm": 6.990791179636594, + "learning_rate": 6.739811912225706e-06, + "loss": 1.0011, "step": 215 }, { - "epoch": 0.045292514153910675, - "grad_norm": 5.7763607505575445, - "learning_rate": 1.0046511627906979e-05, - "loss": 2.7597, + "epoch": 0.030499858796950014, + "grad_norm": 6.516712068369841, + "learning_rate": 6.771159874608151e-06, + "loss": 1.0647, "step": 216 }, { - "epoch": 0.045502201719438036, - "grad_norm": 5.4007140383531285, - "learning_rate": 1.0093023255813955e-05, - "loss": 2.782, + "epoch": 0.030641061846935894, + "grad_norm": 7.591672062474376, + "learning_rate": 6.802507836990596e-06, + "loss": 1.1377, "step": 217 }, { - "epoch": 0.045711889284965404, - "grad_norm": 5.5562906602558755, - "learning_rate": 1.0139534883720932e-05, - "loss": 2.7545, + "epoch": 0.030782264896921773, + "grad_norm": 7.023053158722401, + "learning_rate": 6.833855799373041e-06, + "loss": 1.1098, "step": 218 }, { - "epoch": 0.045921576850492765, - "grad_norm": 5.160675907752467, - "learning_rate": 1.0186046511627907e-05, - "loss": 2.6036, + "epoch": 0.030923467946907653, + "grad_norm": 7.682046253111874, + "learning_rate": 6.865203761755487e-06, + "loss": 1.0534, "step": 219 }, { - "epoch": 0.04613126441602013, - "grad_norm": 6.482894622193402, - "learning_rate": 1.0232558139534884e-05, - "loss": 2.6946, + "epoch": 0.031064670996893532, + "grad_norm": 7.768258871589601, + "learning_rate": 6.896551724137932e-06, + "loss": 1.0257, "step": 220 }, { - "epoch": 0.046340951981547494, - "grad_norm": 7.134759447652219, - "learning_rate": 1.027906976744186e-05, - "loss": 2.8082, + "epoch": 0.03120587404687941, + "grad_norm": 8.972781908035136, + "learning_rate": 6.9278996865203765e-06, + "loss": 0.997, "step": 221 }, { - "epoch": 0.046550639547074855, - "grad_norm": 6.712574801259102, - "learning_rate": 1.0325581395348838e-05, - "loss": 2.8778, + "epoch": 0.031347077096865295, + "grad_norm": 7.2369475078109, + "learning_rate": 6.959247648902821e-06, + "loss": 1.0176, "step": 222 }, { - "epoch": 0.046760327112602224, - "grad_norm": 6.142230410657661, - "learning_rate": 1.0372093023255816e-05, - "loss": 2.8448, + "epoch": 0.03148828014685117, + "grad_norm": 5.763434319127323, + "learning_rate": 6.990595611285267e-06, + "loss": 1.0166, "step": 223 }, { - "epoch": 0.046970014678129585, - "grad_norm": 5.792950804593206, - "learning_rate": 1.0418604651162792e-05, - "loss": 2.5658, + "epoch": 0.031629483196837054, + "grad_norm": 6.258367593132719, + "learning_rate": 7.021943573667712e-06, + "loss": 0.988, "step": 224 }, { - "epoch": 0.04717970224365695, - "grad_norm": 6.252937108520555, - "learning_rate": 1.046511627906977e-05, - "loss": 2.7542, + "epoch": 0.03177068624682293, + "grad_norm": 6.4208488380894435, + "learning_rate": 7.0532915360501576e-06, + "loss": 0.8481, "step": 225 }, { - "epoch": 0.047389389809184314, - "grad_norm": 6.903655994729894, - "learning_rate": 1.0511627906976744e-05, - "loss": 2.8082, + "epoch": 0.03191188929680881, + "grad_norm": 7.00325042344024, + "learning_rate": 7.084639498432602e-06, + "loss": 0.9924, "step": 226 }, { - "epoch": 0.04759907737471168, - "grad_norm": 5.967108627828422, - "learning_rate": 1.0558139534883722e-05, - "loss": 2.6372, + "epoch": 0.03205309234679469, + "grad_norm": 7.2807418741085606, + "learning_rate": 7.115987460815048e-06, + "loss": 1.0043, "step": 227 }, { - "epoch": 0.04780876494023904, - "grad_norm": 5.785176436017485, - "learning_rate": 1.0604651162790698e-05, - "loss": 2.7638, + "epoch": 0.03219429539678057, + "grad_norm": 7.857645959514648, + "learning_rate": 7.147335423197492e-06, + "loss": 1.0441, "step": 228 }, { - "epoch": 0.04801845250576641, - "grad_norm": 5.911200548984772, - "learning_rate": 1.0651162790697675e-05, - "loss": 2.7544, + "epoch": 0.03233549844676645, + "grad_norm": 6.590381537664548, + "learning_rate": 7.178683385579938e-06, + "loss": 0.8702, "step": 229 }, { - "epoch": 0.04822814007129377, - "grad_norm": 6.285402909258177, - "learning_rate": 1.0697674418604651e-05, - "loss": 2.9015, + "epoch": 0.03247670149675233, + "grad_norm": 6.309134241317297, + "learning_rate": 7.210031347962383e-06, + "loss": 0.9362, "step": 230 }, { - "epoch": 0.04843782763682113, - "grad_norm": 5.535772285457667, - "learning_rate": 1.0744186046511629e-05, - "loss": 2.8226, + "epoch": 0.032617904546738206, + "grad_norm": 7.5306558948588656, + "learning_rate": 7.241379310344828e-06, + "loss": 1.1168, "step": 231 }, { - "epoch": 0.0486475152023485, - "grad_norm": 5.891636230427452, - "learning_rate": 1.0790697674418607e-05, - "loss": 2.7513, + "epoch": 0.03275910759672409, + "grad_norm": 6.924984359537364, + "learning_rate": 7.272727272727273e-06, + "loss": 1.0031, "step": 232 }, { - "epoch": 0.04885720276787586, - "grad_norm": 5.576360166046296, - "learning_rate": 1.0837209302325583e-05, - "loss": 2.7401, + "epoch": 0.03290031064670997, + "grad_norm": 6.833615445982986, + "learning_rate": 7.304075235109719e-06, + "loss": 1.003, "step": 233 }, { - "epoch": 0.04906689033340323, - "grad_norm": 5.054243993611499, - "learning_rate": 1.088372093023256e-05, - "loss": 2.8342, + "epoch": 0.03304151369669585, + "grad_norm": 8.320714511863434, + "learning_rate": 7.3354231974921645e-06, + "loss": 1.1908, "step": 234 }, { - "epoch": 0.04927657789893059, - "grad_norm": 7.005846836843509, - "learning_rate": 1.0930232558139535e-05, - "loss": 2.837, + "epoch": 0.03318271674668173, + "grad_norm": 7.344265285722987, + "learning_rate": 7.3667711598746085e-06, + "loss": 1.1049, "step": 235 }, { - "epoch": 0.04948626546445796, - "grad_norm": 6.15741902291841, - "learning_rate": 1.0976744186046513e-05, - "loss": 2.8706, + "epoch": 0.03332391979666761, + "grad_norm": 6.339242422888241, + "learning_rate": 7.398119122257053e-06, + "loss": 0.9367, "step": 236 }, { - "epoch": 0.04969595302998532, - "grad_norm": 5.913666044172258, - "learning_rate": 1.1023255813953489e-05, - "loss": 2.6429, + "epoch": 0.03346512284665349, + "grad_norm": 7.531043499443148, + "learning_rate": 7.429467084639499e-06, + "loss": 1.1008, "step": 237 }, { - "epoch": 0.04990564059551269, - "grad_norm": 5.950289705708864, - "learning_rate": 1.1069767441860466e-05, - "loss": 2.8165, + "epoch": 0.033606325896639366, + "grad_norm": 6.74515752492174, + "learning_rate": 7.460815047021944e-06, + "loss": 0.9319, "step": 238 }, { - "epoch": 0.05011532816104005, - "grad_norm": 5.274677163222211, - "learning_rate": 1.1116279069767444e-05, - "loss": 2.6037, + "epoch": 0.03374752894662525, + "grad_norm": 6.641173087192396, + "learning_rate": 7.49216300940439e-06, + "loss": 0.8657, "step": 239 }, { - "epoch": 0.05032501572656741, - "grad_norm": 5.348982008009826, - "learning_rate": 1.116279069767442e-05, - "loss": 2.469, + "epoch": 0.033888731996611125, + "grad_norm": 7.19435608221455, + "learning_rate": 7.5235109717868345e-06, + "loss": 1.0358, "step": 240 }, { - "epoch": 0.05053470329209478, - "grad_norm": 5.478702212197505, - "learning_rate": 1.1209302325581398e-05, - "loss": 2.6849, + "epoch": 0.03402993504659701, + "grad_norm": 6.740198448819316, + "learning_rate": 7.554858934169279e-06, + "loss": 1.0419, "step": 241 }, { - "epoch": 0.05074439085762214, - "grad_norm": 6.806770523232891, - "learning_rate": 1.1255813953488372e-05, - "loss": 2.9733, + "epoch": 0.034171138096582884, + "grad_norm": 6.899726169941688, + "learning_rate": 7.586206896551724e-06, + "loss": 1.0432, "step": 242 }, { - "epoch": 0.05095407842314951, - "grad_norm": 5.274062061765246, - "learning_rate": 1.130232558139535e-05, - "loss": 2.7172, + "epoch": 0.03431234114656877, + "grad_norm": 6.238918986390443, + "learning_rate": 7.61755485893417e-06, + "loss": 0.9841, "step": 243 }, { - "epoch": 0.05116376598867687, - "grad_norm": 5.92775882781891, - "learning_rate": 1.1348837209302326e-05, - "loss": 2.686, + "epoch": 0.03445354419655464, + "grad_norm": 5.395154920458694, + "learning_rate": 7.648902821316615e-06, + "loss": 0.6961, "step": 244 }, { - "epoch": 0.05137345355420424, - "grad_norm": 5.422156852561135, - "learning_rate": 1.1395348837209304e-05, - "loss": 2.8409, + "epoch": 0.034594747246540526, + "grad_norm": 6.1640422907309524, + "learning_rate": 7.68025078369906e-06, + "loss": 1.0119, "step": 245 }, { - "epoch": 0.0515831411197316, - "grad_norm": 5.47457712009494, - "learning_rate": 1.144186046511628e-05, - "loss": 2.6795, + "epoch": 0.0347359502965264, + "grad_norm": 6.726209308033153, + "learning_rate": 7.711598746081506e-06, + "loss": 0.9094, "step": 246 }, { - "epoch": 0.05179282868525897, - "grad_norm": 6.5079623854230215, - "learning_rate": 1.1488372093023257e-05, - "loss": 2.6468, + "epoch": 0.034877153346512285, + "grad_norm": 10.893811698535975, + "learning_rate": 7.742946708463952e-06, + "loss": 0.9771, "step": 247 }, { - "epoch": 0.05200251625078633, - "grad_norm": 6.052979203695229, - "learning_rate": 1.1534883720930235e-05, - "loss": 2.7911, + "epoch": 0.03501835639649816, + "grad_norm": 7.498601486235813, + "learning_rate": 7.774294670846396e-06, + "loss": 1.1061, "step": 248 }, { - "epoch": 0.052212203816313696, - "grad_norm": 6.2211508374215905, - "learning_rate": 1.158139534883721e-05, - "loss": 2.646, + "epoch": 0.035159559446484044, + "grad_norm": 8.986638701902864, + "learning_rate": 7.80564263322884e-06, + "loss": 1.3214, "step": 249 }, { - "epoch": 0.05242189138184106, - "grad_norm": 6.100754247474486, - "learning_rate": 1.1627906976744187e-05, - "loss": 2.7139, + "epoch": 0.03530076249646993, + "grad_norm": 7.082930061150734, + "learning_rate": 7.836990595611285e-06, + "loss": 0.8836, "step": 250 }, { - "epoch": 0.05263157894736842, - "grad_norm": 6.641575016899216, - "learning_rate": 1.1674418604651163e-05, - "loss": 2.8107, + "epoch": 0.0354419655464558, + "grad_norm": 7.277989450437325, + "learning_rate": 7.868338557993731e-06, + "loss": 0.9712, "step": 251 }, { - "epoch": 0.052841266512895786, - "grad_norm": 7.103962964668787, - "learning_rate": 1.172093023255814e-05, - "loss": 3.0844, + "epoch": 0.035583168596441686, + "grad_norm": 6.64538298891202, + "learning_rate": 7.899686520376177e-06, + "loss": 1.0079, "step": 252 }, { - "epoch": 0.05305095407842315, - "grad_norm": 6.158991274742413, - "learning_rate": 1.1767441860465117e-05, - "loss": 2.6862, + "epoch": 0.03572437164642756, + "grad_norm": 7.373745682677788, + "learning_rate": 7.93103448275862e-06, + "loss": 1.0341, "step": 253 }, { - "epoch": 0.053260641643950515, - "grad_norm": 5.493349088138242, - "learning_rate": 1.1813953488372095e-05, - "loss": 2.72, + "epoch": 0.035865574696413445, + "grad_norm": 6.953966508907405, + "learning_rate": 7.962382445141067e-06, + "loss": 1.0001, "step": 254 }, { - "epoch": 0.053470329209477876, - "grad_norm": 6.94758241674908, - "learning_rate": 1.1860465116279072e-05, - "loss": 2.9364, + "epoch": 0.03600677774639932, + "grad_norm": 7.361235977717567, + "learning_rate": 7.99373040752351e-06, + "loss": 1.1033, "step": 255 }, { - "epoch": 0.053680016775005245, - "grad_norm": 6.477111019252701, - "learning_rate": 1.1906976744186047e-05, - "loss": 2.9216, + "epoch": 0.036147980796385204, + "grad_norm": 8.032169384964144, + "learning_rate": 8.025078369905956e-06, + "loss": 0.9814, "step": 256 }, { - "epoch": 0.053889704340532606, - "grad_norm": 6.713537137328682, - "learning_rate": 1.1953488372093024e-05, - "loss": 2.7497, + "epoch": 0.03628918384637108, + "grad_norm": 6.712940649969857, + "learning_rate": 8.056426332288402e-06, + "loss": 1.0296, "step": 257 }, { - "epoch": 0.054099391906059974, - "grad_norm": 5.457202553876317, - "learning_rate": 1.2e-05, - "loss": 2.686, + "epoch": 0.03643038689635696, + "grad_norm": 7.335659618829718, + "learning_rate": 8.087774294670848e-06, + "loss": 1.1228, "step": 258 }, { - "epoch": 0.054309079471587335, - "grad_norm": 5.928454809699452, - "learning_rate": 1.2046511627906978e-05, - "loss": 2.4659, + "epoch": 0.03657158994634284, + "grad_norm": 6.8229346358008165, + "learning_rate": 8.119122257053292e-06, + "loss": 0.8835, "step": 259 }, { - "epoch": 0.054518767037114696, - "grad_norm": 6.572174351073993, - "learning_rate": 1.2093023255813954e-05, - "loss": 2.7379, + "epoch": 0.03671279299632872, + "grad_norm": 6.844637075084648, + "learning_rate": 8.150470219435737e-06, + "loss": 1.0386, "step": 260 }, { - "epoch": 0.054728454602642064, - "grad_norm": 6.014485618362067, - "learning_rate": 1.2139534883720932e-05, - "loss": 2.6661, + "epoch": 0.0368539960463146, + "grad_norm": 7.701163144197415, + "learning_rate": 8.181818181818183e-06, + "loss": 1.0405, "step": 261 }, { - "epoch": 0.054938142168169425, - "grad_norm": 6.828827666462322, - "learning_rate": 1.2186046511627908e-05, - "loss": 2.7704, + "epoch": 0.03699519909630048, + "grad_norm": 7.463688175051193, + "learning_rate": 8.213166144200627e-06, + "loss": 0.974, "step": 262 }, { - "epoch": 0.05514782973369679, - "grad_norm": 5.5016885224260825, - "learning_rate": 1.2232558139534886e-05, - "loss": 2.7052, + "epoch": 0.03713640214628636, + "grad_norm": 6.211074177188676, + "learning_rate": 8.244514106583073e-06, + "loss": 0.9123, "step": 263 }, { - "epoch": 0.055357517299224154, - "grad_norm": 5.856462966659524, - "learning_rate": 1.2279069767441863e-05, - "loss": 3.0589, + "epoch": 0.03727760519627224, + "grad_norm": 6.364075707460534, + "learning_rate": 8.275862068965518e-06, + "loss": 0.9195, "step": 264 }, { - "epoch": 0.05556720486475152, - "grad_norm": 5.913114702326443, - "learning_rate": 1.2325581395348838e-05, - "loss": 2.6912, + "epoch": 0.037418808246258116, + "grad_norm": 6.426994151994939, + "learning_rate": 8.307210031347962e-06, + "loss": 1.0276, "step": 265 }, { - "epoch": 0.055776892430278883, - "grad_norm": 5.879154738611566, - "learning_rate": 1.2372093023255815e-05, - "loss": 2.7632, + "epoch": 0.037560011296244, + "grad_norm": 5.918745798154022, + "learning_rate": 8.338557993730408e-06, + "loss": 1.0192, "step": 266 }, { - "epoch": 0.05598657999580625, - "grad_norm": 5.980828430907205, - "learning_rate": 1.2418604651162791e-05, - "loss": 2.7148, + "epoch": 0.03770121434622988, + "grad_norm": 5.97185206372574, + "learning_rate": 8.369905956112854e-06, + "loss": 0.9448, "step": 267 }, { - "epoch": 0.05619626756133361, - "grad_norm": 5.886866577946389, - "learning_rate": 1.2465116279069769e-05, - "loss": 2.7365, + "epoch": 0.03784241739621576, + "grad_norm": 6.8096930751921105, + "learning_rate": 8.4012539184953e-06, + "loss": 1.1044, "step": 268 }, { - "epoch": 0.056405955126860974, - "grad_norm": 5.930187686501461, - "learning_rate": 1.2511627906976745e-05, - "loss": 2.709, + "epoch": 0.03798362044620164, + "grad_norm": 6.4786220813846125, + "learning_rate": 8.432601880877743e-06, + "loss": 1.0572, "step": 269 }, { - "epoch": 0.05661564269238834, - "grad_norm": 6.380802844368986, - "learning_rate": 1.2558139534883723e-05, - "loss": 2.7103, + "epoch": 0.03812482349618752, + "grad_norm": 6.36567265613983, + "learning_rate": 8.463949843260189e-06, + "loss": 1.1283, "step": 270 }, { - "epoch": 0.0568253302579157, - "grad_norm": 5.708232470252725, - "learning_rate": 1.26046511627907e-05, - "loss": 2.6117, + "epoch": 0.0382660265461734, + "grad_norm": 7.441630181439689, + "learning_rate": 8.495297805642633e-06, + "loss": 1.044, "step": 271 }, { - "epoch": 0.05703501782344307, - "grad_norm": 7.25828101588181, - "learning_rate": 1.2651162790697675e-05, - "loss": 2.662, + "epoch": 0.038407229596159276, + "grad_norm": 7.031370991366347, + "learning_rate": 8.526645768025079e-06, + "loss": 1.0039, "step": 272 }, { - "epoch": 0.05724470538897043, - "grad_norm": 6.696050405178439, - "learning_rate": 1.2697674418604653e-05, - "loss": 2.7142, + "epoch": 0.03854843264614516, + "grad_norm": 7.535364706946894, + "learning_rate": 8.557993730407524e-06, + "loss": 1.1076, "step": 273 }, { - "epoch": 0.0574543929544978, - "grad_norm": 5.94183434264606, - "learning_rate": 1.2744186046511629e-05, - "loss": 2.629, + "epoch": 0.038689635696131035, + "grad_norm": 6.624210020710945, + "learning_rate": 8.58934169278997e-06, + "loss": 1.0522, "step": 274 }, { - "epoch": 0.05766408052002516, - "grad_norm": 6.277838004747979, - "learning_rate": 1.2790697674418606e-05, - "loss": 2.6538, + "epoch": 0.03883083874611692, + "grad_norm": 7.196509976708002, + "learning_rate": 8.620689655172414e-06, + "loss": 1.027, "step": 275 }, { - "epoch": 0.05787376808555253, - "grad_norm": 6.172170849994645, - "learning_rate": 1.2837209302325582e-05, - "loss": 2.6504, + "epoch": 0.038972041796102794, + "grad_norm": 7.8080749802315745, + "learning_rate": 8.65203761755486e-06, + "loss": 1.1557, "step": 276 }, { - "epoch": 0.05808345565107989, - "grad_norm": 5.709414960334309, - "learning_rate": 1.288372093023256e-05, - "loss": 2.7447, + "epoch": 0.03911324484608868, + "grad_norm": 5.386038111297556, + "learning_rate": 8.683385579937304e-06, + "loss": 0.8177, "step": 277 }, { - "epoch": 0.05829314321660726, - "grad_norm": 5.920686961359142, - "learning_rate": 1.2930232558139534e-05, - "loss": 2.877, + "epoch": 0.03925444789607455, + "grad_norm": 6.319860957540828, + "learning_rate": 8.71473354231975e-06, + "loss": 0.9943, "step": 278 }, { - "epoch": 0.05850283078213462, - "grad_norm": 6.123953867392272, - "learning_rate": 1.2976744186046512e-05, - "loss": 2.9756, + "epoch": 0.039395650946060436, + "grad_norm": 6.410320102028986, + "learning_rate": 8.746081504702195e-06, + "loss": 0.9559, "step": 279 }, { - "epoch": 0.05871251834766198, - "grad_norm": 7.748672546393689, - "learning_rate": 1.302325581395349e-05, - "loss": 3.1345, + "epoch": 0.03953685399604631, + "grad_norm": 7.347415880072892, + "learning_rate": 8.777429467084641e-06, + "loss": 1.2127, "step": 280 }, { - "epoch": 0.05892220591318935, - "grad_norm": 5.1763281843620765, - "learning_rate": 1.3069767441860466e-05, - "loss": 2.552, + "epoch": 0.039678057046032195, + "grad_norm": 6.731832307677044, + "learning_rate": 8.808777429467087e-06, + "loss": 0.9603, "step": 281 }, { - "epoch": 0.05913189347871671, - "grad_norm": 5.420225289246078, - "learning_rate": 1.3116279069767443e-05, - "loss": 2.6374, + "epoch": 0.03981926009601807, + "grad_norm": 6.665964602881954, + "learning_rate": 8.84012539184953e-06, + "loss": 0.9668, "step": 282 }, { - "epoch": 0.05934158104424408, - "grad_norm": 5.837474880142761, - "learning_rate": 1.316279069767442e-05, - "loss": 2.5835, + "epoch": 0.039960463146003954, + "grad_norm": 6.862446246774935, + "learning_rate": 8.871473354231975e-06, + "loss": 0.8228, "step": 283 }, { - "epoch": 0.05955126860977144, - "grad_norm": 6.150600393884646, - "learning_rate": 1.3209302325581397e-05, - "loss": 2.902, + "epoch": 0.04010166619598984, + "grad_norm": 6.939353471259851, + "learning_rate": 8.90282131661442e-06, + "loss": 1.0398, "step": 284 }, { - "epoch": 0.05976095617529881, - "grad_norm": 7.2554870088917855, - "learning_rate": 1.3255813953488372e-05, - "loss": 2.7621, + "epoch": 0.04024286924597571, + "grad_norm": 5.509553426301109, + "learning_rate": 8.934169278996866e-06, + "loss": 0.8675, "step": 285 }, { - "epoch": 0.05997064374082617, - "grad_norm": 6.463778666206971, - "learning_rate": 1.330232558139535e-05, - "loss": 2.874, + "epoch": 0.040384072295961596, + "grad_norm": 6.45388308838189, + "learning_rate": 8.965517241379312e-06, + "loss": 0.9256, "step": 286 }, { - "epoch": 0.060180331306353536, - "grad_norm": 6.452281179169747, - "learning_rate": 1.3348837209302327e-05, - "loss": 3.0631, + "epoch": 0.04052527534594747, + "grad_norm": 8.349517078867416, + "learning_rate": 8.996865203761757e-06, + "loss": 1.1676, "step": 287 }, { - "epoch": 0.0603900188718809, - "grad_norm": 5.779936757298579, - "learning_rate": 1.3395348837209303e-05, - "loss": 2.7905, + "epoch": 0.040666478395933355, + "grad_norm": 6.526353795675566, + "learning_rate": 9.028213166144201e-06, + "loss": 1.1668, "step": 288 }, { - "epoch": 0.06059970643740826, - "grad_norm": 6.306669741091084, - "learning_rate": 1.344186046511628e-05, - "loss": 2.8452, + "epoch": 0.04080768144591923, + "grad_norm": 6.645493517145117, + "learning_rate": 9.059561128526645e-06, + "loss": 1.1798, "step": 289 }, { - "epoch": 0.06080939400293563, - "grad_norm": 5.906054386458197, - "learning_rate": 1.3488372093023257e-05, - "loss": 2.8006, + "epoch": 0.040948884495905113, + "grad_norm": 7.838148558461977, + "learning_rate": 9.090909090909091e-06, + "loss": 1.3024, "step": 290 }, { - "epoch": 0.06101908156846299, - "grad_norm": 5.533973914178475, - "learning_rate": 1.3534883720930234e-05, - "loss": 2.7965, + "epoch": 0.04109008754589099, + "grad_norm": 6.651623417219865, + "learning_rate": 9.122257053291537e-06, + "loss": 1.1004, "step": 291 }, { - "epoch": 0.061228769133990356, - "grad_norm": 5.35877542558296, - "learning_rate": 1.358139534883721e-05, - "loss": 2.7042, + "epoch": 0.04123129059587687, + "grad_norm": 7.507640499972883, + "learning_rate": 9.153605015673982e-06, + "loss": 1.2961, "step": 292 }, { - "epoch": 0.06143845669951772, - "grad_norm": 5.4596094718721195, - "learning_rate": 1.3627906976744188e-05, - "loss": 2.4664, + "epoch": 0.04137249364586275, + "grad_norm": 8.060457966536285, + "learning_rate": 9.184952978056428e-06, + "loss": 1.0851, "step": 293 }, { - "epoch": 0.061648144265045085, - "grad_norm": 5.53754396482365, - "learning_rate": 1.3674418604651163e-05, - "loss": 2.6338, + "epoch": 0.04151369669584863, + "grad_norm": 7.133169520998299, + "learning_rate": 9.216300940438872e-06, + "loss": 1.2597, "step": 294 }, { - "epoch": 0.061857831830572446, - "grad_norm": 6.198278716409282, - "learning_rate": 1.372093023255814e-05, - "loss": 2.698, + "epoch": 0.04165489974583451, + "grad_norm": 7.374011658051953, + "learning_rate": 9.247648902821318e-06, + "loss": 1.0425, "step": 295 }, { - "epoch": 0.062067519396099814, - "grad_norm": 6.463561706972344, - "learning_rate": 1.3767441860465118e-05, - "loss": 2.6886, + "epoch": 0.04179610279582039, + "grad_norm": 6.863258233996512, + "learning_rate": 9.278996865203762e-06, + "loss": 1.0537, "step": 296 }, { - "epoch": 0.062277206961627175, - "grad_norm": 5.259633563530183, - "learning_rate": 1.3813953488372094e-05, - "loss": 2.6933, + "epoch": 0.041937305845806266, + "grad_norm": 6.645888377260501, + "learning_rate": 9.310344827586207e-06, + "loss": 1.2074, "step": 297 }, { - "epoch": 0.062486894527154536, - "grad_norm": 5.204962451071752, - "learning_rate": 1.3860465116279072e-05, - "loss": 2.9227, + "epoch": 0.04207850889579215, + "grad_norm": 6.436893265619479, + "learning_rate": 9.341692789968653e-06, + "loss": 1.022, "step": 298 }, { - "epoch": 0.0626965820926819, - "grad_norm": 5.43931196567226, - "learning_rate": 1.3906976744186048e-05, - "loss": 2.573, + "epoch": 0.042219711945778025, + "grad_norm": 5.918291582500843, + "learning_rate": 9.373040752351097e-06, + "loss": 1.0726, "step": 299 }, { - "epoch": 0.06290626965820927, - "grad_norm": 5.726436537818361, - "learning_rate": 1.3953488372093025e-05, - "loss": 2.6359, + "epoch": 0.04236091499576391, + "grad_norm": 7.9883766871730435, + "learning_rate": 9.404388714733543e-06, + "loss": 1.2079, "step": 300 }, { - "epoch": 0.06311595722373663, - "grad_norm": 5.266601831643755, - "learning_rate": 1.4e-05, - "loss": 2.5716, + "epoch": 0.04250211804574979, + "grad_norm": 6.742810188796365, + "learning_rate": 9.435736677115989e-06, + "loss": 1.0749, "step": 301 }, { - "epoch": 0.063325644789264, - "grad_norm": 5.869268627325999, - "learning_rate": 1.4046511627906978e-05, - "loss": 2.364, + "epoch": 0.04264332109573567, + "grad_norm": 6.412702085699694, + "learning_rate": 9.467084639498434e-06, + "loss": 0.9456, "step": 302 }, { - "epoch": 0.06353533235479136, - "grad_norm": 6.226587082492448, - "learning_rate": 1.4093023255813955e-05, - "loss": 2.6806, + "epoch": 0.04278452414572155, + "grad_norm": 6.356434035310211, + "learning_rate": 9.498432601880878e-06, + "loss": 1.0771, "step": 303 }, { - "epoch": 0.06374501992031872, - "grad_norm": 6.030103234425802, - "learning_rate": 1.4139534883720931e-05, - "loss": 2.7833, + "epoch": 0.042925727195707426, + "grad_norm": 7.066962668758025, + "learning_rate": 9.529780564263324e-06, + "loss": 0.9493, "step": 304 }, { - "epoch": 0.06395470748584609, - "grad_norm": 6.478761520238879, - "learning_rate": 1.4186046511627909e-05, - "loss": 2.4918, + "epoch": 0.04306693024569331, + "grad_norm": 8.20812618445608, + "learning_rate": 9.561128526645768e-06, + "loss": 0.9852, "step": 305 }, { - "epoch": 0.06416439505137346, - "grad_norm": 5.20215793431158, - "learning_rate": 1.4232558139534885e-05, - "loss": 2.7573, + "epoch": 0.043208133295679185, + "grad_norm": 7.16284184553327, + "learning_rate": 9.592476489028214e-06, + "loss": 1.1978, "step": 306 }, { - "epoch": 0.06437408261690082, - "grad_norm": 5.7622062877702716, - "learning_rate": 1.4279069767441863e-05, - "loss": 2.5148, + "epoch": 0.04334933634566507, + "grad_norm": 6.587795861008629, + "learning_rate": 9.62382445141066e-06, + "loss": 1.0712, "step": 307 }, { - "epoch": 0.06458377018242818, - "grad_norm": 5.956530426109749, - "learning_rate": 1.4325581395348837e-05, - "loss": 2.6827, + "epoch": 0.043490539395650944, + "grad_norm": 7.2249488180463315, + "learning_rate": 9.655172413793105e-06, + "loss": 1.1, "step": 308 }, { - "epoch": 0.06479345774795554, - "grad_norm": 6.059123409614743, - "learning_rate": 1.4372093023255815e-05, - "loss": 2.6931, + "epoch": 0.04363174244563683, + "grad_norm": 6.432077592343574, + "learning_rate": 9.686520376175549e-06, + "loss": 1.0193, "step": 309 }, { - "epoch": 0.0650031453134829, - "grad_norm": 6.2455309061981685, - "learning_rate": 1.441860465116279e-05, - "loss": 3.0446, + "epoch": 0.0437729454956227, + "grad_norm": 6.968885477214826, + "learning_rate": 9.717868338557995e-06, + "loss": 1.0749, "step": 310 }, { - "epoch": 0.06521283287901028, - "grad_norm": 6.613281937512961, - "learning_rate": 1.4465116279069768e-05, - "loss": 2.9515, + "epoch": 0.043914148545608586, + "grad_norm": 6.447244929165667, + "learning_rate": 9.749216300940439e-06, + "loss": 0.9041, "step": 311 }, { - "epoch": 0.06542252044453764, - "grad_norm": 5.412172509867639, - "learning_rate": 1.4511627906976746e-05, - "loss": 2.6634, + "epoch": 0.04405535159559446, + "grad_norm": 8.16839291130948, + "learning_rate": 9.780564263322884e-06, + "loss": 1.3465, "step": 312 }, { - "epoch": 0.065632208010065, - "grad_norm": 6.567250187892535, - "learning_rate": 1.4558139534883722e-05, - "loss": 3.0405, + "epoch": 0.044196554645580345, + "grad_norm": 7.098880356443508, + "learning_rate": 9.81191222570533e-06, + "loss": 1.0409, "step": 313 }, { - "epoch": 0.06584189557559236, - "grad_norm": 5.84932492317624, - "learning_rate": 1.46046511627907e-05, - "loss": 2.7963, + "epoch": 0.04433775769556622, + "grad_norm": 6.141462288488842, + "learning_rate": 9.843260188087776e-06, + "loss": 1.013, "step": 314 }, { - "epoch": 0.06605158314111974, - "grad_norm": 6.602110069465178, - "learning_rate": 1.4651162790697674e-05, - "loss": 2.7062, + "epoch": 0.044478960745552104, + "grad_norm": 6.207098948110343, + "learning_rate": 9.874608150470221e-06, + "loss": 1.0002, "step": 315 }, { - "epoch": 0.0662612707066471, - "grad_norm": 5.498020477299307, - "learning_rate": 1.4697674418604652e-05, - "loss": 2.6295, + "epoch": 0.04462016379553799, + "grad_norm": 6.130320763460261, + "learning_rate": 9.905956112852665e-06, + "loss": 1.0626, "step": 316 }, { - "epoch": 0.06647095827217446, - "grad_norm": 6.61416538162114, - "learning_rate": 1.4744186046511628e-05, - "loss": 2.7408, + "epoch": 0.04476136684552386, + "grad_norm": 6.381605388589148, + "learning_rate": 9.93730407523511e-06, + "loss": 1.1705, "step": 317 }, { - "epoch": 0.06668064583770182, - "grad_norm": 5.826827768593727, - "learning_rate": 1.4790697674418606e-05, - "loss": 2.4461, + "epoch": 0.044902569895509746, + "grad_norm": 6.270972063913611, + "learning_rate": 9.968652037617555e-06, + "loss": 0.9597, "step": 318 }, { - "epoch": 0.06689033340322918, - "grad_norm": 6.660216321981914, - "learning_rate": 1.4837209302325583e-05, - "loss": 2.7952, + "epoch": 0.04504377294549562, + "grad_norm": 6.019869352545629, + "learning_rate": 1e-05, + "loss": 0.9363, "step": 319 }, { - "epoch": 0.06710002096875656, - "grad_norm": 4.981011650003628, - "learning_rate": 1.488372093023256e-05, - "loss": 2.8064, + "epoch": 0.045184975995481505, + "grad_norm": 6.901333039486993, + "learning_rate": 1.0031347962382445e-05, + "loss": 1.0765, "step": 320 }, { - "epoch": 0.06730970853428392, - "grad_norm": 5.831091516525705, - "learning_rate": 1.4930232558139537e-05, - "loss": 2.5788, + "epoch": 0.04532617904546738, + "grad_norm": 6.751245381913681, + "learning_rate": 1.0062695924764892e-05, + "loss": 0.969, "step": 321 }, { - "epoch": 0.06751939609981128, - "grad_norm": 6.194265249427232, - "learning_rate": 1.4976744186046512e-05, - "loss": 2.7381, + "epoch": 0.045467382095453264, + "grad_norm": 6.551280548846206, + "learning_rate": 1.0094043887147336e-05, + "loss": 0.9448, "step": 322 }, { - "epoch": 0.06772908366533864, - "grad_norm": 6.170332847848081, - "learning_rate": 1.5023255813953491e-05, - "loss": 2.599, + "epoch": 0.04560858514543914, + "grad_norm": 8.199253942789689, + "learning_rate": 1.0125391849529782e-05, + "loss": 0.9585, "step": 323 }, { - "epoch": 0.06793877123086602, - "grad_norm": 6.797964060613839, - "learning_rate": 1.5069767441860465e-05, - "loss": 2.7531, + "epoch": 0.04574978819542502, + "grad_norm": 6.160985973356477, + "learning_rate": 1.0156739811912226e-05, + "loss": 0.8928, "step": 324 }, { - "epoch": 0.06814845879639338, - "grad_norm": 5.443594485202397, - "learning_rate": 1.5116279069767443e-05, - "loss": 2.7429, + "epoch": 0.0458909912454109, + "grad_norm": 6.475959976778824, + "learning_rate": 1.0188087774294673e-05, + "loss": 1.0992, "step": 325 }, { - "epoch": 0.06835814636192074, - "grad_norm": 6.024837507150557, - "learning_rate": 1.5162790697674419e-05, - "loss": 2.8143, + "epoch": 0.04603219429539678, + "grad_norm": 6.151307243678867, + "learning_rate": 1.0219435736677117e-05, + "loss": 0.9654, "step": 326 }, { - "epoch": 0.0685678339274481, - "grad_norm": 5.997588507355551, - "learning_rate": 1.5209302325581397e-05, - "loss": 2.8557, + "epoch": 0.04617339734538266, + "grad_norm": 6.240444356304697, + "learning_rate": 1.0250783699059561e-05, + "loss": 1.092, "step": 327 }, { - "epoch": 0.06877752149297546, - "grad_norm": 6.172351164496978, - "learning_rate": 1.5255813953488374e-05, - "loss": 2.5614, + "epoch": 0.04631460039536854, + "grad_norm": 5.067191185910136, + "learning_rate": 1.0282131661442007e-05, + "loss": 0.8948, "step": 328 }, { - "epoch": 0.06898720905850284, - "grad_norm": 5.419519907161505, - "learning_rate": 1.530232558139535e-05, - "loss": 2.8489, + "epoch": 0.04645580344535442, + "grad_norm": 8.138115119156627, + "learning_rate": 1.0313479623824451e-05, + "loss": 1.2203, "step": 329 }, { - "epoch": 0.0691968966240302, - "grad_norm": 5.278909144433219, - "learning_rate": 1.5348837209302328e-05, - "loss": 2.4734, + "epoch": 0.0465970064953403, + "grad_norm": 5.976453536420087, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.8202, "step": 330 }, { - "epoch": 0.06940658418955756, - "grad_norm": 6.484816379060119, - "learning_rate": 1.5395348837209303e-05, - "loss": 2.5781, + "epoch": 0.046738209545326176, + "grad_norm": 7.3463537451702905, + "learning_rate": 1.0376175548589342e-05, + "loss": 1.1936, "step": 331 }, { - "epoch": 0.06961627175508492, - "grad_norm": 6.057262863221625, - "learning_rate": 1.544186046511628e-05, - "loss": 2.7545, + "epoch": 0.04687941259531206, + "grad_norm": 6.617975055069619, + "learning_rate": 1.0407523510971788e-05, + "loss": 0.9834, "step": 332 }, { - "epoch": 0.0698259593206123, - "grad_norm": 5.799531411057827, - "learning_rate": 1.5488372093023255e-05, - "loss": 2.8803, + "epoch": 0.04702061564529794, + "grad_norm": 6.006700710428823, + "learning_rate": 1.0438871473354234e-05, + "loss": 1.1119, "step": 333 }, { - "epoch": 0.07003564688613965, - "grad_norm": 7.044371563199469, - "learning_rate": 1.5534883720930232e-05, - "loss": 2.6231, + "epoch": 0.04716181869528382, + "grad_norm": 5.617955004157817, + "learning_rate": 1.0470219435736678e-05, + "loss": 0.8598, "step": 334 }, { - "epoch": 0.07024533445166702, - "grad_norm": 5.974593860547467, - "learning_rate": 1.558139534883721e-05, - "loss": 2.856, + "epoch": 0.0473030217452697, + "grad_norm": 5.743124385312946, + "learning_rate": 1.0501567398119123e-05, + "loss": 0.9658, "step": 335 }, { - "epoch": 0.07045502201719438, - "grad_norm": 5.407087813680099, - "learning_rate": 1.5627906976744188e-05, - "loss": 2.8117, + "epoch": 0.04744422479525558, + "grad_norm": 6.73849235202559, + "learning_rate": 1.0532915360501567e-05, + "loss": 1.0877, "step": 336 }, { - "epoch": 0.07066470958272174, - "grad_norm": 5.744303926677864, - "learning_rate": 1.5674418604651165e-05, - "loss": 2.8545, + "epoch": 0.04758542784524146, + "grad_norm": 7.000001727656123, + "learning_rate": 1.0564263322884015e-05, + "loss": 1.0504, "step": 337 }, { - "epoch": 0.07087439714824911, - "grad_norm": 5.228189399069513, - "learning_rate": 1.572093023255814e-05, - "loss": 2.4886, + "epoch": 0.047726630895227336, + "grad_norm": 6.495995948036809, + "learning_rate": 1.0595611285266459e-05, + "loss": 1.02, "step": 338 }, { - "epoch": 0.07108408471377647, - "grad_norm": 6.035957800156823, - "learning_rate": 1.5767441860465117e-05, - "loss": 2.505, + "epoch": 0.04786783394521322, + "grad_norm": 6.361802727831078, + "learning_rate": 1.0626959247648904e-05, + "loss": 1.1229, "step": 339 }, { - "epoch": 0.07129377227930384, - "grad_norm": 5.719013219290617, - "learning_rate": 1.5813953488372095e-05, - "loss": 2.7319, + "epoch": 0.048009036995199095, + "grad_norm": 6.071181411988096, + "learning_rate": 1.0658307210031348e-05, + "loss": 0.8984, "step": 340 }, { - "epoch": 0.0715034598448312, - "grad_norm": 6.062696765411479, - "learning_rate": 1.5860465116279073e-05, - "loss": 2.6984, + "epoch": 0.04815024004518498, + "grad_norm": 6.487582735086764, + "learning_rate": 1.0689655172413792e-05, + "loss": 0.977, "step": 341 }, { - "epoch": 0.07171314741035857, - "grad_norm": 6.249201740756078, - "learning_rate": 1.5906976744186047e-05, - "loss": 2.658, + "epoch": 0.048291443095170854, + "grad_norm": 6.017610844470211, + "learning_rate": 1.072100313479624e-05, + "loss": 1.0152, "step": 342 }, { - "epoch": 0.07192283497588593, - "grad_norm": 5.834905026219851, - "learning_rate": 1.5953488372093025e-05, - "loss": 2.6295, + "epoch": 0.04843264614515674, + "grad_norm": 6.50504700821475, + "learning_rate": 1.0752351097178684e-05, + "loss": 0.9227, "step": 343 }, { - "epoch": 0.0721325225414133, - "grad_norm": 5.533371756152266, - "learning_rate": 1.6000000000000003e-05, - "loss": 2.7681, + "epoch": 0.04857384919514261, + "grad_norm": 6.164255154735449, + "learning_rate": 1.078369905956113e-05, + "loss": 1.0049, "step": 344 }, { - "epoch": 0.07234221010694065, - "grad_norm": 5.690360174250401, - "learning_rate": 1.6046511627906977e-05, - "loss": 2.4966, + "epoch": 0.048715052245128496, + "grad_norm": 8.051149001280118, + "learning_rate": 1.0815047021943574e-05, + "loss": 1.188, "step": 345 }, { - "epoch": 0.07255189767246802, - "grad_norm": 5.753144723036307, - "learning_rate": 1.6093023255813955e-05, - "loss": 2.7304, + "epoch": 0.04885625529511437, + "grad_norm": 7.261561894113257, + "learning_rate": 1.0846394984326021e-05, + "loss": 1.0836, "step": 346 }, { - "epoch": 0.07276158523799539, - "grad_norm": 5.83958583185687, - "learning_rate": 1.6139534883720932e-05, - "loss": 2.6349, + "epoch": 0.048997458345100255, + "grad_norm": 6.867974412226677, + "learning_rate": 1.0877742946708465e-05, + "loss": 1.0593, "step": 347 }, { - "epoch": 0.07297127280352275, - "grad_norm": 6.4398852781250016, - "learning_rate": 1.618604651162791e-05, - "loss": 2.6976, + "epoch": 0.04913866139508613, + "grad_norm": 7.17318256473674, + "learning_rate": 1.0909090909090909e-05, + "loss": 1.1374, "step": 348 }, { - "epoch": 0.07318096036905011, - "grad_norm": 5.735850012586663, - "learning_rate": 1.6232558139534884e-05, - "loss": 2.8161, + "epoch": 0.049279864445072014, + "grad_norm": 5.813809558691895, + "learning_rate": 1.0940438871473356e-05, + "loss": 0.8977, "step": 349 }, { - "epoch": 0.07339064793457747, - "grad_norm": 5.3063108217473935, - "learning_rate": 1.6279069767441862e-05, - "loss": 2.5386, + "epoch": 0.0494210674950579, + "grad_norm": 6.555088464855176, + "learning_rate": 1.09717868338558e-05, + "loss": 0.971, "step": 350 }, { - "epoch": 0.07360033550010485, - "grad_norm": 5.475051540165912, - "learning_rate": 1.632558139534884e-05, - "loss": 2.7839, + "epoch": 0.04956227054504377, + "grad_norm": 7.39669928960688, + "learning_rate": 1.1003134796238246e-05, + "loss": 1.4236, "step": 351 }, { - "epoch": 0.07381002306563221, - "grad_norm": 5.191799752934426, - "learning_rate": 1.6372093023255814e-05, - "loss": 2.7398, + "epoch": 0.049703473595029656, + "grad_norm": 6.984526566441196, + "learning_rate": 1.103448275862069e-05, + "loss": 1.104, "step": 352 }, { - "epoch": 0.07401971063115957, - "grad_norm": 6.5219365945949885, - "learning_rate": 1.6418604651162792e-05, - "loss": 2.6315, + "epoch": 0.04984467664501553, + "grad_norm": 7.007940824714307, + "learning_rate": 1.1065830721003134e-05, + "loss": 1.0743, "step": 353 }, { - "epoch": 0.07422939819668693, - "grad_norm": 6.511208446166503, - "learning_rate": 1.646511627906977e-05, - "loss": 2.6904, + "epoch": 0.049985879695001414, + "grad_norm": 6.64849902599171, + "learning_rate": 1.1097178683385581e-05, + "loss": 1.1107, "step": 354 }, { - "epoch": 0.07443908576221431, - "grad_norm": 5.586213650329757, - "learning_rate": 1.6511627906976747e-05, - "loss": 2.72, + "epoch": 0.05012708274498729, + "grad_norm": 6.141976025475596, + "learning_rate": 1.1128526645768025e-05, + "loss": 1.0355, "step": 355 }, { - "epoch": 0.07464877332774167, - "grad_norm": 5.77605073598907, - "learning_rate": 1.6558139534883722e-05, - "loss": 2.3914, + "epoch": 0.05026828579497317, + "grad_norm": 5.491538837751872, + "learning_rate": 1.1159874608150471e-05, + "loss": 0.9341, "step": 356 }, { - "epoch": 0.07485846089326903, - "grad_norm": 5.3201906015908875, - "learning_rate": 1.66046511627907e-05, - "loss": 2.2321, + "epoch": 0.05040948884495905, + "grad_norm": 5.722767102016528, + "learning_rate": 1.1191222570532915e-05, + "loss": 0.9994, "step": 357 }, { - "epoch": 0.07506814845879639, - "grad_norm": 5.638380031214597, - "learning_rate": 1.6651162790697674e-05, - "loss": 2.649, + "epoch": 0.05055069189494493, + "grad_norm": 6.631829679544342, + "learning_rate": 1.1222570532915362e-05, + "loss": 1.0061, "step": 358 }, { - "epoch": 0.07527783602432375, - "grad_norm": 6.692083446003021, - "learning_rate": 1.669767441860465e-05, - "loss": 2.9075, + "epoch": 0.05069189494493081, + "grad_norm": 7.281886895858746, + "learning_rate": 1.1253918495297806e-05, + "loss": 1.0628, "step": 359 }, { - "epoch": 0.07548752358985113, - "grad_norm": 6.270560780931083, - "learning_rate": 1.674418604651163e-05, - "loss": 2.6546, + "epoch": 0.05083309799491669, + "grad_norm": 7.933782928157221, + "learning_rate": 1.128526645768025e-05, + "loss": 1.0381, "step": 360 }, { - "epoch": 0.07569721115537849, - "grad_norm": 6.9541218281501385, - "learning_rate": 1.6790697674418607e-05, - "loss": 3.0673, + "epoch": 0.05097430104490257, + "grad_norm": 6.576475147230626, + "learning_rate": 1.1316614420062698e-05, + "loss": 1.0814, "step": 361 }, { - "epoch": 0.07590689872090585, - "grad_norm": 5.284210590793171, - "learning_rate": 1.6837209302325585e-05, - "loss": 2.6862, + "epoch": 0.05111550409488845, + "grad_norm": 6.209802222934887, + "learning_rate": 1.1347962382445142e-05, + "loss": 0.9795, "step": 362 }, { - "epoch": 0.07611658628643321, - "grad_norm": 5.858099499474185, - "learning_rate": 1.688372093023256e-05, - "loss": 2.9625, + "epoch": 0.051256707144874326, + "grad_norm": 7.786458666601698, + "learning_rate": 1.1379310344827587e-05, + "loss": 1.1773, "step": 363 }, { - "epoch": 0.07632627385196059, - "grad_norm": 6.269620854637626, - "learning_rate": 1.6930232558139537e-05, - "loss": 2.6235, + "epoch": 0.05139791019486021, + "grad_norm": 5.574790343903885, + "learning_rate": 1.1410658307210031e-05, + "loss": 1.1332, "step": 364 }, { - "epoch": 0.07653596141748795, - "grad_norm": 5.629066276393372, - "learning_rate": 1.697674418604651e-05, - "loss": 2.7425, + "epoch": 0.051539113244846085, + "grad_norm": 6.098493060813073, + "learning_rate": 1.1442006269592479e-05, + "loss": 1.0307, "step": 365 }, { - "epoch": 0.07674564898301531, - "grad_norm": 6.506410310740582, - "learning_rate": 1.702325581395349e-05, - "loss": 2.5681, + "epoch": 0.05168031629483197, + "grad_norm": 6.989261097373715, + "learning_rate": 1.1473354231974923e-05, + "loss": 0.9769, "step": 366 }, { - "epoch": 0.07695533654854267, - "grad_norm": 5.609809614006658, - "learning_rate": 1.7069767441860466e-05, - "loss": 2.6783, + "epoch": 0.05182151934481785, + "grad_norm": 5.826928191078909, + "learning_rate": 1.1504702194357367e-05, + "loss": 0.9813, "step": 367 }, { - "epoch": 0.07716502411407003, - "grad_norm": 6.013526338635598, - "learning_rate": 1.7116279069767444e-05, - "loss": 2.3599, + "epoch": 0.05196272239480373, + "grad_norm": 7.203358236222731, + "learning_rate": 1.1536050156739813e-05, + "loss": 1.1263, "step": 368 }, { - "epoch": 0.0773747116795974, - "grad_norm": 6.935955960212009, - "learning_rate": 1.7162790697674422e-05, - "loss": 2.9178, + "epoch": 0.05210392544478961, + "grad_norm": 6.24780742125562, + "learning_rate": 1.1567398119122257e-05, + "loss": 1.1283, "step": 369 }, { - "epoch": 0.07758439924512477, - "grad_norm": 6.187788175726091, - "learning_rate": 1.7209302325581396e-05, - "loss": 2.7254, + "epoch": 0.052245128494775486, + "grad_norm": 6.048084193381313, + "learning_rate": 1.1598746081504704e-05, + "loss": 1.2, "step": 370 }, { - "epoch": 0.07779408681065213, - "grad_norm": 6.328826494689137, - "learning_rate": 1.7255813953488374e-05, - "loss": 2.5233, + "epoch": 0.05238633154476137, + "grad_norm": 6.511947459120634, + "learning_rate": 1.1630094043887148e-05, + "loss": 1.0383, "step": 371 }, { - "epoch": 0.07800377437617949, - "grad_norm": 5.518535549260242, - "learning_rate": 1.7302325581395348e-05, - "loss": 2.7285, + "epoch": 0.052527534594747245, + "grad_norm": 5.908367494278469, + "learning_rate": 1.1661442006269594e-05, + "loss": 0.9815, "step": 372 }, { - "epoch": 0.07821346194170686, - "grad_norm": 5.773188387861025, - "learning_rate": 1.7348837209302326e-05, - "loss": 2.6425, + "epoch": 0.05266873764473313, + "grad_norm": 6.322198148637384, + "learning_rate": 1.169278996865204e-05, + "loss": 0.9974, "step": 373 }, { - "epoch": 0.07842314950723422, - "grad_norm": 5.909261393230419, - "learning_rate": 1.7395348837209304e-05, - "loss": 2.6059, + "epoch": 0.052809940694719004, + "grad_norm": 6.423055168612659, + "learning_rate": 1.1724137931034483e-05, + "loss": 1.0798, "step": 374 }, { - "epoch": 0.07863283707276159, - "grad_norm": 5.5539565046327795, - "learning_rate": 1.744186046511628e-05, - "loss": 2.6028, + "epoch": 0.05295114374470489, + "grad_norm": 6.0639015615665315, + "learning_rate": 1.1755485893416929e-05, + "loss": 0.9342, "step": 375 }, { - "epoch": 0.07884252463828895, - "grad_norm": 6.503018069121384, - "learning_rate": 1.748837209302326e-05, - "loss": 2.9041, + "epoch": 0.05309234679469076, + "grad_norm": 6.8992590351614655, + "learning_rate": 1.1786833855799373e-05, + "loss": 0.9345, "step": 376 }, { - "epoch": 0.07905221220381631, - "grad_norm": 5.25224348650878, - "learning_rate": 1.7534883720930233e-05, - "loss": 2.7723, + "epoch": 0.053233549844676646, + "grad_norm": 5.873807882193555, + "learning_rate": 1.181818181818182e-05, + "loss": 1.0018, "step": 377 }, { - "epoch": 0.07926189976934368, - "grad_norm": 6.589235208720605, - "learning_rate": 1.758139534883721e-05, - "loss": 2.8494, + "epoch": 0.05337475289466252, + "grad_norm": 6.609613070967434, + "learning_rate": 1.1849529780564264e-05, + "loss": 1.284, "step": 378 }, { - "epoch": 0.07947158733487104, - "grad_norm": 6.6664500399532365, - "learning_rate": 1.7627906976744185e-05, - "loss": 2.8985, + "epoch": 0.053515955944648405, + "grad_norm": 5.698838549028839, + "learning_rate": 1.188087774294671e-05, + "loss": 0.9497, "step": 379 }, { - "epoch": 0.0796812749003984, - "grad_norm": 6.6145968018730095, - "learning_rate": 1.7674418604651163e-05, - "loss": 2.7433, + "epoch": 0.05365715899463428, + "grad_norm": 6.072739066698938, + "learning_rate": 1.1912225705329154e-05, + "loss": 1.0029, "step": 380 }, { - "epoch": 0.07989096246592577, - "grad_norm": 5.9400843764979845, - "learning_rate": 1.772093023255814e-05, - "loss": 2.6454, + "epoch": 0.053798362044620164, + "grad_norm": 6.588778790896909, + "learning_rate": 1.1943573667711598e-05, + "loss": 1.1751, "step": 381 }, { - "epoch": 0.08010065003145314, - "grad_norm": 5.345420643176109, - "learning_rate": 1.776744186046512e-05, - "loss": 2.6805, + "epoch": 0.05393956509460604, + "grad_norm": 6.118528728411645, + "learning_rate": 1.1974921630094045e-05, + "loss": 1.1642, "step": 382 }, { - "epoch": 0.0803103375969805, - "grad_norm": 5.434026829259804, - "learning_rate": 1.7813953488372096e-05, - "loss": 2.8746, + "epoch": 0.05408076814459192, + "grad_norm": 6.252547973893893, + "learning_rate": 1.200626959247649e-05, + "loss": 1.1578, "step": 383 }, { - "epoch": 0.08052002516250786, - "grad_norm": 6.511662481541995, - "learning_rate": 1.786046511627907e-05, - "loss": 2.7112, + "epoch": 0.054221971194577806, + "grad_norm": 6.535542859953819, + "learning_rate": 1.2037617554858935e-05, + "loss": 1.2021, "step": 384 }, { - "epoch": 0.08072971272803522, - "grad_norm": 5.102070572258234, - "learning_rate": 1.790697674418605e-05, - "loss": 2.6143, + "epoch": 0.05436317424456368, + "grad_norm": 6.710044909907913, + "learning_rate": 1.206896551724138e-05, + "loss": 1.2086, "step": 385 }, { - "epoch": 0.08093940029356259, - "grad_norm": 5.604191365388476, - "learning_rate": 1.7953488372093023e-05, - "loss": 2.6727, + "epoch": 0.054504377294549565, + "grad_norm": 6.81495157343338, + "learning_rate": 1.2100313479623827e-05, + "loss": 1.0907, "step": 386 }, { - "epoch": 0.08114908785908996, - "grad_norm": 5.459784455774207, - "learning_rate": 1.8e-05, - "loss": 2.4293, + "epoch": 0.05464558034453544, + "grad_norm": 5.634081917231528, + "learning_rate": 1.213166144200627e-05, + "loss": 0.9641, "step": 387 }, { - "epoch": 0.08135877542461732, - "grad_norm": 5.3878168111739875, - "learning_rate": 1.8046511627906978e-05, - "loss": 2.6991, + "epoch": 0.054786783394521324, + "grad_norm": 6.170800752005018, + "learning_rate": 1.2163009404388715e-05, + "loss": 1.0229, "step": 388 }, { - "epoch": 0.08156846299014468, - "grad_norm": 4.948042673945027, - "learning_rate": 1.8093023255813956e-05, - "loss": 2.725, + "epoch": 0.0549279864445072, + "grad_norm": 5.595215697006681, + "learning_rate": 1.2194357366771162e-05, + "loss": 0.9963, "step": 389 }, { - "epoch": 0.08177815055567204, - "grad_norm": 6.510208918388066, - "learning_rate": 1.813953488372093e-05, - "loss": 2.6172, + "epoch": 0.05506918949449308, + "grad_norm": 7.311732811142929, + "learning_rate": 1.2225705329153606e-05, + "loss": 1.0786, "step": 390 }, { - "epoch": 0.08198783812119942, - "grad_norm": 6.386512430603227, - "learning_rate": 1.8186046511627908e-05, - "loss": 2.8203, + "epoch": 0.05521039254447896, + "grad_norm": 6.0555475349761485, + "learning_rate": 1.2257053291536052e-05, + "loss": 0.8199, "step": 391 }, { - "epoch": 0.08219752568672678, - "grad_norm": 4.695139056701934, - "learning_rate": 1.8232558139534886e-05, - "loss": 2.6786, + "epoch": 0.05535159559446484, + "grad_norm": 7.586531151959653, + "learning_rate": 1.2288401253918496e-05, + "loss": 1.1295, "step": 392 }, { - "epoch": 0.08240721325225414, - "grad_norm": 4.6082432474810195, - "learning_rate": 1.827906976744186e-05, - "loss": 2.9227, + "epoch": 0.05549279864445072, + "grad_norm": 8.536236638622649, + "learning_rate": 1.2319749216300943e-05, + "loss": 0.9228, "step": 393 }, { - "epoch": 0.0826169008177815, - "grad_norm": 6.1298162665772855, - "learning_rate": 1.8325581395348838e-05, - "loss": 2.8086, + "epoch": 0.0556340016944366, + "grad_norm": 7.023745557804657, + "learning_rate": 1.2351097178683387e-05, + "loss": 1.0789, "step": 394 }, { - "epoch": 0.08282658838330886, - "grad_norm": 5.234715116361226, - "learning_rate": 1.8372093023255815e-05, - "loss": 2.8152, + "epoch": 0.05577520474442248, + "grad_norm": 5.884896707857233, + "learning_rate": 1.2382445141065831e-05, + "loss": 1.0251, "step": 395 }, { - "epoch": 0.08303627594883624, - "grad_norm": 5.902818586391133, - "learning_rate": 1.8418604651162793e-05, - "loss": 2.7639, + "epoch": 0.05591640779440836, + "grad_norm": 5.754018586417228, + "learning_rate": 1.2413793103448277e-05, + "loss": 1.0936, "step": 396 }, { - "epoch": 0.0832459635143636, - "grad_norm": 5.927165057197696, - "learning_rate": 1.8465116279069767e-05, - "loss": 2.6674, + "epoch": 0.056057610844394236, + "grad_norm": 6.08443834146453, + "learning_rate": 1.244514106583072e-05, + "loss": 1.0688, "step": 397 }, { - "epoch": 0.08345565107989096, - "grad_norm": 6.251341397312741, - "learning_rate": 1.8511627906976745e-05, - "loss": 2.8503, + "epoch": 0.05619881389438012, + "grad_norm": 8.164243328699555, + "learning_rate": 1.2476489028213168e-05, + "loss": 1.0192, "step": 398 }, { - "epoch": 0.08366533864541832, - "grad_norm": 5.6091230543080215, - "learning_rate": 1.8558139534883723e-05, - "loss": 2.7752, + "epoch": 0.056340016944366, + "grad_norm": 6.383066440696785, + "learning_rate": 1.2507836990595612e-05, + "loss": 1.18, "step": 399 }, { - "epoch": 0.0838750262109457, - "grad_norm": 4.917752911305816, - "learning_rate": 1.86046511627907e-05, - "loss": 2.7482, + "epoch": 0.05648121999435188, + "grad_norm": 5.732033358217053, + "learning_rate": 1.2539184952978058e-05, + "loss": 1.0299, "step": 400 }, { - "epoch": 0.08408471377647306, - "grad_norm": 5.125438800059427, - "learning_rate": 1.865116279069768e-05, - "loss": 2.7315, + "epoch": 0.05662242304433776, + "grad_norm": 5.5882468073872476, + "learning_rate": 1.2570532915360503e-05, + "loss": 1.0071, "step": 401 }, { - "epoch": 0.08429440134200042, - "grad_norm": 5.286615016197741, - "learning_rate": 1.8697674418604653e-05, - "loss": 2.6158, + "epoch": 0.05676362609432364, + "grad_norm": 6.451365586740487, + "learning_rate": 1.2601880877742947e-05, + "loss": 1.0087, "step": 402 }, { - "epoch": 0.08450408890752778, - "grad_norm": 5.304510797463541, - "learning_rate": 1.874418604651163e-05, - "loss": 2.5846, + "epoch": 0.05690482914430952, + "grad_norm": 6.328321035610544, + "learning_rate": 1.2633228840125393e-05, + "loss": 1.1824, "step": 403 }, { - "epoch": 0.08471377647305514, - "grad_norm": 4.792311878902139, - "learning_rate": 1.8790697674418605e-05, - "loss": 2.7005, + "epoch": 0.057046032194295396, + "grad_norm": 5.778553569926996, + "learning_rate": 1.2664576802507837e-05, + "loss": 1.0006, "step": 404 }, { - "epoch": 0.08492346403858252, - "grad_norm": 5.617132431670244, - "learning_rate": 1.8837209302325582e-05, - "loss": 2.9455, + "epoch": 0.05718723524428128, + "grad_norm": 6.161754346703109, + "learning_rate": 1.2695924764890284e-05, + "loss": 1.055, "step": 405 }, { - "epoch": 0.08513315160410988, - "grad_norm": 5.944688564217573, - "learning_rate": 1.888372093023256e-05, - "loss": 2.5341, + "epoch": 0.057328438294267155, + "grad_norm": 6.4316646835760185, + "learning_rate": 1.2727272727272728e-05, + "loss": 1.1508, "step": 406 }, { - "epoch": 0.08534283916963724, - "grad_norm": 5.457418856033933, - "learning_rate": 1.8930232558139538e-05, - "loss": 2.707, + "epoch": 0.05746964134425304, + "grad_norm": 6.464118752577883, + "learning_rate": 1.2758620689655174e-05, + "loss": 1.1197, "step": 407 }, { - "epoch": 0.0855525267351646, - "grad_norm": 7.0496032230026735, - "learning_rate": 1.8976744186046516e-05, - "loss": 2.9835, + "epoch": 0.057610844394238914, + "grad_norm": 6.230579768015557, + "learning_rate": 1.2789968652037618e-05, + "loss": 1.0087, "step": 408 }, { - "epoch": 0.08576221430069197, - "grad_norm": 5.520813416086713, - "learning_rate": 1.902325581395349e-05, - "loss": 2.5473, + "epoch": 0.0577520474442248, + "grad_norm": 6.3455758299734395, + "learning_rate": 1.2821316614420062e-05, + "loss": 1.0038, "step": 409 }, { - "epoch": 0.08597190186621934, - "grad_norm": 4.87398199825445, - "learning_rate": 1.9069767441860468e-05, - "loss": 2.4163, + "epoch": 0.05789325049421067, + "grad_norm": 6.869550755127195, + "learning_rate": 1.285266457680251e-05, + "loss": 1.0798, "step": 410 }, { - "epoch": 0.0861815894317467, - "grad_norm": 6.251413406652676, - "learning_rate": 1.9116279069767442e-05, - "loss": 2.6383, + "epoch": 0.058034453544196556, + "grad_norm": 7.5630217101904105, + "learning_rate": 1.2884012539184954e-05, + "loss": 1.2505, "step": 411 }, { - "epoch": 0.08639127699727406, - "grad_norm": 5.2699362722532, - "learning_rate": 1.916279069767442e-05, - "loss": 2.4343, + "epoch": 0.05817565659418243, + "grad_norm": 6.154565001301907, + "learning_rate": 1.29153605015674e-05, + "loss": 0.9161, "step": 412 }, { - "epoch": 0.08660096456280142, - "grad_norm": 5.107110416988616, - "learning_rate": 1.9209302325581397e-05, - "loss": 2.5345, + "epoch": 0.058316859644168315, + "grad_norm": 6.421016396089281, + "learning_rate": 1.2946708463949845e-05, + "loss": 1.172, "step": 413 }, { - "epoch": 0.0868106521283288, - "grad_norm": 6.6423414665446705, - "learning_rate": 1.9255813953488375e-05, - "loss": 2.9373, + "epoch": 0.05845806269415419, + "grad_norm": 7.237135973611175, + "learning_rate": 1.297805642633229e-05, + "loss": 1.2409, "step": 414 }, { - "epoch": 0.08702033969385616, - "grad_norm": 5.349537171025931, - "learning_rate": 1.9302325581395353e-05, - "loss": 2.7612, + "epoch": 0.058599265744140074, + "grad_norm": 6.964626385298887, + "learning_rate": 1.3009404388714735e-05, + "loss": 1.2627, "step": 415 }, { - "epoch": 0.08723002725938352, - "grad_norm": 6.576590652857589, - "learning_rate": 1.9348837209302327e-05, - "loss": 2.7716, + "epoch": 0.058740468794125957, + "grad_norm": 6.130986684964869, + "learning_rate": 1.3040752351097179e-05, + "loss": 0.8719, "step": 416 }, { - "epoch": 0.08743971482491088, - "grad_norm": 6.006553360145802, - "learning_rate": 1.9395348837209305e-05, - "loss": 2.8141, + "epoch": 0.05888167184411183, + "grad_norm": 6.4891033533190035, + "learning_rate": 1.3072100313479626e-05, + "loss": 1.1481, "step": 417 }, { - "epoch": 0.08764940239043825, - "grad_norm": 5.556741814789794, - "learning_rate": 1.944186046511628e-05, - "loss": 2.6062, + "epoch": 0.059022874894097715, + "grad_norm": 6.261390374690267, + "learning_rate": 1.310344827586207e-05, + "loss": 0.9482, "step": 418 }, { - "epoch": 0.08785908995596561, - "grad_norm": 5.407688232080843, - "learning_rate": 1.9488372093023257e-05, - "loss": 2.6978, + "epoch": 0.05916407794408359, + "grad_norm": 7.39329935205469, + "learning_rate": 1.3134796238244516e-05, + "loss": 1.0942, "step": 419 }, { - "epoch": 0.08806877752149297, - "grad_norm": 5.456501422868993, - "learning_rate": 1.9534883720930235e-05, - "loss": 2.5827, + "epoch": 0.059305280994069474, + "grad_norm": 6.324103294687855, + "learning_rate": 1.316614420062696e-05, + "loss": 1.0391, "step": 420 }, { - "epoch": 0.08827846508702034, - "grad_norm": 6.386595479843006, - "learning_rate": 1.9581395348837212e-05, - "loss": 2.8944, + "epoch": 0.05944648404405535, + "grad_norm": 6.788249018384576, + "learning_rate": 1.3197492163009404e-05, + "loss": 1.1609, "step": 421 }, { - "epoch": 0.08848815265254771, - "grad_norm": 6.692831232313039, - "learning_rate": 1.9627906976744187e-05, - "loss": 2.5248, + "epoch": 0.05958768709404123, + "grad_norm": 6.571402566366432, + "learning_rate": 1.3228840125391851e-05, + "loss": 1.3202, "step": 422 }, { - "epoch": 0.08869784021807507, - "grad_norm": 5.341320791247144, - "learning_rate": 1.9674418604651164e-05, - "loss": 2.3906, + "epoch": 0.05972889014402711, + "grad_norm": 5.9262192112467345, + "learning_rate": 1.3260188087774295e-05, + "loss": 1.0198, "step": 423 }, { - "epoch": 0.08890752778360243, - "grad_norm": 5.70436415960133, - "learning_rate": 1.9720930232558142e-05, - "loss": 2.575, + "epoch": 0.05987009319401299, + "grad_norm": 5.935144328380646, + "learning_rate": 1.329153605015674e-05, + "loss": 1.083, "step": 424 }, { - "epoch": 0.0891172153491298, - "grad_norm": 5.0890164850281625, - "learning_rate": 1.9767441860465116e-05, - "loss": 2.7715, + "epoch": 0.06001129624399887, + "grad_norm": 6.09784368949035, + "learning_rate": 1.3322884012539186e-05, + "loss": 1.2458, "step": 425 }, { - "epoch": 0.08932690291465716, - "grad_norm": 5.721675549050049, - "learning_rate": 1.9813953488372094e-05, - "loss": 2.845, + "epoch": 0.06015249929398475, + "grad_norm": 6.244017748538626, + "learning_rate": 1.3354231974921632e-05, + "loss": 1.1666, "step": 426 }, { - "epoch": 0.08953659048018453, - "grad_norm": 5.857880106965985, - "learning_rate": 1.9860465116279072e-05, - "loss": 2.6757, + "epoch": 0.06029370234397063, + "grad_norm": 6.061248209200218, + "learning_rate": 1.3385579937304076e-05, + "loss": 0.9919, "step": 427 }, { - "epoch": 0.08974627804571189, - "grad_norm": 5.273886980398418, - "learning_rate": 1.990697674418605e-05, - "loss": 2.705, + "epoch": 0.06043490539395651, + "grad_norm": 6.296866126158882, + "learning_rate": 1.341692789968652e-05, + "loss": 1.1745, "step": 428 }, { - "epoch": 0.08995596561123925, - "grad_norm": 5.414827969068109, - "learning_rate": 1.9953488372093024e-05, - "loss": 2.5024, + "epoch": 0.060576108443942386, + "grad_norm": 6.445561375865732, + "learning_rate": 1.3448275862068967e-05, + "loss": 0.9518, "step": 429 }, { - "epoch": 0.09016565317676661, - "grad_norm": 6.0070052114533325, - "learning_rate": 2e-05, - "loss": 2.53, + "epoch": 0.06071731149392827, + "grad_norm": 7.230463456975804, + "learning_rate": 1.3479623824451411e-05, + "loss": 1.1376, "step": 430 }, { - "epoch": 0.09037534074229399, - "grad_norm": 5.960425903800965, - "learning_rate": 1.999999974374133e-05, - "loss": 2.6704, + "epoch": 0.060858514543914145, + "grad_norm": 6.959747448277786, + "learning_rate": 1.3510971786833857e-05, + "loss": 1.0553, "step": 431 }, { - "epoch": 0.09058502830782135, - "grad_norm": 6.41635429498773, - "learning_rate": 1.9999998974965327e-05, - "loss": 2.5043, + "epoch": 0.06099971759390003, + "grad_norm": 6.595465120612675, + "learning_rate": 1.3542319749216301e-05, + "loss": 1.073, "step": 432 }, { - "epoch": 0.09079471587334871, - "grad_norm": 5.621730372574778, - "learning_rate": 1.999999769367203e-05, - "loss": 2.7389, + "epoch": 0.06114092064388591, + "grad_norm": 5.383404595788152, + "learning_rate": 1.3573667711598749e-05, + "loss": 0.885, "step": 433 }, { - "epoch": 0.09100440343887607, - "grad_norm": 5.884022718989794, - "learning_rate": 1.999999589986151e-05, - "loss": 2.8743, + "epoch": 0.06128212369387179, + "grad_norm": 6.119023697119455, + "learning_rate": 1.3605015673981193e-05, + "loss": 1.3059, "step": 434 }, { - "epoch": 0.09121409100440343, - "grad_norm": 6.437487178912405, - "learning_rate": 1.9999993593533858e-05, - "loss": 2.6191, + "epoch": 0.06142332674385767, + "grad_norm": 6.617801794193726, + "learning_rate": 1.3636363636363637e-05, + "loss": 1.2297, "step": 435 }, { - "epoch": 0.09142377856993081, - "grad_norm": 5.472045448097777, - "learning_rate": 1.9999990774689186e-05, - "loss": 2.7156, + "epoch": 0.061564529793843546, + "grad_norm": 5.7943172628970565, + "learning_rate": 1.3667711598746082e-05, + "loss": 1.0181, "step": 436 }, { - "epoch": 0.09163346613545817, - "grad_norm": 5.9600134087330625, - "learning_rate": 1.9999987443327645e-05, - "loss": 2.5117, + "epoch": 0.06170573284382943, + "grad_norm": 6.179960661932418, + "learning_rate": 1.3699059561128526e-05, + "loss": 0.9769, "step": 437 }, { - "epoch": 0.09184315370098553, - "grad_norm": 5.040329827674821, - "learning_rate": 1.9999983599449403e-05, - "loss": 2.6345, + "epoch": 0.061846935893815305, + "grad_norm": 6.412368905043698, + "learning_rate": 1.3730407523510974e-05, + "loss": 0.9235, "step": 438 }, { - "epoch": 0.09205284126651289, - "grad_norm": 5.4961592366018674, - "learning_rate": 1.999997924305466e-05, - "loss": 2.6613, + "epoch": 0.06198813894380119, + "grad_norm": 5.756054839510773, + "learning_rate": 1.3761755485893418e-05, + "loss": 1.0357, "step": 439 }, { - "epoch": 0.09226252883204027, - "grad_norm": 5.7771173965444005, - "learning_rate": 1.9999974374143632e-05, - "loss": 2.8014, + "epoch": 0.062129341993787064, + "grad_norm": 5.4813663413032385, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.8484, "step": 440 }, { - "epoch": 0.09247221639756763, - "grad_norm": 5.71809983482399, - "learning_rate": 1.9999968992716574e-05, - "loss": 2.6042, + "epoch": 0.06227054504377295, + "grad_norm": 6.965157066220166, + "learning_rate": 1.3824451410658309e-05, + "loss": 1.0424, "step": 441 }, { - "epoch": 0.09268190396309499, - "grad_norm": 5.402741944518257, - "learning_rate": 1.9999963098773763e-05, - "loss": 2.6389, + "epoch": 0.06241174809375882, + "grad_norm": 5.644234255718042, + "learning_rate": 1.3855799373040753e-05, + "loss": 1.0126, "step": 442 }, { - "epoch": 0.09289159152862235, - "grad_norm": 4.91373081975557, - "learning_rate": 1.9999956692315496e-05, - "loss": 2.5403, + "epoch": 0.0625529511437447, + "grad_norm": 6.035212831918365, + "learning_rate": 1.3887147335423199e-05, + "loss": 1.0505, "step": 443 }, { - "epoch": 0.09310127909414971, - "grad_norm": 6.049854120263461, - "learning_rate": 1.999994977334211e-05, - "loss": 2.6504, + "epoch": 0.06269415419373059, + "grad_norm": 7.256280102345634, + "learning_rate": 1.3918495297805643e-05, + "loss": 1.1926, "step": 444 }, { - "epoch": 0.09331096665967709, - "grad_norm": 4.83617941446969, - "learning_rate": 1.999994234185395e-05, - "loss": 2.6666, + "epoch": 0.06283535724371646, + "grad_norm": 6.182333192512244, + "learning_rate": 1.394984326018809e-05, + "loss": 1.1631, "step": 445 }, { - "epoch": 0.09352065422520445, - "grad_norm": 5.375626449202231, - "learning_rate": 1.99999343978514e-05, - "loss": 2.7496, + "epoch": 0.06297656029370234, + "grad_norm": 5.800825645136416, + "learning_rate": 1.3981191222570534e-05, + "loss": 0.9231, "step": 446 }, { - "epoch": 0.09373034179073181, - "grad_norm": 5.80436245690412, - "learning_rate": 1.9999925941334873e-05, - "loss": 2.7277, + "epoch": 0.06311776334368822, + "grad_norm": 6.549637896102323, + "learning_rate": 1.401253918495298e-05, + "loss": 1.1561, "step": 447 }, { - "epoch": 0.09394002935625917, - "grad_norm": 5.588237511682181, - "learning_rate": 1.9999916972304795e-05, - "loss": 2.5495, + "epoch": 0.06325896639367411, + "grad_norm": 6.17637429100724, + "learning_rate": 1.4043887147335424e-05, + "loss": 1.1849, "step": 448 }, { - "epoch": 0.09414971692178654, - "grad_norm": 5.783832722230089, - "learning_rate": 1.9999907490761624e-05, - "loss": 2.7139, + "epoch": 0.06340016944365999, + "grad_norm": 7.240368391150512, + "learning_rate": 1.4075235109717868e-05, + "loss": 1.1659, "step": 449 }, { - "epoch": 0.0943594044873139, - "grad_norm": 6.0148287387028345, - "learning_rate": 1.9999897496705856e-05, - "loss": 2.8294, + "epoch": 0.06354137249364586, + "grad_norm": 5.807241012418055, + "learning_rate": 1.4106583072100315e-05, + "loss": 1.0264, "step": 450 }, { - "epoch": 0.09456909205284127, - "grad_norm": 4.823691801875174, - "learning_rate": 1.9999886990137995e-05, - "loss": 2.4045, + "epoch": 0.06368257554363174, + "grad_norm": 6.71173316611635, + "learning_rate": 1.4137931034482759e-05, + "loss": 1.1706, "step": 451 }, { - "epoch": 0.09477877961836863, - "grad_norm": 5.664666742946905, - "learning_rate": 1.999987597105858e-05, - "loss": 2.8138, + "epoch": 0.06382377859361762, + "grad_norm": 6.169610363982729, + "learning_rate": 1.4169278996865205e-05, + "loss": 1.1525, "step": 452 }, { - "epoch": 0.09498846718389599, - "grad_norm": 5.175960849309011, - "learning_rate": 1.999986443946818e-05, - "loss": 2.5709, + "epoch": 0.06396498164360351, + "grad_norm": 6.67150414627448, + "learning_rate": 1.420062695924765e-05, + "loss": 1.2384, "step": 453 }, { - "epoch": 0.09519815474942336, - "grad_norm": 5.4327949225361065, - "learning_rate": 1.9999852395367384e-05, - "loss": 2.8675, + "epoch": 0.06410618469358938, + "grad_norm": 6.919929841468454, + "learning_rate": 1.4231974921630096e-05, + "loss": 1.3341, "step": 454 }, { - "epoch": 0.09540784231495072, - "grad_norm": 6.5224952842492305, - "learning_rate": 1.999983983875681e-05, - "loss": 2.7747, + "epoch": 0.06424738774357526, + "grad_norm": 5.595472676314099, + "learning_rate": 1.426332288401254e-05, + "loss": 0.9739, "step": 455 }, { - "epoch": 0.09561752988047809, - "grad_norm": 6.785685535621866, - "learning_rate": 1.99998267696371e-05, - "loss": 2.6202, + "epoch": 0.06438859079356114, + "grad_norm": 7.341705285146319, + "learning_rate": 1.4294670846394984e-05, + "loss": 1.1755, "step": 456 }, { - "epoch": 0.09582721744600545, - "grad_norm": 7.9980755631953535, - "learning_rate": 1.999981318800892e-05, - "loss": 2.8397, + "epoch": 0.06452979384354703, + "grad_norm": 6.607899093283406, + "learning_rate": 1.4326018808777432e-05, + "loss": 1.1249, "step": 457 }, { - "epoch": 0.09603690501153282, - "grad_norm": 5.624821398469246, - "learning_rate": 1.9999799093872974e-05, - "loss": 2.6769, + "epoch": 0.0646709968935329, + "grad_norm": 6.205452324610187, + "learning_rate": 1.4357366771159876e-05, + "loss": 1.1244, "step": 458 }, { - "epoch": 0.09624659257706018, - "grad_norm": 5.60366178340573, - "learning_rate": 1.9999784487229978e-05, - "loss": 2.6743, + "epoch": 0.06481219994351878, + "grad_norm": 6.422620522724941, + "learning_rate": 1.4388714733542321e-05, + "loss": 1.0215, "step": 459 }, { - "epoch": 0.09645628014258754, - "grad_norm": 5.290331678024499, - "learning_rate": 1.9999769368080684e-05, - "loss": 2.3815, + "epoch": 0.06495340299350466, + "grad_norm": 5.513568377439427, + "learning_rate": 1.4420062695924765e-05, + "loss": 1.0523, "step": 460 }, { - "epoch": 0.0966659677081149, - "grad_norm": 6.199980941313438, - "learning_rate": 1.999975373642587e-05, - "loss": 2.6348, + "epoch": 0.06509460604349054, + "grad_norm": 6.863710439073776, + "learning_rate": 1.4451410658307213e-05, + "loss": 1.0948, "step": 461 }, { - "epoch": 0.09687565527364227, - "grad_norm": 5.837642116719691, - "learning_rate": 1.999973759226633e-05, - "loss": 2.6432, + "epoch": 0.06523580909347641, + "grad_norm": 6.466814014679621, + "learning_rate": 1.4482758620689657e-05, + "loss": 1.2171, "step": 462 }, { - "epoch": 0.09708534283916964, - "grad_norm": 5.713414124198615, - "learning_rate": 1.9999720935602896e-05, - "loss": 2.5827, + "epoch": 0.0653770121434623, + "grad_norm": 6.571709729956653, + "learning_rate": 1.45141065830721e-05, + "loss": 1.1121, "step": 463 }, { - "epoch": 0.097295030404697, - "grad_norm": 5.634915914795623, - "learning_rate": 1.9999703766436416e-05, - "loss": 2.6695, + "epoch": 0.06551821519344818, + "grad_norm": 7.45591231216958, + "learning_rate": 1.4545454545454546e-05, + "loss": 1.1951, "step": 464 }, { - "epoch": 0.09750471797022436, - "grad_norm": 5.6630857711538045, - "learning_rate": 1.999968608476778e-05, - "loss": 2.6379, + "epoch": 0.06565941824343406, + "grad_norm": 6.460185508560586, + "learning_rate": 1.4576802507836992e-05, + "loss": 1.2531, "step": 465 }, { - "epoch": 0.09771440553575172, - "grad_norm": 5.66602732631865, - "learning_rate": 1.9999667890597885e-05, - "loss": 2.7869, + "epoch": 0.06580062129341994, + "grad_norm": 5.472168217526706, + "learning_rate": 1.4608150470219438e-05, + "loss": 0.9932, "step": 466 }, { - "epoch": 0.0979240931012791, - "grad_norm": 5.308354869384293, - "learning_rate": 1.9999649183927673e-05, - "loss": 2.6358, + "epoch": 0.06594182434340581, + "grad_norm": 5.609935378754374, + "learning_rate": 1.4639498432601882e-05, + "loss": 1.0227, "step": 467 }, { - "epoch": 0.09813378066680646, - "grad_norm": 5.496323230462743, - "learning_rate": 1.9999629964758093e-05, - "loss": 2.7075, + "epoch": 0.0660830273933917, + "grad_norm": 5.856835217316755, + "learning_rate": 1.4670846394984329e-05, + "loss": 1.0027, "step": 468 }, { - "epoch": 0.09834346823233382, - "grad_norm": 5.542408691249312, - "learning_rate": 1.999961023309013e-05, - "loss": 2.7467, + "epoch": 0.06622423044337758, + "grad_norm": 6.897239918818994, + "learning_rate": 1.4702194357366773e-05, + "loss": 1.0837, "step": 469 }, { - "epoch": 0.09855315579786118, - "grad_norm": 5.422128619389672, - "learning_rate": 1.9999589988924804e-05, - "loss": 2.7005, + "epoch": 0.06636543349336346, + "grad_norm": 5.075493341272089, + "learning_rate": 1.4733542319749217e-05, + "loss": 1.0563, "step": 470 }, { - "epoch": 0.09876284336338854, - "grad_norm": 5.274937203640006, - "learning_rate": 1.9999569232263145e-05, - "loss": 2.6139, + "epoch": 0.06650663654334933, + "grad_norm": 6.487047601012693, + "learning_rate": 1.4764890282131663e-05, + "loss": 1.164, "step": 471 }, { - "epoch": 0.09897253092891592, - "grad_norm": 5.9220959491954215, - "learning_rate": 1.999954796310622e-05, - "loss": 2.6594, + "epoch": 0.06664783959333521, + "grad_norm": 5.561060553990291, + "learning_rate": 1.4796238244514107e-05, + "loss": 0.9517, "step": 472 }, { - "epoch": 0.09918221849444328, - "grad_norm": 5.39217619898896, - "learning_rate": 1.9999526181455116e-05, - "loss": 2.6863, + "epoch": 0.0667890426433211, + "grad_norm": 5.306478619977663, + "learning_rate": 1.4827586206896554e-05, + "loss": 0.9706, "step": 473 }, { - "epoch": 0.09939190605997064, - "grad_norm": 5.90160115406471, - "learning_rate": 1.999950388731096e-05, - "loss": 2.6227, + "epoch": 0.06693024569330698, + "grad_norm": 5.545504602796221, + "learning_rate": 1.4858934169278998e-05, + "loss": 1.0224, "step": 474 }, { - "epoch": 0.099601593625498, - "grad_norm": 5.98777534779657, - "learning_rate": 1.999948108067488e-05, - "loss": 2.506, + "epoch": 0.06707144874329285, + "grad_norm": 6.096402444223812, + "learning_rate": 1.4890282131661444e-05, + "loss": 1.2514, "step": 475 }, { - "epoch": 0.09981128119102538, - "grad_norm": 6.196923671026067, - "learning_rate": 1.9999457761548054e-05, - "loss": 2.7249, + "epoch": 0.06721265179327873, + "grad_norm": 7.044657635371257, + "learning_rate": 1.4921630094043888e-05, + "loss": 1.3064, "step": 476 }, { - "epoch": 0.10002096875655274, - "grad_norm": 5.470458354976303, - "learning_rate": 1.999943392993167e-05, - "loss": 2.6591, + "epoch": 0.06735385484326462, + "grad_norm": 6.974941097082132, + "learning_rate": 1.4952978056426334e-05, + "loss": 1.1552, "step": 477 }, { - "epoch": 0.1002306563220801, - "grad_norm": 5.490708029945062, - "learning_rate": 1.999940958582696e-05, - "loss": 2.4634, + "epoch": 0.0674950578932505, + "grad_norm": 5.518244664747658, + "learning_rate": 1.498432601880878e-05, + "loss": 1.0333, "step": 478 }, { - "epoch": 0.10044034388760746, - "grad_norm": 4.78497026880034, - "learning_rate": 1.9999384729235164e-05, - "loss": 2.522, + "epoch": 0.06763626094323637, + "grad_norm": 5.705166168869933, + "learning_rate": 1.5015673981191223e-05, + "loss": 0.9881, "step": 479 }, { - "epoch": 0.10065003145313482, - "grad_norm": 5.86210394443645, - "learning_rate": 1.9999359360157557e-05, - "loss": 2.5902, + "epoch": 0.06777746399322225, + "grad_norm": 6.733230778134236, + "learning_rate": 1.5047021943573669e-05, + "loss": 1.2252, "step": 480 }, { - "epoch": 0.1008597190186622, - "grad_norm": 5.431408794205825, - "learning_rate": 1.999933347859544e-05, - "loss": 2.5776, + "epoch": 0.06791866704320813, + "grad_norm": 7.318963337402553, + "learning_rate": 1.5078369905956115e-05, + "loss": 0.9486, "step": 481 }, { - "epoch": 0.10106940658418956, - "grad_norm": 4.772194403747424, - "learning_rate": 1.9999307084550133e-05, - "loss": 2.6012, + "epoch": 0.06805987009319402, + "grad_norm": 5.71036970305862, + "learning_rate": 1.5109717868338559e-05, + "loss": 0.8924, "step": 482 }, { - "epoch": 0.10127909414971692, - "grad_norm": 5.957703332978828, - "learning_rate": 1.9999280178023004e-05, - "loss": 2.6663, + "epoch": 0.0682010731431799, + "grad_norm": 6.447491605316806, + "learning_rate": 1.5141065830721004e-05, + "loss": 1.0869, "step": 483 }, { - "epoch": 0.10148878171524428, - "grad_norm": 6.117437833472381, - "learning_rate": 1.9999252759015418e-05, - "loss": 2.8025, + "epoch": 0.06834227619316577, + "grad_norm": 6.5559537592079336, + "learning_rate": 1.5172413793103448e-05, + "loss": 1.2647, "step": 484 }, { - "epoch": 0.10169846928077166, - "grad_norm": 4.853564132624564, - "learning_rate": 1.999922482752879e-05, - "loss": 2.4937, + "epoch": 0.06848347924315165, + "grad_norm": 6.252401468409138, + "learning_rate": 1.5203761755485896e-05, + "loss": 0.9774, "step": 485 }, { - "epoch": 0.10190815684629902, - "grad_norm": 4.868952464252578, - "learning_rate": 1.9999196383564544e-05, - "loss": 2.4379, + "epoch": 0.06862468229313753, + "grad_norm": 5.927837383671542, + "learning_rate": 1.523510971786834e-05, + "loss": 0.9423, "step": 486 }, { - "epoch": 0.10211784441182638, - "grad_norm": 5.414300387662243, - "learning_rate": 1.999916742712414e-05, - "loss": 2.5766, + "epoch": 0.06876588534312342, + "grad_norm": 5.324097854415276, + "learning_rate": 1.5266457680250787e-05, + "loss": 0.9911, "step": 487 }, { - "epoch": 0.10232753197735374, - "grad_norm": 6.4439519224976145, - "learning_rate": 1.999913795820906e-05, - "loss": 2.6016, + "epoch": 0.06890708839310929, + "grad_norm": 5.753233975849189, + "learning_rate": 1.529780564263323e-05, + "loss": 0.9426, "step": 488 }, { - "epoch": 0.10253721954288111, - "grad_norm": 5.535209643558027, - "learning_rate": 1.9999107976820823e-05, - "loss": 2.6869, + "epoch": 0.06904829144309517, + "grad_norm": 6.750991687413456, + "learning_rate": 1.5329153605015675e-05, + "loss": 1.0388, "step": 489 }, { - "epoch": 0.10274690710840848, - "grad_norm": 5.408600507073794, - "learning_rate": 1.9999077482960955e-05, - "loss": 2.6929, + "epoch": 0.06918949449308105, + "grad_norm": 6.182367507382677, + "learning_rate": 1.536050156739812e-05, + "loss": 0.9484, "step": 490 }, { - "epoch": 0.10295659467393584, - "grad_norm": 4.74942703402957, - "learning_rate": 1.9999046476631025e-05, - "loss": 2.6543, + "epoch": 0.06933069754306694, + "grad_norm": 5.836684795131817, + "learning_rate": 1.5391849529780566e-05, + "loss": 1.1077, "step": 491 }, { - "epoch": 0.1031662822394632, - "grad_norm": 5.623442300379496, - "learning_rate": 1.999901495783262e-05, - "loss": 2.5465, + "epoch": 0.0694719005930528, + "grad_norm": 6.74346044890058, + "learning_rate": 1.5423197492163012e-05, + "loss": 1.2073, "step": 492 }, { - "epoch": 0.10337596980499056, - "grad_norm": 5.353827901957465, - "learning_rate": 1.999898292656736e-05, - "loss": 2.7119, + "epoch": 0.06961310364303869, + "grad_norm": 6.454369234799304, + "learning_rate": 1.5454545454545454e-05, + "loss": 1.3091, "step": 493 }, { - "epoch": 0.10358565737051793, - "grad_norm": 5.265793390687354, - "learning_rate": 1.9998950382836875e-05, - "loss": 2.306, + "epoch": 0.06975430669302457, + "grad_norm": 6.007095637133718, + "learning_rate": 1.5485893416927904e-05, + "loss": 0.9159, "step": 494 }, { - "epoch": 0.1037953449360453, - "grad_norm": 4.601576136085532, - "learning_rate": 1.999891732664285e-05, - "loss": 2.3583, + "epoch": 0.06989550974301045, + "grad_norm": 6.53976612139777, + "learning_rate": 1.5517241379310346e-05, + "loss": 1.0973, "step": 495 }, { - "epoch": 0.10400503250157266, - "grad_norm": 7.0899812151715675, - "learning_rate": 1.999888375798696e-05, - "loss": 2.5927, + "epoch": 0.07003671279299632, + "grad_norm": 5.955631406069896, + "learning_rate": 1.554858934169279e-05, + "loss": 1.1143, "step": 496 }, { - "epoch": 0.10421472006710002, - "grad_norm": 5.770738287851966, - "learning_rate": 1.9998849676870943e-05, - "loss": 2.8128, + "epoch": 0.0701779158429822, + "grad_norm": 5.4171902019084825, + "learning_rate": 1.5579937304075237e-05, + "loss": 0.9846, "step": 497 }, { - "epoch": 0.10442440763262739, - "grad_norm": 5.343882586098956, - "learning_rate": 1.9998815083296536e-05, - "loss": 2.6729, + "epoch": 0.07031911889296809, + "grad_norm": 6.793090063833778, + "learning_rate": 1.561128526645768e-05, + "loss": 1.1967, "step": 498 }, { - "epoch": 0.10463409519815475, - "grad_norm": 5.439766649151531, - "learning_rate": 1.999877997726551e-05, - "loss": 2.626, + "epoch": 0.07046032194295397, + "grad_norm": 6.742796010345787, + "learning_rate": 1.564263322884013e-05, + "loss": 1.2463, "step": 499 }, { - "epoch": 0.10484378276368211, - "grad_norm": 5.3463005089974365, - "learning_rate": 1.9998744358779672e-05, - "loss": 2.5737, + "epoch": 0.07060152499293985, + "grad_norm": 5.344652204172256, + "learning_rate": 1.567398119122257e-05, + "loss": 1.0464, "step": 500 }, { - "epoch": 0.10505347032920948, - "grad_norm": 5.266449331120233, - "learning_rate": 1.9998708227840842e-05, - "loss": 2.5286, + "epoch": 0.07074272804292572, + "grad_norm": 6.39036293454649, + "learning_rate": 1.5705329153605017e-05, + "loss": 1.0594, "step": 501 }, { - "epoch": 0.10526315789473684, - "grad_norm": 5.794319244925462, - "learning_rate": 1.9998671584450876e-05, - "loss": 2.5409, + "epoch": 0.0708839310929116, + "grad_norm": 6.194662126507768, + "learning_rate": 1.5736677115987462e-05, + "loss": 0.9793, "step": 502 }, { - "epoch": 0.10547284546026421, - "grad_norm": 5.895220317942617, - "learning_rate": 1.9998634428611646e-05, - "loss": 2.5332, + "epoch": 0.07102513414289749, + "grad_norm": 5.464672530516132, + "learning_rate": 1.5768025078369908e-05, + "loss": 0.9024, "step": 503 }, { - "epoch": 0.10568253302579157, - "grad_norm": 5.55712877854596, - "learning_rate": 1.999859676032506e-05, - "loss": 2.6529, + "epoch": 0.07116633719288337, + "grad_norm": 6.068700476242649, + "learning_rate": 1.5799373040752354e-05, + "loss": 1.1162, "step": 504 }, { - "epoch": 0.10589222059131893, - "grad_norm": 5.626329177204718, - "learning_rate": 1.9998558579593052e-05, - "loss": 2.7653, + "epoch": 0.07130754024286924, + "grad_norm": 6.464978869074758, + "learning_rate": 1.5830721003134796e-05, + "loss": 1.2319, "step": 505 }, { - "epoch": 0.1061019081568463, - "grad_norm": 6.1517665640732435, - "learning_rate": 1.999851988641757e-05, - "loss": 2.6343, + "epoch": 0.07144874329285512, + "grad_norm": 5.941790207913838, + "learning_rate": 1.586206896551724e-05, + "loss": 1.3378, "step": 506 }, { - "epoch": 0.10631159572237367, - "grad_norm": 5.528137493345177, - "learning_rate": 1.9998480680800607e-05, - "loss": 2.6106, + "epoch": 0.07158994634284101, + "grad_norm": 5.682901754070349, + "learning_rate": 1.5893416927899687e-05, + "loss": 1.0293, "step": 507 }, { - "epoch": 0.10652128328790103, - "grad_norm": 5.730074344966369, - "learning_rate": 1.9998440962744164e-05, - "loss": 2.5419, + "epoch": 0.07173114939282689, + "grad_norm": 5.872876297279923, + "learning_rate": 1.5924764890282133e-05, + "loss": 1.0867, "step": 508 }, { - "epoch": 0.10673097085342839, - "grad_norm": 5.5326329704019415, - "learning_rate": 1.9998400732250284e-05, - "loss": 2.5181, + "epoch": 0.07187235244281276, + "grad_norm": 5.371428406042664, + "learning_rate": 1.595611285266458e-05, + "loss": 1.0894, "step": 509 }, { - "epoch": 0.10694065841895575, - "grad_norm": 5.908567066648113, - "learning_rate": 1.999835998932102e-05, - "loss": 2.6386, + "epoch": 0.07201355549279864, + "grad_norm": 7.314432059488115, + "learning_rate": 1.598746081504702e-05, + "loss": 1.021, "step": 510 }, { - "epoch": 0.10715034598448311, - "grad_norm": 6.082411372734441, - "learning_rate": 1.999831873395847e-05, - "loss": 2.3721, + "epoch": 0.07215475854278453, + "grad_norm": 7.426048653578476, + "learning_rate": 1.601880877742947e-05, + "loss": 1.3518, "step": 511 }, { - "epoch": 0.10736003355001049, - "grad_norm": 5.978197588276751, - "learning_rate": 1.9998276966164742e-05, - "loss": 2.8777, + "epoch": 0.07229596159277041, + "grad_norm": 5.880104246923241, + "learning_rate": 1.6050156739811912e-05, + "loss": 1.0042, "step": 512 }, { - "epoch": 0.10756972111553785, - "grad_norm": 5.679058175856486, - "learning_rate": 1.999823468594198e-05, - "loss": 2.6859, + "epoch": 0.07243716464275628, + "grad_norm": 6.657460638622406, + "learning_rate": 1.6081504702194358e-05, + "loss": 1.2185, "step": 513 }, { - "epoch": 0.10777940868106521, - "grad_norm": 4.853073321879911, - "learning_rate": 1.9998191893292347e-05, - "loss": 2.5453, + "epoch": 0.07257836769274216, + "grad_norm": 5.563185186463338, + "learning_rate": 1.6112852664576804e-05, + "loss": 1.1372, "step": 514 }, { - "epoch": 0.10798909624659257, - "grad_norm": 5.80047845788725, - "learning_rate": 1.9998148588218038e-05, - "loss": 2.6444, + "epoch": 0.07271957074272804, + "grad_norm": 6.429058405573293, + "learning_rate": 1.614420062695925e-05, + "loss": 1.2332, "step": 515 }, { - "epoch": 0.10819878381211995, - "grad_norm": 5.576124317347189, - "learning_rate": 1.9998104770721274e-05, - "loss": 2.7963, + "epoch": 0.07286077379271393, + "grad_norm": 6.478270156760181, + "learning_rate": 1.6175548589341695e-05, + "loss": 1.1697, "step": 516 }, { - "epoch": 0.10840847137764731, - "grad_norm": 5.515732212854999, - "learning_rate": 1.9998060440804302e-05, - "loss": 2.6533, + "epoch": 0.07300197684269981, + "grad_norm": 5.9238191638797755, + "learning_rate": 1.6206896551724137e-05, + "loss": 1.3669, "step": 517 }, { - "epoch": 0.10861815894317467, - "grad_norm": 5.833174455645435, - "learning_rate": 1.999801559846939e-05, - "loss": 2.4803, + "epoch": 0.07314317989268568, + "grad_norm": 5.668610797091928, + "learning_rate": 1.6238244514106583e-05, + "loss": 1.1832, "step": 518 }, { - "epoch": 0.10882784650870203, - "grad_norm": 4.788179962873536, - "learning_rate": 1.9997970243718835e-05, - "loss": 2.4361, + "epoch": 0.07328438294267156, + "grad_norm": 6.009943466356328, + "learning_rate": 1.626959247648903e-05, + "loss": 1.0866, "step": 519 }, { - "epoch": 0.10903753407422939, - "grad_norm": 7.199003589993666, - "learning_rate": 1.9997924376554967e-05, - "loss": 2.5668, + "epoch": 0.07342558599265744, + "grad_norm": 6.193366721972212, + "learning_rate": 1.6300940438871475e-05, + "loss": 1.4709, "step": 520 }, { - "epoch": 0.10924722163975677, - "grad_norm": 5.677166471762073, - "learning_rate": 1.9997877996980135e-05, - "loss": 2.5804, + "epoch": 0.07356678904264333, + "grad_norm": 5.823633698202906, + "learning_rate": 1.633228840125392e-05, + "loss": 1.148, "step": 521 }, { - "epoch": 0.10945690920528413, - "grad_norm": 6.667982980235848, - "learning_rate": 1.999783110499671e-05, - "loss": 2.3781, + "epoch": 0.0737079920926292, + "grad_norm": 6.1852921008929105, + "learning_rate": 1.6363636363636366e-05, + "loss": 1.0969, "step": 522 }, { - "epoch": 0.10966659677081149, - "grad_norm": 5.512445878703349, - "learning_rate": 1.9997783700607107e-05, - "loss": 2.5678, + "epoch": 0.07384919514261508, + "grad_norm": 5.953213590087683, + "learning_rate": 1.639498432601881e-05, + "loss": 1.0933, "step": 523 }, { - "epoch": 0.10987628433633885, - "grad_norm": 4.780213508580989, - "learning_rate": 1.9997735783813748e-05, - "loss": 2.5523, + "epoch": 0.07399039819260096, + "grad_norm": 5.710244906292251, + "learning_rate": 1.6426332288401254e-05, + "loss": 0.9773, "step": 524 }, { - "epoch": 0.11008597190186623, - "grad_norm": 5.8430158104820356, - "learning_rate": 1.999768735461909e-05, - "loss": 2.6848, + "epoch": 0.07413160124258684, + "grad_norm": 5.667687375942904, + "learning_rate": 1.64576802507837e-05, + "loss": 1.1034, "step": 525 }, { - "epoch": 0.11029565946739359, - "grad_norm": 5.261270035066101, - "learning_rate": 1.9997638413025612e-05, - "loss": 2.7444, + "epoch": 0.07427280429257271, + "grad_norm": 5.802881994662777, + "learning_rate": 1.6489028213166145e-05, + "loss": 1.0898, "step": 526 }, { - "epoch": 0.11050534703292095, - "grad_norm": 4.937057553288801, - "learning_rate": 1.9997588959035826e-05, - "loss": 2.6237, + "epoch": 0.0744140073425586, + "grad_norm": 5.682791346168672, + "learning_rate": 1.652037617554859e-05, + "loss": 1.0491, "step": 527 }, { - "epoch": 0.11071503459844831, - "grad_norm": 5.7624052028360255, - "learning_rate": 1.9997538992652267e-05, - "loss": 2.7303, + "epoch": 0.07455521039254448, + "grad_norm": 6.030807394394101, + "learning_rate": 1.6551724137931037e-05, + "loss": 1.1362, "step": 528 }, { - "epoch": 0.11092472216397567, - "grad_norm": 5.172071280493205, - "learning_rate": 1.9997488513877498e-05, - "loss": 2.6619, + "epoch": 0.07469641344253036, + "grad_norm": 5.923891859838122, + "learning_rate": 1.6583072100313482e-05, + "loss": 1.1302, "step": 529 }, { - "epoch": 0.11113440972950304, - "grad_norm": 5.345373830140982, - "learning_rate": 1.99974375227141e-05, - "loss": 2.6435, + "epoch": 0.07483761649251623, + "grad_norm": 5.952051482905965, + "learning_rate": 1.6614420062695925e-05, + "loss": 1.1015, "step": 530 }, { - "epoch": 0.1113440972950304, - "grad_norm": 4.909306076745643, - "learning_rate": 1.9997386019164687e-05, - "loss": 2.4053, + "epoch": 0.07497881954250211, + "grad_norm": 6.209313248137419, + "learning_rate": 1.664576802507837e-05, + "loss": 1.1234, "step": 531 }, { - "epoch": 0.11155378486055777, - "grad_norm": 4.77733420661915, - "learning_rate": 1.9997334003231904e-05, - "loss": 2.4678, + "epoch": 0.075120022592488, + "grad_norm": 6.158602408239308, + "learning_rate": 1.6677115987460816e-05, + "loss": 1.373, "step": 532 }, { - "epoch": 0.11176347242608513, - "grad_norm": 5.16986640342695, - "learning_rate": 1.999728147491841e-05, - "loss": 2.4487, + "epoch": 0.07526122564247388, + "grad_norm": 4.796258131831851, + "learning_rate": 1.6708463949843262e-05, + "loss": 1.0352, "step": 533 }, { - "epoch": 0.1119731599916125, - "grad_norm": 5.230628640510131, - "learning_rate": 1.9997228434226903e-05, - "loss": 2.6181, + "epoch": 0.07540242869245976, + "grad_norm": 5.60296135867059, + "learning_rate": 1.6739811912225707e-05, + "loss": 1.0324, "step": 534 }, { - "epoch": 0.11218284755713986, - "grad_norm": 5.853022871575048, - "learning_rate": 1.99971748811601e-05, - "loss": 2.6099, + "epoch": 0.07554363174244563, + "grad_norm": 7.331734880940631, + "learning_rate": 1.6771159874608153e-05, + "loss": 1.2744, "step": 535 }, { - "epoch": 0.11239253512266723, - "grad_norm": 6.20050163281424, - "learning_rate": 1.9997120815720748e-05, - "loss": 2.7233, + "epoch": 0.07568483479243152, + "grad_norm": 5.852543930085999, + "learning_rate": 1.68025078369906e-05, + "loss": 1.1039, "step": 536 }, { - "epoch": 0.11260222268819459, - "grad_norm": 6.214963391517287, - "learning_rate": 1.9997066237911608e-05, - "loss": 2.6142, + "epoch": 0.0758260378424174, + "grad_norm": 6.145110329481626, + "learning_rate": 1.683385579937304e-05, + "loss": 1.2887, "step": 537 }, { - "epoch": 0.11281191025372195, - "grad_norm": 5.4798180256015945, - "learning_rate": 1.999701114773549e-05, - "loss": 2.5321, + "epoch": 0.07596724089240328, + "grad_norm": 5.259349636789632, + "learning_rate": 1.6865203761755487e-05, + "loss": 1.1346, "step": 538 }, { - "epoch": 0.11302159781924932, - "grad_norm": 5.375298864194612, - "learning_rate": 1.999695554519521e-05, - "loss": 2.7338, + "epoch": 0.07610844394238915, + "grad_norm": 5.286352779670908, + "learning_rate": 1.6896551724137932e-05, + "loss": 1.1865, "step": 539 }, { - "epoch": 0.11323128538477668, - "grad_norm": 5.7725418154852814, - "learning_rate": 1.999689943029362e-05, - "loss": 2.6815, + "epoch": 0.07624964699237503, + "grad_norm": 4.897339601994153, + "learning_rate": 1.6927899686520378e-05, + "loss": 1.0371, "step": 540 }, { - "epoch": 0.11344097295030404, - "grad_norm": 5.7692346185079515, - "learning_rate": 1.9996842803033597e-05, - "loss": 2.6926, + "epoch": 0.07639085004236092, + "grad_norm": 6.692016492969598, + "learning_rate": 1.6959247648902824e-05, + "loss": 1.2827, "step": 541 }, { - "epoch": 0.1136506605158314, - "grad_norm": 5.695150210183655, - "learning_rate": 1.9996785663418038e-05, - "loss": 2.7382, + "epoch": 0.0765320530923468, + "grad_norm": 5.740634310469214, + "learning_rate": 1.6990595611285266e-05, + "loss": 1.1825, "step": 542 }, { - "epoch": 0.11386034808135878, - "grad_norm": 6.273053977740661, - "learning_rate": 1.9996728011449877e-05, - "loss": 2.8509, + "epoch": 0.07667325614233267, + "grad_norm": 6.351661143462508, + "learning_rate": 1.7021943573667712e-05, + "loss": 1.1249, "step": 543 }, { - "epoch": 0.11407003564688614, - "grad_norm": 5.753481087512013, - "learning_rate": 1.9996669847132067e-05, - "loss": 2.6474, + "epoch": 0.07681445919231855, + "grad_norm": 6.46660632944194, + "learning_rate": 1.7053291536050158e-05, + "loss": 1.2392, "step": 544 }, { - "epoch": 0.1142797232124135, - "grad_norm": 5.562055661939869, - "learning_rate": 1.9996611170467587e-05, - "loss": 2.4718, + "epoch": 0.07695566224230443, + "grad_norm": 5.526638690733727, + "learning_rate": 1.7084639498432603e-05, + "loss": 1.1576, "step": 545 }, { - "epoch": 0.11448941077794086, - "grad_norm": 6.269581425546245, - "learning_rate": 1.9996551981459448e-05, - "loss": 2.5538, + "epoch": 0.07709686529229032, + "grad_norm": 6.114235327852676, + "learning_rate": 1.711598746081505e-05, + "loss": 1.1267, "step": 546 }, { - "epoch": 0.11469909834346823, - "grad_norm": 4.778157525726834, - "learning_rate": 1.9996492280110684e-05, - "loss": 2.4509, + "epoch": 0.07723806834227619, + "grad_norm": 5.264342286333056, + "learning_rate": 1.7147335423197495e-05, + "loss": 0.9464, "step": 547 }, { - "epoch": 0.1149087859089956, - "grad_norm": 6.260218852948971, - "learning_rate": 1.9996432066424352e-05, - "loss": 2.6991, + "epoch": 0.07737927139226207, + "grad_norm": 6.834650078271172, + "learning_rate": 1.717868338557994e-05, + "loss": 1.159, "step": 548 }, { - "epoch": 0.11511847347452296, - "grad_norm": 7.078277664927638, - "learning_rate": 1.9996371340403535e-05, - "loss": 2.6498, + "epoch": 0.07752047444224795, + "grad_norm": 5.996079921122092, + "learning_rate": 1.7210031347962383e-05, + "loss": 1.0824, "step": 549 }, { - "epoch": 0.11532816104005032, - "grad_norm": 5.994450909932426, - "learning_rate": 1.9996310102051354e-05, - "loss": 2.5986, + "epoch": 0.07766167749223384, + "grad_norm": 7.086148350401072, + "learning_rate": 1.7241379310344828e-05, + "loss": 1.3272, "step": 550 }, { - "epoch": 0.11553784860557768, - "grad_norm": 5.715137939220944, - "learning_rate": 1.999624835137094e-05, - "loss": 2.6444, + "epoch": 0.07780288054221972, + "grad_norm": 5.543735541393911, + "learning_rate": 1.7272727272727274e-05, + "loss": 0.9808, "step": 551 }, { - "epoch": 0.11574753617110506, - "grad_norm": 5.544481841817648, - "learning_rate": 1.999618608836546e-05, - "loss": 2.5788, + "epoch": 0.07794408359220559, + "grad_norm": 7.0843576897384315, + "learning_rate": 1.730407523510972e-05, + "loss": 1.5222, "step": 552 }, { - "epoch": 0.11595722373663242, - "grad_norm": 4.784007352390835, - "learning_rate": 1.9996123313038105e-05, - "loss": 2.5964, + "epoch": 0.07808528664219147, + "grad_norm": 4.838105665253886, + "learning_rate": 1.7335423197492165e-05, + "loss": 0.9439, "step": 553 }, { - "epoch": 0.11616691130215978, - "grad_norm": 4.7725629325319785, - "learning_rate": 1.9996060025392093e-05, - "loss": 2.43, + "epoch": 0.07822648969217735, + "grad_norm": 5.403135721254861, + "learning_rate": 1.7366771159874608e-05, + "loss": 1.1423, "step": 554 }, { - "epoch": 0.11637659886768714, - "grad_norm": 5.394147929053285, - "learning_rate": 1.999599622543067e-05, - "loss": 2.6378, + "epoch": 0.07836769274216324, + "grad_norm": 5.558285600533167, + "learning_rate": 1.7398119122257057e-05, + "loss": 1.0969, "step": 555 }, { - "epoch": 0.11658628643321452, - "grad_norm": 4.848390711476544, - "learning_rate": 1.99959319131571e-05, - "loss": 2.5703, + "epoch": 0.0785088957921491, + "grad_norm": 6.578773247578049, + "learning_rate": 1.74294670846395e-05, + "loss": 1.1369, "step": 556 }, { - "epoch": 0.11679597399874188, - "grad_norm": 6.326768339397591, - "learning_rate": 1.999586708857468e-05, - "loss": 2.6833, + "epoch": 0.07865009884213499, + "grad_norm": 5.480957390246865, + "learning_rate": 1.7460815047021945e-05, + "loss": 1.1984, "step": 557 }, { - "epoch": 0.11700566156426924, - "grad_norm": 6.0610930927826265, - "learning_rate": 1.999580175168674e-05, - "loss": 2.3693, + "epoch": 0.07879130189212087, + "grad_norm": 5.843736131099421, + "learning_rate": 1.749216300940439e-05, + "loss": 1.0589, "step": 558 }, { - "epoch": 0.1172153491297966, - "grad_norm": 4.661353301262643, - "learning_rate": 1.9995735902496622e-05, - "loss": 2.7019, + "epoch": 0.07893250494210675, + "grad_norm": 5.8052981592197765, + "learning_rate": 1.7523510971786836e-05, + "loss": 1.0831, "step": 559 }, { - "epoch": 0.11742503669532396, - "grad_norm": 5.635216063572443, - "learning_rate": 1.9995669541007703e-05, - "loss": 2.482, + "epoch": 0.07907370799209262, + "grad_norm": 6.241678234286253, + "learning_rate": 1.7554858934169282e-05, + "loss": 1.122, "step": 560 }, { - "epoch": 0.11763472426085134, - "grad_norm": 5.214617419063985, - "learning_rate": 1.999560266722338e-05, - "loss": 2.8172, + "epoch": 0.0792149110420785, + "grad_norm": 5.849170627484907, + "learning_rate": 1.7586206896551724e-05, + "loss": 1.0995, "step": 561 }, { - "epoch": 0.1178444118263787, - "grad_norm": 5.934904408462649, - "learning_rate": 1.999553528114709e-05, - "loss": 2.7383, + "epoch": 0.07935611409206439, + "grad_norm": 5.884474410784104, + "learning_rate": 1.7617554858934173e-05, + "loss": 0.9374, "step": 562 }, { - "epoch": 0.11805409939190606, - "grad_norm": 5.480689337975177, - "learning_rate": 1.9995467382782278e-05, - "loss": 2.5456, + "epoch": 0.07949731714205027, + "grad_norm": 5.832116996080886, + "learning_rate": 1.7648902821316615e-05, + "loss": 1.1393, "step": 563 }, { - "epoch": 0.11826378695743342, - "grad_norm": 5.864430945744579, - "learning_rate": 1.9995398972132422e-05, - "loss": 2.4998, + "epoch": 0.07963852019203614, + "grad_norm": 5.320624700179814, + "learning_rate": 1.768025078369906e-05, + "loss": 1.0044, "step": 564 }, { - "epoch": 0.1184734745229608, - "grad_norm": 5.572352742451088, - "learning_rate": 1.9995330049201037e-05, - "loss": 2.7039, + "epoch": 0.07977972324202202, + "grad_norm": 6.911040887792742, + "learning_rate": 1.7711598746081507e-05, + "loss": 1.1676, "step": 565 }, { - "epoch": 0.11868316208848816, - "grad_norm": 5.485299336145905, - "learning_rate": 1.999526061399165e-05, - "loss": 2.595, + "epoch": 0.07992092629200791, + "grad_norm": 6.853399074295397, + "learning_rate": 1.774294670846395e-05, + "loss": 1.132, "step": 566 }, { - "epoch": 0.11889284965401552, - "grad_norm": 5.449914823873228, - "learning_rate": 1.9995190666507818e-05, - "loss": 2.5257, + "epoch": 0.08006212934199379, + "grad_norm": 6.898205190949468, + "learning_rate": 1.7774294670846398e-05, + "loss": 1.2241, "step": 567 }, { - "epoch": 0.11910253721954288, - "grad_norm": 5.828691865177637, - "learning_rate": 1.9995120206753132e-05, - "loss": 2.4034, + "epoch": 0.08020333239197967, + "grad_norm": 5.576836953504434, + "learning_rate": 1.780564263322884e-05, + "loss": 1.075, "step": 568 }, { - "epoch": 0.11931222478507024, - "grad_norm": 5.456849930767592, - "learning_rate": 1.9995049234731198e-05, - "loss": 2.6921, + "epoch": 0.08034453544196554, + "grad_norm": 8.443395424117693, + "learning_rate": 1.7836990595611286e-05, + "loss": 1.4261, "step": 569 }, { - "epoch": 0.11952191235059761, - "grad_norm": 5.232394499420569, - "learning_rate": 1.9994977750445653e-05, - "loss": 2.5938, + "epoch": 0.08048573849195143, + "grad_norm": 5.0152196245214276, + "learning_rate": 1.7868338557993732e-05, + "loss": 0.9979, "step": 570 }, { - "epoch": 0.11973159991612498, - "grad_norm": 4.948895008756202, - "learning_rate": 1.9994905753900168e-05, - "loss": 2.6296, + "epoch": 0.08062694154193731, + "grad_norm": 6.270426407981272, + "learning_rate": 1.7899686520376178e-05, + "loss": 1.3631, "step": 571 }, { - "epoch": 0.11994128748165234, - "grad_norm": 5.679156712807628, - "learning_rate": 1.9994833245098423e-05, - "loss": 2.5833, + "epoch": 0.08076814459192319, + "grad_norm": 5.736030805487781, + "learning_rate": 1.7931034482758623e-05, + "loss": 1.0218, "step": 572 }, { - "epoch": 0.1201509750471797, - "grad_norm": 5.685530218272313, - "learning_rate": 1.9994760224044142e-05, - "loss": 2.759, + "epoch": 0.08090934764190906, + "grad_norm": 5.852691561823155, + "learning_rate": 1.7962382445141066e-05, + "loss": 1.2907, "step": 573 }, { - "epoch": 0.12036066261270707, - "grad_norm": 6.043767935873941, - "learning_rate": 1.9994686690741066e-05, - "loss": 2.7435, + "epoch": 0.08105055069189494, + "grad_norm": 5.848770184732747, + "learning_rate": 1.7993730407523515e-05, + "loss": 1.1728, "step": 574 }, { - "epoch": 0.12057035017823443, - "grad_norm": 5.502999826879568, - "learning_rate": 1.9994612645192957e-05, - "loss": 2.5119, + "epoch": 0.08119175374188083, + "grad_norm": 5.421475194093066, + "learning_rate": 1.8025078369905957e-05, + "loss": 1.1785, "step": 575 }, { - "epoch": 0.1207800377437618, - "grad_norm": 6.050329567382397, - "learning_rate": 1.9994538087403623e-05, - "loss": 2.4762, + "epoch": 0.08133295679186671, + "grad_norm": 5.6427491867124075, + "learning_rate": 1.8056426332288403e-05, + "loss": 1.1839, "step": 576 }, { - "epoch": 0.12098972530928916, - "grad_norm": 5.008427850487914, - "learning_rate": 1.999446301737687e-05, - "loss": 2.6466, + "epoch": 0.08147415984185258, + "grad_norm": 5.507774894364867, + "learning_rate": 1.808777429467085e-05, + "loss": 1.0556, "step": 577 }, { - "epoch": 0.12119941287481652, - "grad_norm": 5.533879498629465, - "learning_rate": 1.9994387435116557e-05, - "loss": 2.5645, + "epoch": 0.08161536289183846, + "grad_norm": 5.716432649942122, + "learning_rate": 1.811912225705329e-05, + "loss": 1.0405, "step": 578 }, { - "epoch": 0.12140910044034389, - "grad_norm": 6.042839943721938, - "learning_rate": 1.999431134062655e-05, - "loss": 2.5004, + "epoch": 0.08175656594182434, + "grad_norm": 5.837884875128244, + "learning_rate": 1.815047021943574e-05, + "loss": 1.1678, "step": 579 }, { - "epoch": 0.12161878800587125, - "grad_norm": 5.467755065739939, - "learning_rate": 1.9994234733910754e-05, - "loss": 2.3032, + "epoch": 0.08189776899181023, + "grad_norm": 5.300377434600539, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.9418, "step": 580 }, { - "epoch": 0.12182847557139861, - "grad_norm": 5.63914808406953, - "learning_rate": 1.9994157614973094e-05, - "loss": 2.5145, + "epoch": 0.0820389720417961, + "grad_norm": 6.725320147590814, + "learning_rate": 1.8213166144200628e-05, + "loss": 1.232, "step": 581 }, { - "epoch": 0.12203816313692598, - "grad_norm": 6.162111971834968, - "learning_rate": 1.999407998381752e-05, - "loss": 2.7087, + "epoch": 0.08218017509178198, + "grad_norm": 6.099267875865595, + "learning_rate": 1.8244514106583073e-05, + "loss": 1.1636, "step": 582 }, { - "epoch": 0.12224785070245335, - "grad_norm": 5.225174953294268, - "learning_rate": 1.999400184044802e-05, - "loss": 2.3773, + "epoch": 0.08232137814176786, + "grad_norm": 7.996285191743562, + "learning_rate": 1.827586206896552e-05, + "loss": 1.3492, "step": 583 }, { - "epoch": 0.12245753826798071, - "grad_norm": 5.679697001972673, - "learning_rate": 1.9993923184868588e-05, - "loss": 2.2435, + "epoch": 0.08246258119175374, + "grad_norm": 5.016637946543329, + "learning_rate": 1.8307210031347965e-05, + "loss": 0.9026, "step": 584 }, { - "epoch": 0.12266722583350807, - "grad_norm": 6.070179558093573, - "learning_rate": 1.999384401708326e-05, - "loss": 2.7846, + "epoch": 0.08260378424173963, + "grad_norm": 5.495762050588071, + "learning_rate": 1.8338557993730407e-05, + "loss": 0.9803, "step": 585 }, { - "epoch": 0.12287691339903543, - "grad_norm": 4.897891464402482, - "learning_rate": 1.999376433709609e-05, - "loss": 2.3082, + "epoch": 0.0827449872917255, + "grad_norm": 6.3922637350643265, + "learning_rate": 1.8369905956112856e-05, + "loss": 1.0984, "step": 586 }, { - "epoch": 0.1230866009645628, - "grad_norm": 5.836124997880415, - "learning_rate": 1.9993684144911167e-05, - "loss": 2.7073, + "epoch": 0.08288619034171138, + "grad_norm": 5.392388211200259, + "learning_rate": 1.84012539184953e-05, + "loss": 0.894, "step": 587 }, { - "epoch": 0.12329628853009017, - "grad_norm": 5.627032650978459, - "learning_rate": 1.9993603440532596e-05, - "loss": 2.5463, + "epoch": 0.08302739339169726, + "grad_norm": 6.690298945925227, + "learning_rate": 1.8432601880877744e-05, + "loss": 1.2125, "step": 588 }, { - "epoch": 0.12350597609561753, - "grad_norm": 6.265836229503592, - "learning_rate": 1.9993522223964518e-05, - "loss": 2.6539, + "epoch": 0.08316859644168315, + "grad_norm": 5.671717685738797, + "learning_rate": 1.846394984326019e-05, + "loss": 0.9612, "step": 589 }, { - "epoch": 0.12371566366114489, - "grad_norm": 5.781070108933454, - "learning_rate": 1.999344049521109e-05, - "loss": 2.6672, + "epoch": 0.08330979949166901, + "grad_norm": 5.067388868181858, + "learning_rate": 1.8495297805642636e-05, + "loss": 1.0224, "step": 590 }, { - "epoch": 0.12392535122667225, - "grad_norm": 4.983512777309647, - "learning_rate": 1.9993358254276508e-05, - "loss": 2.6151, + "epoch": 0.0834510025416549, + "grad_norm": 5.623803728888419, + "learning_rate": 1.852664576802508e-05, + "loss": 0.9653, "step": 591 }, { - "epoch": 0.12413503879219963, - "grad_norm": 5.321385752891051, - "learning_rate": 1.999327550116498e-05, - "loss": 2.5062, + "epoch": 0.08359220559164078, + "grad_norm": 6.601627551141263, + "learning_rate": 1.8557993730407524e-05, + "loss": 1.1374, "step": 592 }, { - "epoch": 0.12434472635772699, - "grad_norm": 5.398206354535945, - "learning_rate": 1.999319223588075e-05, - "loss": 2.3726, + "epoch": 0.08373340864162666, + "grad_norm": 6.6027030820492545, + "learning_rate": 1.858934169278997e-05, + "loss": 1.1728, "step": 593 }, { - "epoch": 0.12455441392325435, - "grad_norm": 5.114306534153933, - "learning_rate": 1.9993108458428087e-05, - "loss": 2.523, + "epoch": 0.08387461169161253, + "grad_norm": 5.333670417374938, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.9548, "step": 594 }, { - "epoch": 0.12476410148878171, - "grad_norm": 5.170649912916199, - "learning_rate": 1.999302416881128e-05, - "loss": 2.5864, + "epoch": 0.08401581474159842, + "grad_norm": 6.198321222438251, + "learning_rate": 1.865203761755486e-05, + "loss": 1.2297, "step": 595 }, { - "epoch": 0.12497378905430907, - "grad_norm": 5.6163494832277845, - "learning_rate": 1.999293936703466e-05, - "loss": 2.5966, + "epoch": 0.0841570177915843, + "grad_norm": 5.9431558207548925, + "learning_rate": 1.8683385579937306e-05, + "loss": 1.014, "step": 596 }, { - "epoch": 0.12518347661983645, - "grad_norm": 5.456525534616905, - "learning_rate": 1.9992854053102556e-05, - "loss": 2.7464, + "epoch": 0.08429822084157018, + "grad_norm": 6.710774989517947, + "learning_rate": 1.8714733542319752e-05, + "loss": 1.249, "step": 597 }, { - "epoch": 0.1253931641853638, - "grad_norm": 6.050680813839292, - "learning_rate": 1.9992768227019357e-05, - "loss": 2.5624, + "epoch": 0.08443942389155605, + "grad_norm": 5.379538607412715, + "learning_rate": 1.8746081504702194e-05, + "loss": 0.957, "step": 598 }, { - "epoch": 0.12560285175089117, - "grad_norm": 5.283074996332358, - "learning_rate": 1.9992681888789453e-05, - "loss": 2.6451, + "epoch": 0.08458062694154193, + "grad_norm": 7.303180959939755, + "learning_rate": 1.877742946708464e-05, + "loss": 1.2566, "step": 599 }, { - "epoch": 0.12581253931641853, - "grad_norm": 5.367844491072011, - "learning_rate": 1.999259503841727e-05, - "loss": 2.4811, + "epoch": 0.08472182999152782, + "grad_norm": 5.86211539459515, + "learning_rate": 1.8808777429467086e-05, + "loss": 1.0868, "step": 600 }, { - "epoch": 0.1260222268819459, - "grad_norm": 6.248909724328069, - "learning_rate": 1.999250767590726e-05, - "loss": 2.722, + "epoch": 0.0848630330415137, + "grad_norm": 5.239541128255798, + "learning_rate": 1.884012539184953e-05, + "loss": 1.1452, "step": 601 }, { - "epoch": 0.12623191444747325, - "grad_norm": 5.552832606107895, - "learning_rate": 1.9992419801263902e-05, - "loss": 2.7244, + "epoch": 0.08500423609149958, + "grad_norm": 6.397091785620323, + "learning_rate": 1.8871473354231977e-05, + "loss": 1.1904, "step": 602 }, { - "epoch": 0.12644160201300064, - "grad_norm": 5.252252969697778, - "learning_rate": 1.9992331414491697e-05, - "loss": 2.688, + "epoch": 0.08514543914148545, + "grad_norm": 6.520562809731438, + "learning_rate": 1.8902821316614423e-05, + "loss": 1.3573, "step": 603 }, { - "epoch": 0.126651289578528, - "grad_norm": 6.123895763343174, - "learning_rate": 1.9992242515595175e-05, - "loss": 2.7344, + "epoch": 0.08528664219147133, + "grad_norm": 4.590438272201157, + "learning_rate": 1.893416927899687e-05, + "loss": 0.8604, "step": 604 }, { - "epoch": 0.12686097714405536, - "grad_norm": 5.302942206769204, - "learning_rate": 1.9992153104578892e-05, - "loss": 2.6337, + "epoch": 0.08542784524145722, + "grad_norm": 5.576165116459939, + "learning_rate": 1.896551724137931e-05, + "loss": 1.0791, "step": 605 }, { - "epoch": 0.12707066470958273, - "grad_norm": 5.914274981623615, - "learning_rate": 1.9992063181447437e-05, - "loss": 2.732, + "epoch": 0.0855690482914431, + "grad_norm": 5.920010495599947, + "learning_rate": 1.8996865203761756e-05, + "loss": 1.1234, "step": 606 }, { - "epoch": 0.1272803522751101, - "grad_norm": 6.142168819060165, - "learning_rate": 1.999197274620541e-05, - "loss": 2.6441, + "epoch": 0.08571025134142897, + "grad_norm": 6.190645354123114, + "learning_rate": 1.9028213166144202e-05, + "loss": 1.1197, "step": 607 }, { - "epoch": 0.12749003984063745, - "grad_norm": 4.973284653106787, - "learning_rate": 1.9991881798857453e-05, - "loss": 2.611, + "epoch": 0.08585145439141485, + "grad_norm": 6.495354708349006, + "learning_rate": 1.9059561128526648e-05, + "loss": 1.3107, "step": 608 }, { - "epoch": 0.1276997274061648, - "grad_norm": 5.264163042836751, - "learning_rate": 1.999179033940822e-05, - "loss": 2.6344, + "epoch": 0.08599265744140074, + "grad_norm": 6.182901350359474, + "learning_rate": 1.9090909090909094e-05, + "loss": 1.062, "step": 609 }, { - "epoch": 0.12790941497169217, - "grad_norm": 4.9408790866947845, - "learning_rate": 1.9991698367862402e-05, - "loss": 2.553, + "epoch": 0.08613386049138662, + "grad_norm": 5.3296862461077215, + "learning_rate": 1.9122257053291536e-05, + "loss": 0.9507, "step": 610 }, { - "epoch": 0.12811910253721953, - "grad_norm": 5.91128114331133, - "learning_rate": 1.9991605884224718e-05, - "loss": 2.5662, + "epoch": 0.08627506354137249, + "grad_norm": 5.766012786864861, + "learning_rate": 1.915360501567398e-05, + "loss": 0.996, "step": 611 }, { - "epoch": 0.12832879010274692, - "grad_norm": 6.278252666825148, - "learning_rate": 1.99915128884999e-05, - "loss": 2.6621, + "epoch": 0.08641626659135837, + "grad_norm": 5.058032189774383, + "learning_rate": 1.9184952978056427e-05, + "loss": 1.0008, "step": 612 }, { - "epoch": 0.12853847766827428, - "grad_norm": 6.73800212945431, - "learning_rate": 1.9991419380692712e-05, - "loss": 2.5745, + "epoch": 0.08655746964134425, + "grad_norm": 5.4571204911719144, + "learning_rate": 1.9216300940438873e-05, + "loss": 1.1685, "step": 613 }, { - "epoch": 0.12874816523380164, - "grad_norm": 5.702948291536183, - "learning_rate": 1.999132536080796e-05, - "loss": 2.524, + "epoch": 0.08669867269133014, + "grad_norm": 7.885139131692505, + "learning_rate": 1.924764890282132e-05, + "loss": 1.3801, "step": 614 }, { - "epoch": 0.128957852799329, - "grad_norm": 5.48889743512358, - "learning_rate": 1.9991230828850448e-05, - "loss": 2.5485, + "epoch": 0.086839875741316, + "grad_norm": 5.70905121788261, + "learning_rate": 1.9278996865203764e-05, + "loss": 1.07, "step": 615 }, { - "epoch": 0.12916754036485636, - "grad_norm": 5.739661561642836, - "learning_rate": 1.999113578482503e-05, - "loss": 2.4045, + "epoch": 0.08698107879130189, + "grad_norm": 5.789647737306275, + "learning_rate": 1.931034482758621e-05, + "loss": 1.1097, "step": 616 }, { - "epoch": 0.12937722793038373, - "grad_norm": 5.238697864654882, - "learning_rate": 1.9991040228736573e-05, - "loss": 2.5972, + "epoch": 0.08712228184128777, + "grad_norm": 5.333824466358461, + "learning_rate": 1.9341692789968652e-05, + "loss": 1.0032, "step": 617 }, { - "epoch": 0.1295869154959111, - "grad_norm": 6.376141328533404, - "learning_rate": 1.9990944160589975e-05, - "loss": 2.7524, + "epoch": 0.08726348489127365, + "grad_norm": 5.279567562817471, + "learning_rate": 1.9373040752351098e-05, + "loss": 0.9815, "step": 618 }, { - "epoch": 0.12979660306143845, - "grad_norm": 5.536560112421979, - "learning_rate": 1.9990847580390164e-05, - "loss": 2.6407, + "epoch": 0.08740468794125954, + "grad_norm": 5.946563250288684, + "learning_rate": 1.9404388714733544e-05, + "loss": 1.2617, "step": 619 }, { - "epoch": 0.1300062906269658, - "grad_norm": 6.489063070000069, - "learning_rate": 1.9990750488142084e-05, - "loss": 2.7913, + "epoch": 0.0875458909912454, + "grad_norm": 5.945772051557553, + "learning_rate": 1.943573667711599e-05, + "loss": 1.0628, "step": 620 }, { - "epoch": 0.1302159781924932, - "grad_norm": 5.734717822189088, - "learning_rate": 1.9990652883850712e-05, - "loss": 2.6737, + "epoch": 0.08768709404123129, + "grad_norm": 6.66789076667815, + "learning_rate": 1.9467084639498435e-05, + "loss": 1.3254, "step": 621 }, { - "epoch": 0.13042566575802056, - "grad_norm": 5.4608587689447745, - "learning_rate": 1.9990554767521052e-05, - "loss": 2.419, + "epoch": 0.08782829709121717, + "grad_norm": 6.229491991717858, + "learning_rate": 1.9498432601880877e-05, + "loss": 1.147, "step": 622 }, { - "epoch": 0.13063535332354792, - "grad_norm": 4.682214953059233, - "learning_rate": 1.9990456139158136e-05, - "loss": 2.4798, + "epoch": 0.08796950014120306, + "grad_norm": 5.2543706086447735, + "learning_rate": 1.9529780564263326e-05, + "loss": 1.0044, "step": 623 }, { - "epoch": 0.13084504088907528, - "grad_norm": 5.643331962031569, - "learning_rate": 1.999035699876701e-05, - "loss": 2.684, + "epoch": 0.08811070319118892, + "grad_norm": 4.64518538823868, + "learning_rate": 1.956112852664577e-05, + "loss": 0.8507, "step": 624 }, { - "epoch": 0.13105472845460264, - "grad_norm": 5.776023410811839, - "learning_rate": 1.9990257346352765e-05, - "loss": 2.59, + "epoch": 0.08825190624117481, + "grad_norm": 5.87949355033456, + "learning_rate": 1.9592476489028214e-05, + "loss": 1.1571, "step": 625 }, { - "epoch": 0.13126441602013, - "grad_norm": 5.600144616010864, - "learning_rate": 1.99901571819205e-05, - "loss": 2.681, + "epoch": 0.08839310929116069, + "grad_norm": 5.857411145763842, + "learning_rate": 1.962382445141066e-05, + "loss": 1.1866, "step": 626 }, { - "epoch": 0.13147410358565736, - "grad_norm": 5.933435259956886, - "learning_rate": 1.9990056505475354e-05, - "loss": 2.7423, + "epoch": 0.08853431234114657, + "grad_norm": 6.893548328142488, + "learning_rate": 1.9655172413793106e-05, + "loss": 1.2687, "step": 627 }, { - "epoch": 0.13168379115118473, - "grad_norm": 5.136463019492448, - "learning_rate": 1.9989955317022488e-05, - "loss": 2.6539, + "epoch": 0.08867551539113244, + "grad_norm": 5.831663304735709, + "learning_rate": 1.968652037617555e-05, + "loss": 1.1951, "step": 628 }, { - "epoch": 0.1318934787167121, - "grad_norm": 5.634815883081778, - "learning_rate": 1.998985361656708e-05, - "loss": 2.5686, + "epoch": 0.08881671844111833, + "grad_norm": 4.973545107047332, + "learning_rate": 1.9717868338557994e-05, + "loss": 1.123, "step": 629 }, { - "epoch": 0.13210316628223948, - "grad_norm": 6.1728856249187665, - "learning_rate": 1.9989751404114353e-05, - "loss": 2.6307, + "epoch": 0.08895792149110421, + "grad_norm": 6.500495792637582, + "learning_rate": 1.9749216300940443e-05, + "loss": 1.0234, "step": 630 }, { - "epoch": 0.13231285384776684, - "grad_norm": 5.617788962890585, - "learning_rate": 1.9989648679669534e-05, - "loss": 2.8053, + "epoch": 0.08909912454109009, + "grad_norm": 5.3743364068975366, + "learning_rate": 1.9780564263322885e-05, + "loss": 1.0438, "step": 631 }, { - "epoch": 0.1325225414132942, - "grad_norm": 5.242809592919124, - "learning_rate": 1.99895454432379e-05, - "loss": 2.6759, + "epoch": 0.08924032759107597, + "grad_norm": 5.121731384137619, + "learning_rate": 1.981191222570533e-05, + "loss": 1.1276, "step": 632 }, { - "epoch": 0.13273222897882156, - "grad_norm": 4.697066641496849, - "learning_rate": 1.9989441694824733e-05, - "loss": 2.5688, + "epoch": 0.08938153064106184, + "grad_norm": 6.311695481617017, + "learning_rate": 1.9843260188087777e-05, + "loss": 1.2687, "step": 633 }, { - "epoch": 0.13294191654434892, - "grad_norm": 5.023310617072543, - "learning_rate": 1.9989337434435355e-05, - "loss": 2.7492, + "epoch": 0.08952273369104773, + "grad_norm": 6.615594164050313, + "learning_rate": 1.987460815047022e-05, + "loss": 1.2163, "step": 634 }, { - "epoch": 0.13315160410987628, - "grad_norm": 4.861011579498409, - "learning_rate": 1.998923266207511e-05, - "loss": 2.4153, + "epoch": 0.08966393674103361, + "grad_norm": 5.524936353885061, + "learning_rate": 1.9905956112852668e-05, + "loss": 1.1537, "step": 635 }, { - "epoch": 0.13336129167540364, - "grad_norm": 4.876867865694473, - "learning_rate": 1.998912737774936e-05, - "loss": 2.4357, + "epoch": 0.08980513979101949, + "grad_norm": 4.519873766037404, + "learning_rate": 1.993730407523511e-05, + "loss": 0.9077, "step": 636 }, { - "epoch": 0.133570979240931, - "grad_norm": 5.72943740501241, - "learning_rate": 1.998902158146351e-05, - "loss": 2.3662, + "epoch": 0.08994634284100536, + "grad_norm": 6.09576205867513, + "learning_rate": 1.9968652037617556e-05, + "loss": 1.0986, "step": 637 }, { - "epoch": 0.13378066680645836, - "grad_norm": 6.564313478184733, - "learning_rate": 1.9988915273222978e-05, - "loss": 2.7265, + "epoch": 0.09008754589099124, + "grad_norm": 5.324926342669233, + "learning_rate": 2e-05, + "loss": 1.0757, "step": 638 }, { - "epoch": 0.13399035437198575, - "grad_norm": 6.022619420405473, - "learning_rate": 1.9988808453033213e-05, - "loss": 2.6599, + "epoch": 0.09022874894097713, + "grad_norm": 5.666949160368743, + "learning_rate": 1.9999999883802162e-05, + "loss": 0.971, "step": 639 }, { - "epoch": 0.13420004193751311, - "grad_norm": 5.575552897830298, - "learning_rate": 1.9988701120899693e-05, - "loss": 2.6047, + "epoch": 0.09036995199096301, + "grad_norm": 7.1896339702510526, + "learning_rate": 1.999999953520864e-05, + "loss": 1.1724, "step": 640 }, { - "epoch": 0.13440972950304048, - "grad_norm": 5.512229435159064, - "learning_rate": 1.9988593276827912e-05, - "loss": 2.5313, + "epoch": 0.09051115504094888, + "grad_norm": 5.770082078125777, + "learning_rate": 1.999999895421945e-05, + "loss": 1.1214, "step": 641 }, { - "epoch": 0.13461941706856784, - "grad_norm": 5.014022192346478, - "learning_rate": 1.99884849208234e-05, - "loss": 2.5894, + "epoch": 0.09065235809093476, + "grad_norm": 6.744688401394613, + "learning_rate": 1.9999998140834606e-05, + "loss": 1.1188, "step": 642 }, { - "epoch": 0.1348291046340952, - "grad_norm": 6.3607236447902835, - "learning_rate": 1.9988376052891716e-05, - "loss": 2.6781, + "epoch": 0.09079356114092065, + "grad_norm": 5.364338641891255, + "learning_rate": 1.999999709505412e-05, + "loss": 1.0897, "step": 643 }, { - "epoch": 0.13503879219962256, - "grad_norm": 5.40513632427288, - "learning_rate": 1.998826667303843e-05, - "loss": 2.3419, + "epoch": 0.09093476419090653, + "grad_norm": 5.535805071291531, + "learning_rate": 1.9999995816878023e-05, + "loss": 1.1295, "step": 644 }, { - "epoch": 0.13524847976514992, - "grad_norm": 5.837611823987948, - "learning_rate": 1.9988156781269163e-05, - "loss": 2.6317, + "epoch": 0.0910759672408924, + "grad_norm": 5.078259395818064, + "learning_rate": 1.999999430630634e-05, + "loss": 1.0883, "step": 645 }, { - "epoch": 0.13545816733067728, - "grad_norm": 6.721032888952139, - "learning_rate": 1.998804637758953e-05, - "loss": 2.7753, + "epoch": 0.09121717029087828, + "grad_norm": 5.607273524163378, + "learning_rate": 1.9999992563339106e-05, + "loss": 1.1246, "step": 646 }, { - "epoch": 0.13566785489620464, - "grad_norm": 6.083190464187132, - "learning_rate": 1.9987935462005198e-05, - "loss": 2.5566, + "epoch": 0.09135837334086416, + "grad_norm": 5.9616108866529, + "learning_rate": 1.9999990587976364e-05, + "loss": 1.2165, "step": 647 }, { - "epoch": 0.13587754246173203, - "grad_norm": 6.077949283580154, - "learning_rate": 1.998782403452185e-05, - "loss": 2.6409, + "epoch": 0.09149957639085005, + "grad_norm": 5.428490845993007, + "learning_rate": 1.9999988380218164e-05, + "loss": 1.0671, "step": 648 }, { - "epoch": 0.1360872300272594, - "grad_norm": 5.624873727632852, - "learning_rate": 1.9987712095145196e-05, - "loss": 2.5109, + "epoch": 0.09164077944083593, + "grad_norm": 6.0269452495645375, + "learning_rate": 1.9999985940064552e-05, + "loss": 1.1748, "step": 649 }, { - "epoch": 0.13629691759278675, - "grad_norm": 5.149021463965709, - "learning_rate": 1.9987599643880977e-05, - "loss": 2.5359, + "epoch": 0.0917819824908218, + "grad_norm": 5.824579105718113, + "learning_rate": 1.9999983267515578e-05, + "loss": 1.1847, "step": 650 }, { - "epoch": 0.13650660515831411, - "grad_norm": 4.489888875038243, - "learning_rate": 1.9987486680734955e-05, - "loss": 2.355, + "epoch": 0.09192318554080768, + "grad_norm": 6.158700459715279, + "learning_rate": 1.9999980362571318e-05, + "loss": 1.0948, "step": 651 }, { - "epoch": 0.13671629272384148, - "grad_norm": 5.294389301140457, - "learning_rate": 1.9987373205712912e-05, - "loss": 2.6437, + "epoch": 0.09206438859079356, + "grad_norm": 5.703276946645952, + "learning_rate": 1.9999977225231833e-05, + "loss": 1.2464, "step": 652 }, { - "epoch": 0.13692598028936884, - "grad_norm": 6.211927293086614, - "learning_rate": 1.9987259218820678e-05, - "loss": 2.7953, + "epoch": 0.09220559164077945, + "grad_norm": 5.826786775307854, + "learning_rate": 1.9999973855497196e-05, + "loss": 1.3517, "step": 653 }, { - "epoch": 0.1371356678548962, - "grad_norm": 5.83721495598414, - "learning_rate": 1.998714472006408e-05, - "loss": 2.416, + "epoch": 0.09234679469076532, + "grad_norm": 5.894383308645578, + "learning_rate": 1.999997025336748e-05, + "loss": 1.2618, "step": 654 }, { - "epoch": 0.13734535542042356, - "grad_norm": 5.206161014749805, - "learning_rate": 1.9987029709448996e-05, - "loss": 2.6799, + "epoch": 0.0924879977407512, + "grad_norm": 5.570146909107198, + "learning_rate": 1.9999966418842778e-05, + "loss": 1.092, "step": 655 }, { - "epoch": 0.13755504298595092, - "grad_norm": 5.5512605404149795, - "learning_rate": 1.998691418698132e-05, - "loss": 2.8845, + "epoch": 0.09262920079073708, + "grad_norm": 5.985116919433037, + "learning_rate": 1.9999962351923176e-05, + "loss": 1.0396, "step": 656 }, { - "epoch": 0.1377647305514783, - "grad_norm": 5.931845394998795, - "learning_rate": 1.9986798152666966e-05, - "loss": 2.6356, + "epoch": 0.09277040384072296, + "grad_norm": 5.386635594530617, + "learning_rate": 1.9999958052608766e-05, + "loss": 1.2431, "step": 657 }, { - "epoch": 0.13797441811700567, - "grad_norm": 5.622068071687936, - "learning_rate": 1.9986681606511888e-05, - "loss": 2.4955, + "epoch": 0.09291160689070883, + "grad_norm": 6.207369455383297, + "learning_rate": 1.999995352089965e-05, + "loss": 1.2984, "step": 658 }, { - "epoch": 0.13818410568253303, - "grad_norm": 5.2450933279206104, - "learning_rate": 1.9986564548522058e-05, - "loss": 2.5056, + "epoch": 0.09305280994069472, + "grad_norm": 6.132660019806295, + "learning_rate": 1.999994875679593e-05, + "loss": 1.3616, "step": 659 }, { - "epoch": 0.1383937932480604, - "grad_norm": 5.213714379448667, - "learning_rate": 1.998644697870347e-05, - "loss": 2.4313, + "epoch": 0.0931940129906806, + "grad_norm": 6.481054938157247, + "learning_rate": 1.9999943760297725e-05, + "loss": 1.0988, "step": 660 }, { - "epoch": 0.13860348081358775, - "grad_norm": 4.962907536927089, - "learning_rate": 1.9986328897062157e-05, - "loss": 2.5469, + "epoch": 0.09333521604066648, + "grad_norm": 5.7613169535325985, + "learning_rate": 1.9999938531405142e-05, + "loss": 1.0736, "step": 661 }, { - "epoch": 0.13881316837911511, - "grad_norm": 5.448549286590173, - "learning_rate": 1.9986210303604167e-05, - "loss": 2.7182, + "epoch": 0.09347641909065235, + "grad_norm": 5.255227266215469, + "learning_rate": 1.999993307011831e-05, + "loss": 0.9656, "step": 662 }, { - "epoch": 0.13902285594464248, - "grad_norm": 5.653507502704269, - "learning_rate": 1.998609119833558e-05, - "loss": 2.7621, + "epoch": 0.09361762214063823, + "grad_norm": 6.189745974331651, + "learning_rate": 1.9999927376437348e-05, + "loss": 1.135, "step": 663 }, { - "epoch": 0.13923254351016984, - "grad_norm": 5.958774481371084, - "learning_rate": 1.9985971581262498e-05, - "loss": 2.5839, + "epoch": 0.09375882519062412, + "grad_norm": 5.81189681277316, + "learning_rate": 1.9999921450362392e-05, + "loss": 1.1131, "step": 664 }, { - "epoch": 0.1394422310756972, - "grad_norm": 5.784459568602851, - "learning_rate": 1.998585145239105e-05, - "loss": 2.458, + "epoch": 0.09390002824061, + "grad_norm": 5.503829112292691, + "learning_rate": 1.9999915291893584e-05, + "loss": 1.1699, "step": 665 }, { - "epoch": 0.1396519186412246, - "grad_norm": 5.530312020168745, - "learning_rate": 1.99857308117274e-05, - "loss": 2.5512, + "epoch": 0.09404123129059588, + "grad_norm": 5.478567498765631, + "learning_rate": 1.999990890103106e-05, + "loss": 1.1486, "step": 666 }, { - "epoch": 0.13986160620675195, - "grad_norm": 6.1344985417981395, - "learning_rate": 1.9985609659277725e-05, - "loss": 2.4614, + "epoch": 0.09418243434058175, + "grad_norm": 6.067551237003436, + "learning_rate": 1.9999902277774975e-05, + "loss": 1.2134, "step": 667 }, { - "epoch": 0.1400712937722793, - "grad_norm": 5.7035173869033935, - "learning_rate": 1.9985487995048233e-05, - "loss": 2.523, + "epoch": 0.09432363739056764, + "grad_norm": 4.849793656947368, + "learning_rate": 1.9999895422125477e-05, + "loss": 0.9784, "step": 668 }, { - "epoch": 0.14028098133780667, - "grad_norm": 6.239261865278675, - "learning_rate": 1.9985365819045167e-05, - "loss": 2.8481, + "epoch": 0.09446484044055352, + "grad_norm": 5.139092118609053, + "learning_rate": 1.999988833408273e-05, + "loss": 1.0161, "step": 669 }, { - "epoch": 0.14049066890333403, - "grad_norm": 5.324058490245803, - "learning_rate": 1.998524313127478e-05, - "loss": 2.4229, + "epoch": 0.0946060434905394, + "grad_norm": 5.953497430636807, + "learning_rate": 1.9999881013646893e-05, + "loss": 1.2735, "step": 670 }, { - "epoch": 0.1407003564688614, - "grad_norm": 4.71799679414778, - "learning_rate": 1.9985119931743365e-05, - "loss": 2.6776, + "epoch": 0.09474724654052527, + "grad_norm": 5.739455802347055, + "learning_rate": 1.9999873460818142e-05, + "loss": 1.2257, "step": 671 }, { - "epoch": 0.14091004403438875, - "grad_norm": 5.691016122633507, - "learning_rate": 1.9984996220457236e-05, - "loss": 2.5361, + "epoch": 0.09488844959051115, + "grad_norm": 6.210078095732943, + "learning_rate": 1.999986567559665e-05, + "loss": 1.0667, "step": 672 }, { - "epoch": 0.14111973159991612, - "grad_norm": 4.249981795124725, - "learning_rate": 1.9984871997422732e-05, - "loss": 2.5796, + "epoch": 0.09502965264049704, + "grad_norm": 6.9847891440486665, + "learning_rate": 1.9999857657982597e-05, + "loss": 1.2512, "step": 673 }, { - "epoch": 0.14132941916544348, - "grad_norm": 5.693901831661372, - "learning_rate": 1.9984747262646222e-05, - "loss": 2.5433, + "epoch": 0.09517085569048292, + "grad_norm": 6.024046312113741, + "learning_rate": 1.9999849407976176e-05, + "loss": 1.1704, "step": 674 }, { - "epoch": 0.14153910673097087, - "grad_norm": 5.954077652552696, - "learning_rate": 1.9984622016134096e-05, - "loss": 2.5969, + "epoch": 0.09531205874046879, + "grad_norm": 4.820104959128535, + "learning_rate": 1.9999840925577568e-05, + "loss": 1.1433, "step": 675 }, { - "epoch": 0.14174879429649823, - "grad_norm": 5.428001411309745, - "learning_rate": 1.9984496257892775e-05, - "loss": 2.5192, + "epoch": 0.09545326179045467, + "grad_norm": 5.115622896516326, + "learning_rate": 1.9999832210786977e-05, + "loss": 1.0019, "step": 676 }, { - "epoch": 0.1419584818620256, - "grad_norm": 5.0598543050965885, - "learning_rate": 1.99843699879287e-05, - "loss": 2.3452, + "epoch": 0.09559446484044055, + "grad_norm": 5.77858269741972, + "learning_rate": 1.9999823263604606e-05, + "loss": 1.143, "step": 677 }, { - "epoch": 0.14216816942755295, - "grad_norm": 5.0767043318078064, - "learning_rate": 1.9984243206248347e-05, - "loss": 2.6712, + "epoch": 0.09573566789042644, + "grad_norm": 5.110085448833619, + "learning_rate": 1.999981408403066e-05, + "loss": 0.9884, "step": 678 }, { - "epoch": 0.1423778569930803, - "grad_norm": 4.601010244095382, - "learning_rate": 1.9984115912858217e-05, - "loss": 2.5923, + "epoch": 0.0958768709404123, + "grad_norm": 5.614887421941373, + "learning_rate": 1.999980467206535e-05, + "loss": 1.2248, "step": 679 }, { - "epoch": 0.14258754455860767, - "grad_norm": 5.424086112836491, - "learning_rate": 1.9983988107764828e-05, - "loss": 2.5772, + "epoch": 0.09601807399039819, + "grad_norm": 5.611956138773773, + "learning_rate": 1.99997950277089e-05, + "loss": 1.276, "step": 680 }, { - "epoch": 0.14279723212413503, - "grad_norm": 5.252267616555135, - "learning_rate": 1.998385979097473e-05, - "loss": 2.6972, + "epoch": 0.09615927704038407, + "grad_norm": 5.3752249888791495, + "learning_rate": 1.9999785150961536e-05, + "loss": 1.1671, "step": 681 }, { - "epoch": 0.1430069196896624, - "grad_norm": 5.463460655872463, - "learning_rate": 1.9983730962494504e-05, - "loss": 2.4517, + "epoch": 0.09630048009036996, + "grad_norm": 5.109802752343885, + "learning_rate": 1.9999775041823478e-05, + "loss": 0.9753, "step": 682 }, { - "epoch": 0.14321660725518975, - "grad_norm": 5.118115869057551, - "learning_rate": 1.998360162233075e-05, - "loss": 2.5055, + "epoch": 0.09644168314035584, + "grad_norm": 4.659592444779131, + "learning_rate": 1.999976470029497e-05, + "loss": 0.8942, "step": 683 }, { - "epoch": 0.14342629482071714, - "grad_norm": 5.744180697178427, - "learning_rate": 1.9983471770490093e-05, - "loss": 2.6948, + "epoch": 0.09658288619034171, + "grad_norm": 5.405217109466532, + "learning_rate": 1.9999754126376247e-05, + "loss": 1.169, "step": 684 }, { - "epoch": 0.1436359823862445, - "grad_norm": 5.230441377908594, - "learning_rate": 1.99833414069792e-05, - "loss": 2.6347, + "epoch": 0.09672408924032759, + "grad_norm": 5.5637464006244945, + "learning_rate": 1.9999743320067556e-05, + "loss": 1.229, "step": 685 }, { - "epoch": 0.14384566995177187, - "grad_norm": 5.406240431378833, - "learning_rate": 1.9983210531804736e-05, - "loss": 2.3523, + "epoch": 0.09686529229031347, + "grad_norm": 5.6768403523268915, + "learning_rate": 1.999973228136915e-05, + "loss": 1.1861, "step": 686 }, { - "epoch": 0.14405535751729923, - "grad_norm": 5.1111112729171575, - "learning_rate": 1.9983079144973424e-05, - "loss": 2.4532, + "epoch": 0.09700649534029936, + "grad_norm": 6.620644939123791, + "learning_rate": 1.9999721010281288e-05, + "loss": 1.1918, "step": 687 }, { - "epoch": 0.1442650450828266, - "grad_norm": 6.229726640457781, - "learning_rate": 1.998294724649199e-05, - "loss": 2.6002, + "epoch": 0.09714769839028523, + "grad_norm": 5.209816864875043, + "learning_rate": 1.9999709506804223e-05, + "loss": 1.1157, "step": 688 }, { - "epoch": 0.14447473264835395, - "grad_norm": 4.842476793201206, - "learning_rate": 1.9982814836367193e-05, - "loss": 2.5075, + "epoch": 0.09728890144027111, + "grad_norm": 5.409822280823739, + "learning_rate": 1.9999697770938227e-05, + "loss": 1.1351, "step": 689 }, { - "epoch": 0.1446844202138813, - "grad_norm": 5.199600262154254, - "learning_rate": 1.9982681914605823e-05, - "loss": 2.3085, + "epoch": 0.09743010449025699, + "grad_norm": 4.931291740088819, + "learning_rate": 1.999968580268358e-05, + "loss": 1.0615, "step": 690 }, { - "epoch": 0.14489410777940867, - "grad_norm": 6.1062450571318365, - "learning_rate": 1.998254848121469e-05, - "loss": 2.8684, + "epoch": 0.09757130754024287, + "grad_norm": 5.990438985640952, + "learning_rate": 1.999967360204055e-05, + "loss": 1.2046, "step": 691 }, { - "epoch": 0.14510379534493603, - "grad_norm": 5.772115902147379, - "learning_rate": 1.9982414536200633e-05, - "loss": 2.5755, + "epoch": 0.09771251059022874, + "grad_norm": 4.288297098821375, + "learning_rate": 1.999966116900942e-05, + "loss": 1.0324, "step": 692 }, { - "epoch": 0.14531348291046342, - "grad_norm": 5.5088024507795055, - "learning_rate": 1.9982280079570518e-05, - "loss": 2.5159, + "epoch": 0.09785371364021463, + "grad_norm": 4.950699747566915, + "learning_rate": 1.9999648503590488e-05, + "loss": 1.1933, "step": 693 }, { - "epoch": 0.14552317047599078, - "grad_norm": 5.495057670409247, - "learning_rate": 1.9982145111331235e-05, - "loss": 2.5204, + "epoch": 0.09799491669020051, + "grad_norm": 5.767445833191182, + "learning_rate": 1.9999635605784042e-05, + "loss": 1.3355, "step": 694 }, { - "epoch": 0.14573285804151814, - "grad_norm": 5.663383224467504, - "learning_rate": 1.9982009631489704e-05, - "loss": 2.4353, + "epoch": 0.09813611974018639, + "grad_norm": 5.218928818543916, + "learning_rate": 1.9999622475590383e-05, + "loss": 0.9571, "step": 695 }, { - "epoch": 0.1459425456070455, - "grad_norm": 5.466758344070865, - "learning_rate": 1.9981873640052863e-05, - "loss": 2.6035, + "epoch": 0.09827732279017226, + "grad_norm": 4.802705337559807, + "learning_rate": 1.9999609113009817e-05, + "loss": 0.9388, "step": 696 }, { - "epoch": 0.14615223317257287, - "grad_norm": 6.078622694955836, - "learning_rate": 1.9981737137027684e-05, - "loss": 2.6876, + "epoch": 0.09841852584015814, + "grad_norm": 5.388105902279214, + "learning_rate": 1.999959551804265e-05, + "loss": 0.9834, "step": 697 }, { - "epoch": 0.14636192073810023, - "grad_norm": 4.9166578396991385, - "learning_rate": 1.9981600122421172e-05, - "loss": 2.6663, + "epoch": 0.09855972889014403, + "grad_norm": 5.2169316246723065, + "learning_rate": 1.9999581690689204e-05, + "loss": 1.0518, "step": 698 }, { - "epoch": 0.1465716083036276, - "grad_norm": 6.131284466589723, - "learning_rate": 1.9981462596240337e-05, - "loss": 2.6511, + "epoch": 0.09870093194012991, + "grad_norm": 4.8462983840891765, + "learning_rate": 1.9999567630949798e-05, + "loss": 1.0323, "step": 699 }, { - "epoch": 0.14678129586915495, - "grad_norm": 4.979786024235127, - "learning_rate": 1.998132455849223e-05, - "loss": 2.528, + "epoch": 0.0988421349901158, + "grad_norm": 4.979942240728192, + "learning_rate": 1.999955333882476e-05, + "loss": 0.9719, "step": 700 }, { - "epoch": 0.1469909834346823, - "grad_norm": 5.057122135958919, - "learning_rate": 1.9981186009183927e-05, - "loss": 2.4776, + "epoch": 0.09898333804010166, + "grad_norm": 5.563624313268988, + "learning_rate": 1.9999538814314417e-05, + "loss": 1.0766, "step": 701 }, { - "epoch": 0.1472006710002097, - "grad_norm": 5.773137815876697, - "learning_rate": 1.9981046948322533e-05, - "loss": 2.6508, + "epoch": 0.09912454109008755, + "grad_norm": 5.7666428702306645, + "learning_rate": 1.9999524057419116e-05, + "loss": 1.2518, "step": 702 }, { - "epoch": 0.14741035856573706, - "grad_norm": 4.9536915294921435, - "learning_rate": 1.9980907375915173e-05, - "loss": 2.5322, + "epoch": 0.09926574414007343, + "grad_norm": 5.249095854501378, + "learning_rate": 1.9999509068139195e-05, + "loss": 1.1129, "step": 703 }, { - "epoch": 0.14762004613126442, - "grad_norm": 5.340343694368092, - "learning_rate": 1.9980767291968997e-05, - "loss": 2.4569, + "epoch": 0.09940694719005931, + "grad_norm": 5.262058498054311, + "learning_rate": 1.9999493846474995e-05, + "loss": 1.2255, "step": 704 }, { - "epoch": 0.14782973369679178, - "grad_norm": 4.839941696325849, - "learning_rate": 1.9980626696491185e-05, - "loss": 2.4397, + "epoch": 0.09954815024004518, + "grad_norm": 5.751945010797834, + "learning_rate": 1.9999478392426882e-05, + "loss": 1.3891, "step": 705 }, { - "epoch": 0.14803942126231914, - "grad_norm": 5.732583335149285, - "learning_rate": 1.9980485589488946e-05, - "loss": 2.587, + "epoch": 0.09968935329003106, + "grad_norm": 4.751815582453203, + "learning_rate": 1.9999462705995206e-05, + "loss": 1.0502, "step": 706 }, { - "epoch": 0.1482491088278465, - "grad_norm": 5.105108668151097, - "learning_rate": 1.998034397096951e-05, - "loss": 2.4192, + "epoch": 0.09983055634001695, + "grad_norm": 4.785141541279744, + "learning_rate": 1.9999446787180338e-05, + "loss": 0.993, "step": 707 }, { - "epoch": 0.14845879639337387, - "grad_norm": 5.326723333192696, - "learning_rate": 1.9980201840940136e-05, - "loss": 2.5819, + "epoch": 0.09997175939000283, + "grad_norm": 4.709430265877194, + "learning_rate": 1.9999430635982643e-05, + "loss": 1.0911, "step": 708 }, { - "epoch": 0.14866848395890123, - "grad_norm": 5.663789248616749, - "learning_rate": 1.9980059199408103e-05, - "loss": 2.6536, + "epoch": 0.1001129624399887, + "grad_norm": 5.1236130544956975, + "learning_rate": 1.9999414252402498e-05, + "loss": 1.149, "step": 709 }, { - "epoch": 0.14887817152442862, - "grad_norm": 5.970658029480189, - "learning_rate": 1.9979916046380734e-05, - "loss": 2.5629, + "epoch": 0.10025416548997458, + "grad_norm": 6.132400682455835, + "learning_rate": 1.9999397636440284e-05, + "loss": 1.1057, "step": 710 }, { - "epoch": 0.14908785908995598, - "grad_norm": 5.74965717949682, - "learning_rate": 1.997977238186535e-05, - "loss": 2.5661, + "epoch": 0.10039536853996046, + "grad_norm": 5.828726603762334, + "learning_rate": 1.999938078809639e-05, + "loss": 1.2996, "step": 711 }, { - "epoch": 0.14929754665548334, - "grad_norm": 6.02301762189861, - "learning_rate": 1.997962820586933e-05, - "loss": 2.7692, + "epoch": 0.10053657158994635, + "grad_norm": 5.987293666494228, + "learning_rate": 1.9999363707371203e-05, + "loss": 1.0761, "step": 712 }, { - "epoch": 0.1495072342210107, - "grad_norm": 4.579567037465211, - "learning_rate": 1.9979483518400055e-05, - "loss": 2.5482, + "epoch": 0.10067777463993222, + "grad_norm": 6.581807055331371, + "learning_rate": 1.9999346394265122e-05, + "loss": 1.1585, "step": 713 }, { - "epoch": 0.14971692178653806, - "grad_norm": 6.2559796630509075, - "learning_rate": 1.9979338319464938e-05, - "loss": 2.7388, + "epoch": 0.1008189776899181, + "grad_norm": 6.539521831297887, + "learning_rate": 1.999932884877855e-05, + "loss": 1.1535, "step": 714 }, { - "epoch": 0.14992660935206542, - "grad_norm": 5.0962604475265145, - "learning_rate": 1.9979192609071426e-05, - "loss": 2.5554, + "epoch": 0.10096018073990398, + "grad_norm": 5.4732360427182964, + "learning_rate": 1.9999311070911894e-05, + "loss": 1.1875, "step": 715 }, { - "epoch": 0.15013629691759278, - "grad_norm": 5.658627453339846, - "learning_rate": 1.9979046387226982e-05, - "loss": 2.6371, + "epoch": 0.10110138378988986, + "grad_norm": 5.784025575986175, + "learning_rate": 1.9999293060665565e-05, + "loss": 1.0287, "step": 716 }, { - "epoch": 0.15034598448312014, - "grad_norm": 5.592230022061712, - "learning_rate": 1.9978899653939105e-05, - "loss": 2.7505, + "epoch": 0.10124258683987575, + "grad_norm": 5.133736164167682, + "learning_rate": 1.9999274818039988e-05, + "loss": 1.1005, "step": 717 }, { - "epoch": 0.1505556720486475, - "grad_norm": 5.052591492697424, - "learning_rate": 1.9978752409215313e-05, - "loss": 2.4662, + "epoch": 0.10138378988986162, + "grad_norm": 6.510978354880005, + "learning_rate": 1.9999256343035577e-05, + "loss": 1.1927, "step": 718 }, { - "epoch": 0.1507653596141749, - "grad_norm": 4.63635384173006, - "learning_rate": 1.997860465306315e-05, - "loss": 2.4323, + "epoch": 0.1015249929398475, + "grad_norm": 5.500852274034257, + "learning_rate": 1.999923763565277e-05, + "loss": 1.0421, "step": 719 }, { - "epoch": 0.15097504717970225, - "grad_norm": 5.119397576289821, - "learning_rate": 1.9978456385490195e-05, - "loss": 2.6242, + "epoch": 0.10166619598983338, + "grad_norm": 6.198347709223239, + "learning_rate": 1.9999218695892e-05, + "loss": 1.2198, "step": 720 }, { - "epoch": 0.15118473474522962, - "grad_norm": 5.048631016593217, - "learning_rate": 1.9978307606504044e-05, - "loss": 2.5475, + "epoch": 0.10180739903981927, + "grad_norm": 4.873935303878561, + "learning_rate": 1.9999199523753703e-05, + "loss": 1.0713, "step": 721 }, { - "epoch": 0.15139442231075698, - "grad_norm": 6.32075870701772, - "learning_rate": 1.997815831611232e-05, - "loss": 2.568, + "epoch": 0.10194860208980513, + "grad_norm": 5.816079592242595, + "learning_rate": 1.9999180119238327e-05, + "loss": 1.2243, "step": 722 }, { - "epoch": 0.15160410987628434, - "grad_norm": 5.4321602770296, - "learning_rate": 1.9978008514322677e-05, - "loss": 2.6301, + "epoch": 0.10208980513979102, + "grad_norm": 5.568323616759773, + "learning_rate": 1.9999160482346328e-05, + "loss": 1.2409, "step": 723 }, { - "epoch": 0.1518137974418117, - "grad_norm": 5.605113035140233, - "learning_rate": 1.997785820114279e-05, - "loss": 2.7584, + "epoch": 0.1022310081897769, + "grad_norm": 6.036377036205735, + "learning_rate": 1.9999140613078154e-05, + "loss": 1.2706, "step": 724 }, { - "epoch": 0.15202348500733906, - "grad_norm": 6.450430833124885, - "learning_rate": 1.9977707376580365e-05, - "loss": 2.6253, + "epoch": 0.10237221123976278, + "grad_norm": 5.012922828562304, + "learning_rate": 1.999912051143427e-05, + "loss": 1.2475, "step": 725 }, { - "epoch": 0.15223317257286642, - "grad_norm": 5.982470803305132, - "learning_rate": 1.997755604064313e-05, - "loss": 2.6636, + "epoch": 0.10251341428974865, + "grad_norm": 4.598627902633691, + "learning_rate": 1.9999100177415143e-05, + "loss": 1.0442, "step": 726 }, { - "epoch": 0.15244286013839378, - "grad_norm": 4.9337984946437, - "learning_rate": 1.997740419333885e-05, - "loss": 2.378, + "epoch": 0.10265461733973454, + "grad_norm": 4.914847279973792, + "learning_rate": 1.999907961102125e-05, + "loss": 0.9987, "step": 727 }, { - "epoch": 0.15265254770392117, - "grad_norm": 4.8238368123520186, - "learning_rate": 1.9977251834675294e-05, - "loss": 2.4251, + "epoch": 0.10279582038972042, + "grad_norm": 5.307894371058464, + "learning_rate": 1.999905881225306e-05, + "loss": 1.1264, "step": 728 }, { - "epoch": 0.15286223526944853, - "grad_norm": 5.785317364947026, - "learning_rate": 1.9977098964660274e-05, - "loss": 2.8368, + "epoch": 0.1029370234397063, + "grad_norm": 5.810319439068577, + "learning_rate": 1.9999037781111067e-05, + "loss": 1.1089, "step": 729 }, { - "epoch": 0.1530719228349759, - "grad_norm": 5.711242094605837, - "learning_rate": 1.997694558330163e-05, - "loss": 2.8107, + "epoch": 0.10307822648969217, + "grad_norm": 4.786968652577345, + "learning_rate": 1.9999016517595752e-05, + "loss": 1.2382, "step": 730 }, { - "epoch": 0.15328161040050325, - "grad_norm": 5.552121805556651, - "learning_rate": 1.9976791690607226e-05, - "loss": 2.5561, + "epoch": 0.10321942953967805, + "grad_norm": 6.1084215923413385, + "learning_rate": 1.9998995021707614e-05, + "loss": 1.3161, "step": 731 }, { - "epoch": 0.15349129796603062, - "grad_norm": 5.049171025402422, - "learning_rate": 1.997663728658494e-05, - "loss": 2.5043, + "epoch": 0.10336063258966394, + "grad_norm": 5.056769533270197, + "learning_rate": 1.9998973293447143e-05, + "loss": 0.9979, "step": 732 }, { - "epoch": 0.15370098553155798, - "grad_norm": 5.697573904387599, - "learning_rate": 1.9976482371242687e-05, - "loss": 2.3494, + "epoch": 0.10350183563964982, + "grad_norm": 5.704456283407406, + "learning_rate": 1.9998951332814856e-05, + "loss": 1.095, "step": 733 }, { - "epoch": 0.15391067309708534, - "grad_norm": 5.641369840965116, - "learning_rate": 1.997632694458841e-05, - "loss": 2.3422, + "epoch": 0.1036430386896357, + "grad_norm": 6.043167917541847, + "learning_rate": 1.9998929139811257e-05, + "loss": 1.3111, "step": 734 }, { - "epoch": 0.1541203606626127, - "grad_norm": 5.431146595293463, - "learning_rate": 1.9976171006630077e-05, - "loss": 2.3649, + "epoch": 0.10378424173962157, + "grad_norm": 5.020497520449919, + "learning_rate": 1.9998906714436864e-05, + "loss": 1.1345, "step": 735 }, { - "epoch": 0.15433004822814006, - "grad_norm": 5.101927798951413, - "learning_rate": 1.9976014557375677e-05, - "loss": 2.4986, + "epoch": 0.10392544478960745, + "grad_norm": 4.85025193275784, + "learning_rate": 1.9998884056692195e-05, + "loss": 1.0362, "step": 736 }, { - "epoch": 0.15453973579366745, - "grad_norm": 4.923014747073183, - "learning_rate": 1.9975857596833227e-05, - "loss": 2.5067, + "epoch": 0.10406664783959334, + "grad_norm": 5.428213127714124, + "learning_rate": 1.999886116657778e-05, + "loss": 1.1797, "step": 737 }, { - "epoch": 0.1547494233591948, - "grad_norm": 4.848774676538115, - "learning_rate": 1.9975700125010773e-05, - "loss": 2.5402, + "epoch": 0.10420785088957922, + "grad_norm": 5.807489463471432, + "learning_rate": 1.999883804409415e-05, + "loss": 0.9791, "step": 738 }, { - "epoch": 0.15495911092472217, - "grad_norm": 5.115305746956279, - "learning_rate": 1.9975542141916383e-05, - "loss": 2.6393, + "epoch": 0.10434905393956509, + "grad_norm": 5.642919201101373, + "learning_rate": 1.999881468924184e-05, + "loss": 1.2226, "step": 739 }, { - "epoch": 0.15516879849024953, - "grad_norm": 5.349510685162554, - "learning_rate": 1.997538364755816e-05, - "loss": 2.7126, + "epoch": 0.10449025698955097, + "grad_norm": 5.271063846020795, + "learning_rate": 1.9998791102021396e-05, + "loss": 1.0923, "step": 740 }, { - "epoch": 0.1553784860557769, - "grad_norm": 4.967054520234963, - "learning_rate": 1.9975224641944225e-05, - "loss": 2.5267, + "epoch": 0.10463146003953686, + "grad_norm": 4.885668846840746, + "learning_rate": 1.9998767282433367e-05, + "loss": 0.8912, "step": 741 }, { - "epoch": 0.15558817362130425, - "grad_norm": 5.263228945948572, - "learning_rate": 1.9975065125082723e-05, - "loss": 2.5071, + "epoch": 0.10477266308952274, + "grad_norm": 4.900010257787578, + "learning_rate": 1.99987432304783e-05, + "loss": 1.0789, "step": 742 }, { - "epoch": 0.15579786118683162, - "grad_norm": 4.83917189253647, - "learning_rate": 1.9974905096981835e-05, - "loss": 2.3575, + "epoch": 0.10491386613950861, + "grad_norm": 5.652034656827883, + "learning_rate": 1.9998718946156766e-05, + "loss": 1.1742, "step": 743 }, { - "epoch": 0.15600754875235898, - "grad_norm": 5.3961003506352085, - "learning_rate": 1.9974744557649756e-05, - "loss": 2.6504, + "epoch": 0.10505506918949449, + "grad_norm": 5.040623538663468, + "learning_rate": 1.9998694429469315e-05, + "loss": 1.1059, "step": 744 }, { - "epoch": 0.15621723631788634, - "grad_norm": 4.98844314553697, - "learning_rate": 1.9974583507094723e-05, - "loss": 2.6481, + "epoch": 0.10519627223948037, + "grad_norm": 6.464269122005671, + "learning_rate": 1.9998669680416526e-05, + "loss": 1.2625, "step": 745 }, { - "epoch": 0.15642692388341373, - "grad_norm": 5.809041861839085, - "learning_rate": 1.9974421945324982e-05, - "loss": 2.5328, + "epoch": 0.10533747528946626, + "grad_norm": 5.624258426420456, + "learning_rate": 1.999864469899897e-05, + "loss": 1.0741, "step": 746 }, { - "epoch": 0.1566366114489411, - "grad_norm": 5.706972443697373, - "learning_rate": 1.9974259872348815e-05, - "loss": 2.6119, + "epoch": 0.10547867833945213, + "grad_norm": 6.572251059482539, + "learning_rate": 1.999861948521723e-05, + "loss": 1.2783, "step": 747 }, { - "epoch": 0.15684629901446845, - "grad_norm": 5.2228963489309095, - "learning_rate": 1.9974097288174535e-05, - "loss": 2.4938, + "epoch": 0.10561988138943801, + "grad_norm": 5.625549589571555, + "learning_rate": 1.9998594039071894e-05, + "loss": 1.2335, "step": 748 }, { - "epoch": 0.1570559865799958, - "grad_norm": 5.5058972830007, - "learning_rate": 1.9973934192810467e-05, - "loss": 2.665, + "epoch": 0.10576108443942389, + "grad_norm": 5.328741266989122, + "learning_rate": 1.999856836056355e-05, + "loss": 1.0974, "step": 749 }, { - "epoch": 0.15726567414552317, - "grad_norm": 6.3055086397332625, - "learning_rate": 1.997377058626497e-05, - "loss": 2.5374, + "epoch": 0.10590228748940977, + "grad_norm": 5.341384349114137, + "learning_rate": 1.9998542449692794e-05, + "loss": 0.8541, "step": 750 }, { - "epoch": 0.15747536171105053, - "grad_norm": 5.2272593588909, - "learning_rate": 1.9973606468546437e-05, - "loss": 2.5509, + "epoch": 0.10604349053939566, + "grad_norm": 5.736007555215294, + "learning_rate": 1.9998516306460226e-05, + "loss": 1.1804, "step": 751 }, { - "epoch": 0.1576850492765779, - "grad_norm": 5.2389144271210055, - "learning_rate": 1.997344183966327e-05, - "loss": 2.588, + "epoch": 0.10618469358938153, + "grad_norm": 4.897172795523395, + "learning_rate": 1.999848993086646e-05, + "loss": 1.1651, "step": 752 }, { - "epoch": 0.15789473684210525, - "grad_norm": 5.170506045786776, - "learning_rate": 1.9973276699623914e-05, - "loss": 2.3833, + "epoch": 0.10632589663936741, + "grad_norm": 4.533177272969853, + "learning_rate": 1.999846332291211e-05, + "loss": 0.9911, "step": 753 }, { - "epoch": 0.15810442440763262, - "grad_norm": 6.349661608621654, - "learning_rate": 1.9973111048436827e-05, - "loss": 2.5797, + "epoch": 0.10646709968935329, + "grad_norm": 5.231367703326481, + "learning_rate": 1.9998436482597784e-05, + "loss": 1.0903, "step": 754 }, { - "epoch": 0.15831411197316, - "grad_norm": 5.467234597564719, - "learning_rate": 1.99729448861105e-05, - "loss": 2.5061, + "epoch": 0.10660830273933918, + "grad_norm": 5.521757687458886, + "learning_rate": 1.9998409409924115e-05, + "loss": 1.1007, "step": 755 }, { - "epoch": 0.15852379953868737, - "grad_norm": 5.788314282563359, - "learning_rate": 1.9972778212653455e-05, - "loss": 2.5789, + "epoch": 0.10674950578932504, + "grad_norm": 5.4929736416325685, + "learning_rate": 1.9998382104891728e-05, + "loss": 1.0937, "step": 756 }, { - "epoch": 0.15873348710421473, - "grad_norm": 5.766248131111242, - "learning_rate": 1.9972611028074225e-05, - "loss": 2.6302, + "epoch": 0.10689070883931093, + "grad_norm": 5.903541946031825, + "learning_rate": 1.9998354567501262e-05, + "loss": 1.2178, "step": 757 }, { - "epoch": 0.1589431746697421, - "grad_norm": 5.851124746955546, - "learning_rate": 1.997244333238139e-05, - "loss": 2.5716, + "epoch": 0.10703191188929681, + "grad_norm": 5.701560028675846, + "learning_rate": 1.9998326797753352e-05, + "loss": 1.4536, "step": 758 }, { - "epoch": 0.15915286223526945, - "grad_norm": 5.647230932231138, - "learning_rate": 1.9972275125583527e-05, - "loss": 2.4891, + "epoch": 0.1071731149392827, + "grad_norm": 5.247477347574576, + "learning_rate": 1.9998298795648645e-05, + "loss": 1.1055, "step": 759 }, { - "epoch": 0.1593625498007968, - "grad_norm": 5.284928570683543, - "learning_rate": 1.9972106407689278e-05, - "loss": 2.3549, + "epoch": 0.10731431798926856, + "grad_norm": 4.6012264045742475, + "learning_rate": 1.999827056118779e-05, + "loss": 0.8451, "step": 760 }, { - "epoch": 0.15957223736632417, - "grad_norm": 6.82976176233091, - "learning_rate": 1.9971937178707278e-05, - "loss": 2.451, + "epoch": 0.10745552103925445, + "grad_norm": 5.101505642100971, + "learning_rate": 1.999824209437145e-05, + "loss": 0.9903, "step": 761 }, { - "epoch": 0.15978192493185153, - "grad_norm": 5.7689969603794085, - "learning_rate": 1.99717674386462e-05, - "loss": 2.2127, + "epoch": 0.10759672408924033, + "grad_norm": 5.538563910263696, + "learning_rate": 1.9998213395200277e-05, + "loss": 1.1274, "step": 762 }, { - "epoch": 0.1599916124973789, - "grad_norm": 5.3713345347098755, - "learning_rate": 1.9971597187514743e-05, - "loss": 2.4216, + "epoch": 0.10773792713922621, + "grad_norm": 4.756257820240059, + "learning_rate": 1.9998184463674947e-05, + "loss": 1.0958, "step": 763 }, { - "epoch": 0.16020130006290628, - "grad_norm": 5.121552622009936, - "learning_rate": 1.9971426425321637e-05, - "loss": 2.4033, + "epoch": 0.10787913018921208, + "grad_norm": 4.965508053130454, + "learning_rate": 1.9998155299796122e-05, + "loss": 1.0715, "step": 764 }, { - "epoch": 0.16041098762843364, - "grad_norm": 5.1771052992673425, - "learning_rate": 1.9971255152075635e-05, - "loss": 2.2227, + "epoch": 0.10802033323919796, + "grad_norm": 5.4704516094316356, + "learning_rate": 1.999812590356449e-05, + "loss": 1.3287, "step": 765 }, { - "epoch": 0.160620675193961, - "grad_norm": 5.751104264210732, - "learning_rate": 1.997108336778551e-05, - "loss": 2.562, + "epoch": 0.10816153628918385, + "grad_norm": 5.377616350364168, + "learning_rate": 1.999809627498073e-05, + "loss": 1.1041, "step": 766 }, { - "epoch": 0.16083036275948837, - "grad_norm": 6.41324399439957, - "learning_rate": 1.997091107246007e-05, - "loss": 2.8024, + "epoch": 0.10830273933916973, + "grad_norm": 5.654675297637625, + "learning_rate": 1.999806641404553e-05, + "loss": 1.0549, "step": 767 }, { - "epoch": 0.16104005032501573, - "grad_norm": 6.246885326610967, - "learning_rate": 1.9970738266108142e-05, - "loss": 2.7224, + "epoch": 0.10844394238915561, + "grad_norm": 6.544690276680263, + "learning_rate": 1.9998036320759582e-05, + "loss": 1.3446, "step": 768 }, { - "epoch": 0.1612497378905431, - "grad_norm": 5.5788167559955495, - "learning_rate": 1.9970564948738585e-05, - "loss": 2.5663, + "epoch": 0.10858514543914148, + "grad_norm": 5.625968326355665, + "learning_rate": 1.999800599512359e-05, + "loss": 1.2314, "step": 769 }, { - "epoch": 0.16145942545607045, - "grad_norm": 6.103869201794778, - "learning_rate": 1.997039112036028e-05, - "loss": 2.6032, + "epoch": 0.10872634848912736, + "grad_norm": 6.1398232285014895, + "learning_rate": 1.9997975437138256e-05, + "loss": 1.2515, "step": 770 }, { - "epoch": 0.1616691130215978, - "grad_norm": 5.093834157772151, - "learning_rate": 1.997021678098214e-05, - "loss": 2.3786, + "epoch": 0.10886755153911325, + "grad_norm": 6.13454450174957, + "learning_rate": 1.999794464680429e-05, + "loss": 1.0589, "step": 771 }, { - "epoch": 0.16187880058712517, - "grad_norm": 6.025378098467104, - "learning_rate": 1.9970041930613096e-05, - "loss": 2.4202, + "epoch": 0.10900875458909913, + "grad_norm": 4.6366569770462895, + "learning_rate": 1.999791362412241e-05, + "loss": 1.0316, "step": 772 }, { - "epoch": 0.16208848815265256, - "grad_norm": 4.9446767239003275, - "learning_rate": 1.996986656926211e-05, - "loss": 2.6345, + "epoch": 0.109149957639085, + "grad_norm": 4.923060372990307, + "learning_rate": 1.9997882369093334e-05, + "loss": 1.1058, "step": 773 }, { - "epoch": 0.16229817571817992, - "grad_norm": 5.151429866391372, - "learning_rate": 1.9969690696938173e-05, - "loss": 2.6276, + "epoch": 0.10929116068907088, + "grad_norm": 4.984834473524601, + "learning_rate": 1.9997850881717788e-05, + "loss": 1.1754, "step": 774 }, { - "epoch": 0.16250786328370728, - "grad_norm": 4.6507668918727285, - "learning_rate": 1.9969514313650294e-05, - "loss": 2.5375, + "epoch": 0.10943236373905676, + "grad_norm": 5.035565565394427, + "learning_rate": 1.9997819161996507e-05, + "loss": 1.1307, "step": 775 }, { - "epoch": 0.16271755084923464, - "grad_norm": 5.479062357904265, - "learning_rate": 1.9969337419407517e-05, - "loss": 2.6203, + "epoch": 0.10957356678904265, + "grad_norm": 5.040610041872462, + "learning_rate": 1.9997787209930222e-05, + "loss": 1.0287, "step": 776 }, { - "epoch": 0.162927238414762, - "grad_norm": 5.5366540558457285, - "learning_rate": 1.9969160014218904e-05, - "loss": 2.8279, + "epoch": 0.10971476983902852, + "grad_norm": 6.159249400645923, + "learning_rate": 1.9997755025519687e-05, + "loss": 1.0095, "step": 777 }, { - "epoch": 0.16313692598028937, - "grad_norm": 6.455368237461392, - "learning_rate": 1.9968982098093553e-05, - "loss": 2.7342, + "epoch": 0.1098559728890144, + "grad_norm": 5.5764104616174075, + "learning_rate": 1.999772260876564e-05, + "loss": 1.3386, "step": 778 }, { - "epoch": 0.16334661354581673, - "grad_norm": 6.114025923129414, - "learning_rate": 1.9968803671040574e-05, - "loss": 2.7242, + "epoch": 0.10999717593900028, + "grad_norm": 5.392220379789989, + "learning_rate": 1.9997689959668837e-05, + "loss": 1.1628, "step": 779 }, { - "epoch": 0.1635563011113441, - "grad_norm": 5.165162546701589, - "learning_rate": 1.9968624733069122e-05, - "loss": 2.7608, + "epoch": 0.11013837898898617, + "grad_norm": 6.082124062495701, + "learning_rate": 1.9997657078230034e-05, + "loss": 1.2274, "step": 780 }, { - "epoch": 0.16376598867687145, - "grad_norm": 5.183951403439922, - "learning_rate": 1.996844528418836e-05, - "loss": 2.6442, + "epoch": 0.11027958203897203, + "grad_norm": 5.4564933215479305, + "learning_rate": 1.9997623964450002e-05, + "loss": 1.2284, "step": 781 }, { - "epoch": 0.16397567624239884, - "grad_norm": 4.932360432093259, - "learning_rate": 1.996826532440749e-05, - "loss": 2.3996, + "epoch": 0.11042078508895792, + "grad_norm": 5.405189961559795, + "learning_rate": 1.9997590618329507e-05, + "loss": 1.0186, "step": 782 }, { - "epoch": 0.1641853638079262, - "grad_norm": 6.427461911630745, - "learning_rate": 1.9968084853735728e-05, - "loss": 2.6695, + "epoch": 0.1105619881389438, + "grad_norm": 5.0645601975123835, + "learning_rate": 1.9997557039869325e-05, + "loss": 1.0671, "step": 783 }, { - "epoch": 0.16439505137345356, - "grad_norm": 5.839031395817843, - "learning_rate": 1.9967903872182335e-05, - "loss": 2.8268, + "epoch": 0.11070319118892968, + "grad_norm": 5.86063819787201, + "learning_rate": 1.999752322907023e-05, + "loss": 1.3963, "step": 784 }, { - "epoch": 0.16460473893898092, - "grad_norm": 4.316046016427989, - "learning_rate": 1.9967722379756575e-05, - "loss": 2.4485, + "epoch": 0.11084439423891557, + "grad_norm": 5.642557978601218, + "learning_rate": 1.9997489185933015e-05, + "loss": 1.2447, "step": 785 }, { - "epoch": 0.16481442650450828, - "grad_norm": 5.17689136727919, - "learning_rate": 1.996754037646776e-05, - "loss": 2.4977, + "epoch": 0.11098559728890144, + "grad_norm": 5.104626697522372, + "learning_rate": 1.999745491045847e-05, + "loss": 1.0064, "step": 786 }, { - "epoch": 0.16502411407003564, - "grad_norm": 4.984608923869862, - "learning_rate": 1.9967357862325206e-05, - "loss": 2.2972, + "epoch": 0.11112680033888732, + "grad_norm": 4.268100775435925, + "learning_rate": 1.999742040264739e-05, + "loss": 1.0119, "step": 787 }, { - "epoch": 0.165233801635563, - "grad_norm": 5.764262154632816, - "learning_rate": 1.9967174837338282e-05, - "loss": 2.6403, + "epoch": 0.1112680033888732, + "grad_norm": 7.139410754305002, + "learning_rate": 1.9997385662500577e-05, + "loss": 1.3604, "step": 788 }, { - "epoch": 0.16544348920109037, - "grad_norm": 5.531201758921919, - "learning_rate": 1.9966991301516356e-05, - "loss": 2.6415, + "epoch": 0.11140920643885908, + "grad_norm": 4.946506825408107, + "learning_rate": 1.999735069001884e-05, + "loss": 0.818, "step": 789 }, { - "epoch": 0.16565317676661773, - "grad_norm": 5.9032219473923835, - "learning_rate": 1.9966807254868835e-05, - "loss": 2.3596, + "epoch": 0.11155040948884495, + "grad_norm": 5.780895871394449, + "learning_rate": 1.999731548520299e-05, + "loss": 1.0843, "step": 790 }, { - "epoch": 0.16586286433214512, - "grad_norm": 5.2307080735103195, - "learning_rate": 1.9966622697405163e-05, - "loss": 2.4864, + "epoch": 0.11169161253883084, + "grad_norm": 5.799587764253103, + "learning_rate": 1.999728004805384e-05, + "loss": 1.0935, "step": 791 }, { - "epoch": 0.16607255189767248, - "grad_norm": 5.3824415473570175, - "learning_rate": 1.9966437629134786e-05, - "loss": 2.6149, + "epoch": 0.11183281558881672, + "grad_norm": 6.748946933770958, + "learning_rate": 1.9997244378572227e-05, + "loss": 1.3733, "step": 792 }, { - "epoch": 0.16628223946319984, - "grad_norm": 5.589694336777668, - "learning_rate": 1.9966252050067197e-05, - "loss": 2.5896, + "epoch": 0.1119740186388026, + "grad_norm": 6.52900877438131, + "learning_rate": 1.999720847675897e-05, + "loss": 1.1624, "step": 793 }, { - "epoch": 0.1664919270287272, - "grad_norm": 6.493288853507978, - "learning_rate": 1.9966065960211907e-05, - "loss": 2.4996, + "epoch": 0.11211522168878847, + "grad_norm": 6.025993306476371, + "learning_rate": 1.9997172342614902e-05, + "loss": 0.8829, "step": 794 }, { - "epoch": 0.16670161459425456, - "grad_norm": 5.89449672328628, - "learning_rate": 1.996587935957845e-05, - "loss": 2.376, + "epoch": 0.11225642473877435, + "grad_norm": 5.018883638760266, + "learning_rate": 1.999713597614087e-05, + "loss": 1.0535, "step": 795 }, { - "epoch": 0.16691130215978192, - "grad_norm": 5.178448822931242, - "learning_rate": 1.996569224817639e-05, - "loss": 2.5124, + "epoch": 0.11239762778876024, + "grad_norm": 5.828023063069, + "learning_rate": 1.9997099377337714e-05, + "loss": 1.1162, "step": 796 }, { - "epoch": 0.16712098972530928, - "grad_norm": 5.314420049466998, - "learning_rate": 1.9965504626015314e-05, - "loss": 2.5139, + "epoch": 0.11253883083874612, + "grad_norm": 6.116322971079314, + "learning_rate": 1.9997062546206287e-05, + "loss": 1.3535, "step": 797 }, { - "epoch": 0.16733067729083664, - "grad_norm": 4.965540255561048, - "learning_rate": 1.9965316493104845e-05, - "loss": 2.4432, + "epoch": 0.112680033888732, + "grad_norm": 5.443814569332978, + "learning_rate": 1.999702548274744e-05, + "loss": 0.9972, "step": 798 }, { - "epoch": 0.167540364856364, - "grad_norm": 5.9976226159107116, - "learning_rate": 1.9965127849454625e-05, - "loss": 2.6128, + "epoch": 0.11282123693871787, + "grad_norm": 5.877595197960374, + "learning_rate": 1.9996988186962044e-05, + "loss": 0.9868, "step": 799 }, { - "epoch": 0.1677500524218914, - "grad_norm": 5.6868698522966, - "learning_rate": 1.9964938695074315e-05, - "loss": 2.3999, + "epoch": 0.11296243998870376, + "grad_norm": 4.453449245801207, + "learning_rate": 1.9996950658850956e-05, + "loss": 0.9016, "step": 800 }, { - "epoch": 0.16795973998741875, - "grad_norm": 5.068783438771577, - "learning_rate": 1.9964749029973616e-05, - "loss": 2.6127, + "epoch": 0.11310364303868964, + "grad_norm": 6.649263652676453, + "learning_rate": 1.9996912898415055e-05, + "loss": 1.3181, "step": 801 }, { - "epoch": 0.16816942755294612, - "grad_norm": 6.136687509953793, - "learning_rate": 1.9964558854162244e-05, - "loss": 2.8011, + "epoch": 0.11324484608867552, + "grad_norm": 4.548357238389129, + "learning_rate": 1.9996874905655214e-05, + "loss": 0.9035, "step": 802 }, { - "epoch": 0.16837911511847348, - "grad_norm": 4.800340421160969, - "learning_rate": 1.996436816764995e-05, - "loss": 2.5399, + "epoch": 0.11338604913866139, + "grad_norm": 5.666277269697033, + "learning_rate": 1.9996836680572315e-05, + "loss": 1.2294, "step": 803 }, { - "epoch": 0.16858880268400084, - "grad_norm": 5.5470642039105975, - "learning_rate": 1.9964176970446504e-05, - "loss": 2.8207, + "epoch": 0.11352725218864727, + "grad_norm": 5.607580893143515, + "learning_rate": 1.999679822316725e-05, + "loss": 1.2376, "step": 804 }, { - "epoch": 0.1687984902495282, - "grad_norm": 6.641243790799172, - "learning_rate": 1.9963985262561704e-05, - "loss": 3.0149, + "epoch": 0.11366845523863316, + "grad_norm": 5.618724506035481, + "learning_rate": 1.9996759533440916e-05, + "loss": 1.0262, "step": 805 }, { - "epoch": 0.16900817781505556, - "grad_norm": 5.2599864963628535, - "learning_rate": 1.9963793044005382e-05, - "loss": 2.7838, + "epoch": 0.11380965828861904, + "grad_norm": 5.000879783261625, + "learning_rate": 1.99967206113942e-05, + "loss": 1.1771, "step": 806 }, { - "epoch": 0.16921786538058292, - "grad_norm": 5.551203360839708, - "learning_rate": 1.9963600314787384e-05, - "loss": 2.3039, + "epoch": 0.11395086133860491, + "grad_norm": 6.430629249895955, + "learning_rate": 1.9996681457028024e-05, + "loss": 1.2216, "step": 807 }, { - "epoch": 0.16942755294611028, - "grad_norm": 5.005656047264504, - "learning_rate": 1.9963407074917586e-05, - "loss": 2.3646, + "epoch": 0.11409206438859079, + "grad_norm": 5.297120763561583, + "learning_rate": 1.999664207034328e-05, + "loss": 1.155, "step": 808 }, { - "epoch": 0.16963724051163767, - "grad_norm": 5.096147192741019, - "learning_rate": 1.99632133244059e-05, - "loss": 2.5875, + "epoch": 0.11423326743857667, + "grad_norm": 5.751556472820045, + "learning_rate": 1.9996602451340892e-05, + "loss": 1.1242, "step": 809 }, { - "epoch": 0.16984692807716503, - "grad_norm": 6.304971806711211, - "learning_rate": 1.9963019063262245e-05, - "loss": 2.3647, + "epoch": 0.11437447048856256, + "grad_norm": 5.721804381703796, + "learning_rate": 1.9996562600021786e-05, + "loss": 1.2767, "step": 810 }, { - "epoch": 0.1700566156426924, - "grad_norm": 4.689418131258043, - "learning_rate": 1.9962824291496584e-05, - "loss": 2.4458, + "epoch": 0.11451567353854843, + "grad_norm": 5.893988920391293, + "learning_rate": 1.9996522516386875e-05, + "loss": 1.1995, "step": 811 }, { - "epoch": 0.17026630320821975, - "grad_norm": 5.679181828413589, - "learning_rate": 1.9962629009118903e-05, - "loss": 2.4693, + "epoch": 0.11465687658853431, + "grad_norm": 5.9031098736613705, + "learning_rate": 1.9996482200437104e-05, + "loss": 1.1383, "step": 812 }, { - "epoch": 0.17047599077374712, - "grad_norm": 5.756522383170872, - "learning_rate": 1.9962433216139204e-05, - "loss": 2.4949, + "epoch": 0.11479807963852019, + "grad_norm": 5.678267009884401, + "learning_rate": 1.9996441652173403e-05, + "loss": 1.044, "step": 813 }, { - "epoch": 0.17068567833927448, - "grad_norm": 5.962087484363417, - "learning_rate": 1.9962236912567522e-05, - "loss": 2.3043, + "epoch": 0.11493928268850608, + "grad_norm": 5.464529351162833, + "learning_rate": 1.999640087159671e-05, + "loss": 1.1106, "step": 814 }, { - "epoch": 0.17089536590480184, - "grad_norm": 5.160052152709169, - "learning_rate": 1.9962040098413918e-05, - "loss": 2.428, + "epoch": 0.11508048573849196, + "grad_norm": 5.094443793906905, + "learning_rate": 1.9996359858707984e-05, + "loss": 1.0449, "step": 815 }, { - "epoch": 0.1711050534703292, - "grad_norm": 5.406280541609342, - "learning_rate": 1.9961842773688484e-05, - "loss": 2.4791, + "epoch": 0.11522168878847783, + "grad_norm": 6.511428641399698, + "learning_rate": 1.999631861350817e-05, + "loss": 1.4996, "step": 816 }, { - "epoch": 0.17131474103585656, - "grad_norm": 4.734733525477293, - "learning_rate": 1.996164493840133e-05, - "loss": 2.5014, + "epoch": 0.11536289183846371, + "grad_norm": 5.385932380290889, + "learning_rate": 1.999627713599823e-05, + "loss": 1.1581, "step": 817 }, { - "epoch": 0.17152442860138395, - "grad_norm": 4.943764561837859, - "learning_rate": 1.9961446592562594e-05, - "loss": 2.3089, + "epoch": 0.1155040948884496, + "grad_norm": 5.233641318862517, + "learning_rate": 1.999623542617912e-05, + "loss": 1.0391, "step": 818 }, { - "epoch": 0.1717341161669113, - "grad_norm": 5.5751813653506535, - "learning_rate": 1.9961247736182443e-05, - "loss": 2.5656, + "epoch": 0.11564529793843548, + "grad_norm": 5.655396083274696, + "learning_rate": 1.9996193484051822e-05, + "loss": 1.1819, "step": 819 }, { - "epoch": 0.17194380373243867, - "grad_norm": 5.13687644705553, - "learning_rate": 1.9961048369271066e-05, - "loss": 2.4772, + "epoch": 0.11578650098842135, + "grad_norm": 6.204063907496864, + "learning_rate": 1.99961513096173e-05, + "loss": 1.4043, "step": 820 }, { - "epoch": 0.17215349129796603, - "grad_norm": 6.675941699590338, - "learning_rate": 1.9960848491838688e-05, - "loss": 2.7695, + "epoch": 0.11592770403840723, + "grad_norm": 5.160519482087256, + "learning_rate": 1.9996108902876544e-05, + "loss": 1.2475, "step": 821 }, { - "epoch": 0.1723631788634934, - "grad_norm": 4.883406365901591, - "learning_rate": 1.9960648103895548e-05, - "loss": 2.3557, + "epoch": 0.11606890708839311, + "grad_norm": 5.033055551242292, + "learning_rate": 1.9996066263830533e-05, + "loss": 0.9497, "step": 822 }, { - "epoch": 0.17257286642902075, - "grad_norm": 4.94442909495027, - "learning_rate": 1.9960447205451915e-05, - "loss": 2.4459, + "epoch": 0.116210110138379, + "grad_norm": 5.4801078556293, + "learning_rate": 1.9996023392480254e-05, + "loss": 1.1818, "step": 823 }, { - "epoch": 0.17278255399454812, - "grad_norm": 5.323802519087413, - "learning_rate": 1.9960245796518086e-05, - "loss": 2.4145, + "epoch": 0.11635131318836486, + "grad_norm": 5.3761131553378965, + "learning_rate": 1.9995980288826714e-05, + "loss": 1.2519, "step": 824 }, { - "epoch": 0.17299224156007548, - "grad_norm": 4.997228864163945, - "learning_rate": 1.996004387710439e-05, - "loss": 2.4581, + "epoch": 0.11649251623835075, + "grad_norm": 5.405889625421241, + "learning_rate": 1.9995936952870905e-05, + "loss": 1.0568, "step": 825 }, { - "epoch": 0.17320192912560284, - "grad_norm": 5.283336329058954, - "learning_rate": 1.9959841447221165e-05, - "loss": 2.7089, + "epoch": 0.11663371928833663, + "grad_norm": 5.3021710412145575, + "learning_rate": 1.999589338461384e-05, + "loss": 1.023, "step": 826 }, { - "epoch": 0.17341161669113023, - "grad_norm": 5.237184579351533, - "learning_rate": 1.9959638506878795e-05, - "loss": 2.5239, + "epoch": 0.11677492233832251, + "grad_norm": 5.785930181095784, + "learning_rate": 1.999584958405653e-05, + "loss": 1.2078, "step": 827 }, { - "epoch": 0.1736213042566576, - "grad_norm": 5.65780162637725, - "learning_rate": 1.9959435056087676e-05, - "loss": 2.3824, + "epoch": 0.11691612538830838, + "grad_norm": 5.5250382539465726, + "learning_rate": 1.9995805551199988e-05, + "loss": 1.1602, "step": 828 }, { - "epoch": 0.17383099182218495, - "grad_norm": 4.827351960582513, - "learning_rate": 1.9959231094858236e-05, - "loss": 2.4209, + "epoch": 0.11705732843829426, + "grad_norm": 5.16493805362523, + "learning_rate": 1.9995761286045248e-05, + "loss": 1.1591, "step": 829 }, { - "epoch": 0.1740406793877123, - "grad_norm": 5.59764664902699, - "learning_rate": 1.995902662320093e-05, - "loss": 2.5681, + "epoch": 0.11719853148828015, + "grad_norm": 5.133249324233252, + "learning_rate": 1.999571678859333e-05, + "loss": 1.0251, "step": 830 }, { - "epoch": 0.17425036695323967, - "grad_norm": 5.565930028253618, - "learning_rate": 1.9958821641126234e-05, - "loss": 2.6214, + "epoch": 0.11733973453826603, + "grad_norm": 5.239574428739896, + "learning_rate": 1.9995672058845272e-05, + "loss": 1.2703, "step": 831 }, { - "epoch": 0.17446005451876703, - "grad_norm": 5.1865682653861205, - "learning_rate": 1.995861614864466e-05, - "loss": 2.3355, + "epoch": 0.11748093758825191, + "grad_norm": 5.625553408129529, + "learning_rate": 1.999562709680211e-05, + "loss": 1.193, "step": 832 }, { - "epoch": 0.1746697420842944, - "grad_norm": 5.587511254245669, - "learning_rate": 1.9958410145766733e-05, - "loss": 2.5303, + "epoch": 0.11762214063823778, + "grad_norm": 4.342673750607401, + "learning_rate": 1.9995581902464894e-05, + "loss": 0.9556, "step": 833 }, { - "epoch": 0.17487942964982175, - "grad_norm": 6.255446060394876, - "learning_rate": 1.995820363250302e-05, - "loss": 2.4386, + "epoch": 0.11776334368822367, + "grad_norm": 5.052398275117559, + "learning_rate": 1.9995536475834667e-05, + "loss": 1.0436, "step": 834 }, { - "epoch": 0.17508911721534912, - "grad_norm": 5.5566095073076704, - "learning_rate": 1.995799660886409e-05, - "loss": 2.3336, + "epoch": 0.11790454673820955, + "grad_norm": 5.991484523628505, + "learning_rate": 1.9995490816912494e-05, + "loss": 1.3851, "step": 835 }, { - "epoch": 0.1752988047808765, - "grad_norm": 5.312228334366878, - "learning_rate": 1.9957789074860566e-05, - "loss": 2.7252, + "epoch": 0.11804574978819543, + "grad_norm": 5.714464558199651, + "learning_rate": 1.9995444925699427e-05, + "loss": 1.1418, "step": 836 }, { - "epoch": 0.17550849234640387, - "grad_norm": 5.209814225019706, - "learning_rate": 1.9957581030503084e-05, - "loss": 2.2494, + "epoch": 0.1181869528381813, + "grad_norm": 5.027890337363048, + "learning_rate": 1.999539880219654e-05, + "loss": 1.317, "step": 837 }, { - "epoch": 0.17571817991193123, - "grad_norm": 5.809157066825737, - "learning_rate": 1.9957372475802303e-05, - "loss": 2.3917, + "epoch": 0.11832815588816718, + "grad_norm": 4.437889276305247, + "learning_rate": 1.99953524464049e-05, + "loss": 0.892, "step": 838 }, { - "epoch": 0.1759278674774586, - "grad_norm": 4.600111546205428, - "learning_rate": 1.995716341076891e-05, - "loss": 2.4483, + "epoch": 0.11846935893815307, + "grad_norm": 5.1281825633815545, + "learning_rate": 1.9995305858325586e-05, + "loss": 1.0215, "step": 839 }, { - "epoch": 0.17613755504298595, - "grad_norm": 4.928057485084255, - "learning_rate": 1.9956953835413624e-05, - "loss": 2.5772, + "epoch": 0.11861056198813895, + "grad_norm": 5.557320859358658, + "learning_rate": 1.999525903795968e-05, + "loss": 1.2308, "step": 840 }, { - "epoch": 0.1763472426085133, - "grad_norm": 5.624418635591188, - "learning_rate": 1.9956743749747184e-05, - "loss": 2.5422, + "epoch": 0.11875176503812482, + "grad_norm": 5.352457908193261, + "learning_rate": 1.999521198530827e-05, + "loss": 1.2149, "step": 841 }, { - "epoch": 0.17655693017404067, - "grad_norm": 5.0933223470612665, - "learning_rate": 1.9956533153780358e-05, - "loss": 2.2941, + "epoch": 0.1188929680881107, + "grad_norm": 5.049151601059812, + "learning_rate": 1.9995164700372448e-05, + "loss": 1.043, "step": 842 }, { - "epoch": 0.17676661773956803, - "grad_norm": 6.536798514231347, - "learning_rate": 1.9956322047523936e-05, - "loss": 2.6009, + "epoch": 0.11903417113809658, + "grad_norm": 6.812611063105615, + "learning_rate": 1.999511718315332e-05, + "loss": 1.1816, "step": 843 }, { - "epoch": 0.17697630530509542, - "grad_norm": 5.450621074969645, - "learning_rate": 1.9956110430988745e-05, - "loss": 2.4368, + "epoch": 0.11917537418808247, + "grad_norm": 5.033233034273857, + "learning_rate": 1.9995069433651985e-05, + "loss": 1.1476, "step": 844 }, { - "epoch": 0.17718599287062278, - "grad_norm": 6.719090216084803, - "learning_rate": 1.9955898304185623e-05, - "loss": 2.676, + "epoch": 0.11931657723806834, + "grad_norm": 5.099808940755445, + "learning_rate": 1.9995021451869548e-05, + "loss": 1.1959, "step": 845 }, { - "epoch": 0.17739568043615014, - "grad_norm": 5.468604176484468, - "learning_rate": 1.9955685667125448e-05, - "loss": 2.5359, + "epoch": 0.11945778028805422, + "grad_norm": 4.772748515716294, + "learning_rate": 1.9994973237807133e-05, + "loss": 1.0424, "step": 846 }, { - "epoch": 0.1776053680016775, - "grad_norm": 5.162680171464789, - "learning_rate": 1.9955472519819114e-05, - "loss": 2.3793, + "epoch": 0.1195989833380401, + "grad_norm": 5.122110864730668, + "learning_rate": 1.9994924791465854e-05, + "loss": 1.0853, "step": 847 }, { - "epoch": 0.17781505556720487, - "grad_norm": 4.985326564587904, - "learning_rate": 1.995525886227755e-05, - "loss": 2.4282, + "epoch": 0.11974018638802598, + "grad_norm": 5.502888437833528, + "learning_rate": 1.999487611284684e-05, + "loss": 1.3397, "step": 848 }, { - "epoch": 0.17802474313273223, - "grad_norm": 5.204117524216867, - "learning_rate": 1.9955044694511697e-05, - "loss": 2.5916, + "epoch": 0.11988138943801187, + "grad_norm": 5.207542648884867, + "learning_rate": 1.9994827201951223e-05, + "loss": 1.1872, "step": 849 }, { - "epoch": 0.1782344306982596, - "grad_norm": 4.989224401120582, - "learning_rate": 1.995483001653254e-05, - "loss": 2.6525, + "epoch": 0.12002259248799774, + "grad_norm": 5.293792420635834, + "learning_rate": 1.9994778058780136e-05, + "loss": 1.0728, "step": 850 }, { - "epoch": 0.17844411826378695, - "grad_norm": 5.6958176671058665, - "learning_rate": 1.995461482835108e-05, - "loss": 2.6081, + "epoch": 0.12016379553798362, + "grad_norm": 5.2355440932748465, + "learning_rate": 1.9994728683334726e-05, + "loss": 1.0075, "step": 851 }, { - "epoch": 0.1786538058293143, - "grad_norm": 5.030585029222048, - "learning_rate": 1.995439912997834e-05, - "loss": 2.6039, + "epoch": 0.1203049985879695, + "grad_norm": 4.93290987765116, + "learning_rate": 1.999467907561614e-05, + "loss": 1.1079, "step": 852 }, { - "epoch": 0.1788634933948417, - "grad_norm": 4.448185895967703, - "learning_rate": 1.9954182921425388e-05, - "loss": 2.1762, + "epoch": 0.12044620163795539, + "grad_norm": 5.021314451083187, + "learning_rate": 1.9994629235625524e-05, + "loss": 0.9773, "step": 853 }, { - "epoch": 0.17907318096036906, - "grad_norm": 5.383344612894652, - "learning_rate": 1.995396620270329e-05, - "loss": 2.5764, + "epoch": 0.12058740468794125, + "grad_norm": 5.2567430786820735, + "learning_rate": 1.999457916336404e-05, + "loss": 1.0051, "step": 854 }, { - "epoch": 0.17928286852589642, - "grad_norm": 5.031615038551207, - "learning_rate": 1.995374897382316e-05, - "loss": 2.2982, + "epoch": 0.12072860773792714, + "grad_norm": 5.04435006227269, + "learning_rate": 1.9994528858832854e-05, + "loss": 1.1814, "step": 855 }, { - "epoch": 0.17949255609142378, - "grad_norm": 5.18290878826544, - "learning_rate": 1.9953531234796133e-05, - "loss": 2.1095, + "epoch": 0.12086981078791302, + "grad_norm": 5.6355051650302554, + "learning_rate": 1.9994478322033134e-05, + "loss": 1.0268, "step": 856 }, { - "epoch": 0.17970224365695114, - "grad_norm": 5.386646785836298, - "learning_rate": 1.9953312985633366e-05, - "loss": 2.3745, + "epoch": 0.1210110138378989, + "grad_norm": 4.933867335534344, + "learning_rate": 1.9994427552966056e-05, + "loss": 1.1164, "step": 857 }, { - "epoch": 0.1799119312224785, - "grad_norm": 5.608239066675773, - "learning_rate": 1.9953094226346046e-05, - "loss": 2.4223, + "epoch": 0.12115221688788477, + "grad_norm": 4.370951102868625, + "learning_rate": 1.9994376551632796e-05, + "loss": 0.8646, "step": 858 }, { - "epoch": 0.18012161878800587, - "grad_norm": 5.738383305715215, - "learning_rate": 1.995287495694538e-05, - "loss": 2.6354, + "epoch": 0.12129341993787066, + "grad_norm": 4.460584837298375, + "learning_rate": 1.999432531803454e-05, + "loss": 0.9945, "step": 859 }, { - "epoch": 0.18033130635353323, - "grad_norm": 5.297752061779667, - "learning_rate": 1.9952655177442618e-05, - "loss": 2.4378, + "epoch": 0.12143462298785654, + "grad_norm": 5.0714085073396795, + "learning_rate": 1.9994273852172484e-05, + "loss": 1.1131, "step": 860 }, { - "epoch": 0.1805409939190606, - "grad_norm": 5.815973839977065, - "learning_rate": 1.9952434887849014e-05, - "loss": 2.4679, + "epoch": 0.12157582603784242, + "grad_norm": 4.7831668929163245, + "learning_rate": 1.9994222154047815e-05, + "loss": 1.0351, "step": 861 }, { - "epoch": 0.18075068148458798, - "grad_norm": 5.8559471541561345, - "learning_rate": 1.9952214088175855e-05, - "loss": 2.823, + "epoch": 0.12171702908782829, + "grad_norm": 5.617787569481514, + "learning_rate": 1.999417022366174e-05, + "loss": 1.2635, "step": 862 }, { - "epoch": 0.18096036905011534, - "grad_norm": 6.104384028798239, - "learning_rate": 1.995199277843447e-05, - "loss": 2.5181, + "epoch": 0.12185823213781417, + "grad_norm": 7.9309459021833275, + "learning_rate": 1.9994118061015468e-05, + "loss": 1.215, "step": 863 }, { - "epoch": 0.1811700566156427, - "grad_norm": 4.549457994128656, - "learning_rate": 1.995177095863619e-05, - "loss": 2.3462, + "epoch": 0.12199943518780006, + "grad_norm": 5.5743713530017605, + "learning_rate": 1.9994065666110206e-05, + "loss": 1.0707, "step": 864 }, { - "epoch": 0.18137974418117006, - "grad_norm": 5.436138788899165, - "learning_rate": 1.995154862879239e-05, - "loss": 2.755, + "epoch": 0.12214063823778594, + "grad_norm": 5.09601293123909, + "learning_rate": 1.999401303894718e-05, + "loss": 1.2704, "step": 865 }, { - "epoch": 0.18158943174669742, - "grad_norm": 5.472479469383391, - "learning_rate": 1.9951325788914457e-05, - "loss": 2.5968, + "epoch": 0.12228184128777182, + "grad_norm": 4.7956150724392, + "learning_rate": 1.99939601795276e-05, + "loss": 1.2585, "step": 866 }, { - "epoch": 0.18179911931222478, - "grad_norm": 4.8993628934332785, - "learning_rate": 1.9951102439013824e-05, - "loss": 2.6024, + "epoch": 0.12242304433775769, + "grad_norm": 5.571223991807083, + "learning_rate": 1.9993907087852703e-05, + "loss": 1.156, "step": 867 }, { - "epoch": 0.18200880687775214, - "grad_norm": 5.601454966702046, - "learning_rate": 1.9950878579101927e-05, - "loss": 2.5266, + "epoch": 0.12256424738774357, + "grad_norm": 5.1033386277020245, + "learning_rate": 1.9993853763923724e-05, + "loss": 1.1006, "step": 868 }, { - "epoch": 0.1822184944432795, - "grad_norm": 5.54926763211863, - "learning_rate": 1.9950654209190246e-05, - "loss": 2.6516, + "epoch": 0.12270545043772946, + "grad_norm": 5.753998714877866, + "learning_rate": 1.9993800207741896e-05, + "loss": 1.3855, "step": 869 }, { - "epoch": 0.18242818200880687, - "grad_norm": 4.975406708451378, - "learning_rate": 1.995042932929028e-05, - "loss": 2.3917, + "epoch": 0.12284665348771534, + "grad_norm": 4.382155954718282, + "learning_rate": 1.999374641930847e-05, + "loss": 1.0522, "step": 870 }, { - "epoch": 0.18263786957433426, - "grad_norm": 5.909001873186139, - "learning_rate": 1.995020393941355e-05, - "loss": 2.5046, + "epoch": 0.12298785653770121, + "grad_norm": 5.270368299827882, + "learning_rate": 1.999369239862469e-05, + "loss": 1.2041, "step": 871 }, { - "epoch": 0.18284755713986162, - "grad_norm": 5.104492440651537, - "learning_rate": 1.994997803957161e-05, - "loss": 2.4308, + "epoch": 0.12312905958768709, + "grad_norm": 5.1403083748212905, + "learning_rate": 1.9993638145691817e-05, + "loss": 1.2324, "step": 872 }, { - "epoch": 0.18305724470538898, - "grad_norm": 6.2011163694045255, - "learning_rate": 1.9949751629776043e-05, - "loss": 2.7716, + "epoch": 0.12327026263767298, + "grad_norm": 6.08074670417796, + "learning_rate": 1.999358366051111e-05, + "loss": 1.1363, "step": 873 }, { - "epoch": 0.18326693227091634, - "grad_norm": 4.739716649909645, - "learning_rate": 1.9949524710038445e-05, - "loss": 2.4716, + "epoch": 0.12341146568765886, + "grad_norm": 4.2939366533801975, + "learning_rate": 1.999352894308383e-05, + "loss": 1.0829, "step": 874 }, { - "epoch": 0.1834766198364437, - "grad_norm": 6.1425757658306495, - "learning_rate": 1.9949297280370444e-05, - "loss": 2.4702, + "epoch": 0.12355266873764473, + "grad_norm": 5.272779825078832, + "learning_rate": 1.999347399341126e-05, + "loss": 1.1646, "step": 875 }, { - "epoch": 0.18368630740197106, - "grad_norm": 5.5573572440882755, - "learning_rate": 1.994906934078371e-05, - "loss": 2.4642, + "epoch": 0.12369387178763061, + "grad_norm": 5.2176497161857585, + "learning_rate": 1.9993418811494663e-05, + "loss": 1.2705, "step": 876 }, { - "epoch": 0.18389599496749842, - "grad_norm": 4.828397823101376, - "learning_rate": 1.994884089128991e-05, - "loss": 2.4723, + "epoch": 0.1238350748376165, + "grad_norm": 5.442022399731426, + "learning_rate": 1.9993363397335335e-05, + "loss": 1.1217, "step": 877 }, { - "epoch": 0.18410568253302578, - "grad_norm": 5.632723011140619, - "learning_rate": 1.9948611931900764e-05, - "loss": 2.4549, + "epoch": 0.12397627788760238, + "grad_norm": 6.38768066327861, + "learning_rate": 1.9993307750934555e-05, + "loss": 1.3341, "step": 878 }, { - "epoch": 0.18431537009855314, - "grad_norm": 5.915021700626544, - "learning_rate": 1.9948382462627997e-05, - "loss": 2.4802, + "epoch": 0.12411748093758825, + "grad_norm": 4.973903989077026, + "learning_rate": 1.9993251872293618e-05, + "loss": 1.0029, "step": 879 }, { - "epoch": 0.18452505766408053, - "grad_norm": 5.192333046317903, - "learning_rate": 1.9948152483483376e-05, - "loss": 2.3059, + "epoch": 0.12425868398757413, + "grad_norm": 5.268164174800898, + "learning_rate": 1.9993195761413823e-05, + "loss": 0.9974, "step": 880 }, { - "epoch": 0.1847347452296079, - "grad_norm": 4.854540141867747, - "learning_rate": 1.9947921994478686e-05, - "loss": 2.4742, + "epoch": 0.12439988703756001, + "grad_norm": 5.21981647274676, + "learning_rate": 1.9993139418296477e-05, + "loss": 1.104, "step": 881 }, { - "epoch": 0.18494443279513526, - "grad_norm": 4.829050196113135, - "learning_rate": 1.994769099562574e-05, - "loss": 2.4339, + "epoch": 0.1245410900875459, + "grad_norm": 5.670791790579677, + "learning_rate": 1.9993082842942883e-05, + "loss": 1.285, "step": 882 }, { - "epoch": 0.18515412036066262, - "grad_norm": 6.015103895046883, - "learning_rate": 1.9947459486936378e-05, - "loss": 2.728, + "epoch": 0.12468229313753178, + "grad_norm": 5.40065965184563, + "learning_rate": 1.999302603535436e-05, + "loss": 1.1038, "step": 883 }, { - "epoch": 0.18536380792618998, - "grad_norm": 4.768495634323849, - "learning_rate": 1.9947227468422462e-05, - "loss": 2.4327, + "epoch": 0.12482349618751765, + "grad_norm": 6.098120544259875, + "learning_rate": 1.9992968995532228e-05, + "loss": 1.0751, "step": 884 }, { - "epoch": 0.18557349549171734, - "grad_norm": 6.47748341817548, - "learning_rate": 1.994699494009589e-05, - "loss": 2.8254, + "epoch": 0.12496469923750353, + "grad_norm": 5.6945610815131955, + "learning_rate": 1.9992911723477816e-05, + "loss": 1.2666, "step": 885 }, { - "epoch": 0.1857831830572447, - "grad_norm": 5.674268388327525, - "learning_rate": 1.994676190196857e-05, - "loss": 2.6363, + "epoch": 0.1251059022874894, + "grad_norm": 6.417872362780822, + "learning_rate": 1.999285421919245e-05, + "loss": 1.4128, "step": 886 }, { - "epoch": 0.18599287062277206, - "grad_norm": 4.93142945507908, - "learning_rate": 1.9946528354052453e-05, - "loss": 2.3969, + "epoch": 0.1252471053374753, + "grad_norm": 5.643168660643183, + "learning_rate": 1.9992796482677465e-05, + "loss": 1.2438, "step": 887 }, { - "epoch": 0.18620255818829942, - "grad_norm": 5.745896749665753, - "learning_rate": 1.9946294296359507e-05, - "loss": 2.5042, + "epoch": 0.12538830838746118, + "grad_norm": 6.200068370802321, + "learning_rate": 1.9992738513934205e-05, + "loss": 1.187, "step": 888 }, { - "epoch": 0.1864122457538268, - "grad_norm": 4.910569525334854, - "learning_rate": 1.9946059728901724e-05, - "loss": 2.4622, + "epoch": 0.12552951143744706, + "grad_norm": 5.334047649322766, + "learning_rate": 1.9992680312964018e-05, + "loss": 1.15, "step": 889 }, { - "epoch": 0.18662193331935417, - "grad_norm": 6.586576062035691, - "learning_rate": 1.9945824651691134e-05, - "loss": 2.5314, + "epoch": 0.12567071448743292, + "grad_norm": 5.644828267533013, + "learning_rate": 1.9992621879768256e-05, + "loss": 1.2009, "step": 890 }, { - "epoch": 0.18683162088488153, - "grad_norm": 5.079637117467489, - "learning_rate": 1.994558906473978e-05, - "loss": 2.3744, + "epoch": 0.1258119175374188, + "grad_norm": 5.888255855521819, + "learning_rate": 1.999256321434828e-05, + "loss": 1.3039, "step": 891 }, { - "epoch": 0.1870413084504089, - "grad_norm": 5.100999598825927, - "learning_rate": 1.9945352968059735e-05, - "loss": 2.3114, + "epoch": 0.12595312058740468, + "grad_norm": 5.348523442219035, + "learning_rate": 1.9992504316705446e-05, + "loss": 1.1741, "step": 892 }, { - "epoch": 0.18725099601593626, - "grad_norm": 5.522657260137167, - "learning_rate": 1.9945116361663102e-05, - "loss": 2.5035, + "epoch": 0.12609432363739057, + "grad_norm": 4.881682036266078, + "learning_rate": 1.9992445186841128e-05, + "loss": 1.1224, "step": 893 }, { - "epoch": 0.18746068358146362, - "grad_norm": 5.004913680748999, - "learning_rate": 1.9944879245562006e-05, - "loss": 2.4789, + "epoch": 0.12623552668737645, + "grad_norm": 5.203730259820256, + "learning_rate": 1.99923858247567e-05, + "loss": 1.0781, "step": 894 }, { - "epoch": 0.18767037114699098, - "grad_norm": 5.080222853740604, - "learning_rate": 1.9944641619768598e-05, - "loss": 2.483, + "epoch": 0.12637672973736233, + "grad_norm": 4.774378450311665, + "learning_rate": 1.9992326230453544e-05, + "loss": 0.9942, "step": 895 }, { - "epoch": 0.18788005871251834, - "grad_norm": 5.887346327906223, - "learning_rate": 1.9944403484295062e-05, - "loss": 2.4791, + "epoch": 0.12651793278734821, + "grad_norm": 5.248190466095374, + "learning_rate": 1.9992266403933037e-05, + "loss": 1.0486, "step": 896 }, { - "epoch": 0.1880897462780457, - "grad_norm": 5.448747530767844, - "learning_rate": 1.9944164839153595e-05, - "loss": 2.5861, + "epoch": 0.1266591358373341, + "grad_norm": 5.102677308848711, + "learning_rate": 1.9992206345196576e-05, + "loss": 1.1761, "step": 897 }, { - "epoch": 0.1882994338435731, - "grad_norm": 5.149747557197856, - "learning_rate": 1.994392568435644e-05, - "loss": 2.616, + "epoch": 0.12680033888731998, + "grad_norm": 5.495026680667837, + "learning_rate": 1.9992146054245552e-05, + "loss": 1.2891, "step": 898 }, { - "epoch": 0.18850912140910045, - "grad_norm": 5.4248840812071775, - "learning_rate": 1.9943686019915844e-05, - "loss": 2.5144, + "epoch": 0.12694154193730584, + "grad_norm": 5.084583161279488, + "learning_rate": 1.9992085531081374e-05, + "loss": 1.127, "step": 899 }, { - "epoch": 0.1887188089746278, - "grad_norm": 5.019211833713546, - "learning_rate": 1.994344584584409e-05, - "loss": 2.4441, + "epoch": 0.12708274498729172, + "grad_norm": 5.617561940825893, + "learning_rate": 1.999202477570544e-05, + "loss": 1.1415, "step": 900 }, { - "epoch": 0.18892849654015517, - "grad_norm": 5.18434435897594, - "learning_rate": 1.9943205162153493e-05, - "loss": 2.5536, + "epoch": 0.1272239480372776, + "grad_norm": 5.2426936350995375, + "learning_rate": 1.9991963788119166e-05, + "loss": 1.1545, "step": 901 }, { - "epoch": 0.18913818410568253, - "grad_norm": 4.8831296119713326, - "learning_rate": 1.9942963968856384e-05, - "loss": 2.3812, + "epoch": 0.12736515108726348, + "grad_norm": 4.209370947675331, + "learning_rate": 1.9991902568323972e-05, + "loss": 0.9188, "step": 902 }, { - "epoch": 0.1893478716712099, - "grad_norm": 5.684986224455571, - "learning_rate": 1.9942722265965128e-05, - "loss": 2.5858, + "epoch": 0.12750635413724937, + "grad_norm": 4.754577076981472, + "learning_rate": 1.9991841116321272e-05, + "loss": 1.0862, "step": 903 }, { - "epoch": 0.18955755923673726, - "grad_norm": 5.700698450628685, - "learning_rate": 1.9942480053492112e-05, - "loss": 2.4186, + "epoch": 0.12764755718723525, + "grad_norm": 5.583435340630613, + "learning_rate": 1.9991779432112503e-05, + "loss": 1.0693, "step": 904 }, { - "epoch": 0.18976724680226462, - "grad_norm": 4.547966659110109, - "learning_rate": 1.9942237331449747e-05, - "loss": 2.2892, + "epoch": 0.12778876023722113, + "grad_norm": 5.380354323379033, + "learning_rate": 1.9991717515699097e-05, + "loss": 1.3403, "step": 905 }, { - "epoch": 0.18997693436779198, - "grad_norm": 4.521301414957022, - "learning_rate": 1.9941994099850473e-05, - "loss": 2.4115, + "epoch": 0.12792996328720702, + "grad_norm": 5.143434917955193, + "learning_rate": 1.9991655367082487e-05, + "loss": 1.0408, "step": 906 }, { - "epoch": 0.19018662193331937, - "grad_norm": 4.796428680042201, - "learning_rate": 1.9941750358706763e-05, - "loss": 2.3414, + "epoch": 0.12807116633719287, + "grad_norm": 5.056272133552769, + "learning_rate": 1.9991592986264122e-05, + "loss": 1.0496, "step": 907 }, { - "epoch": 0.19039630949884673, - "grad_norm": 6.0685827688665235, - "learning_rate": 1.9941506108031102e-05, - "loss": 2.4847, + "epoch": 0.12821236938717875, + "grad_norm": 5.489078876073065, + "learning_rate": 1.999153037324545e-05, + "loss": 1.1358, "step": 908 }, { - "epoch": 0.1906059970643741, - "grad_norm": 5.1457261008257404, - "learning_rate": 1.994126134783601e-05, - "loss": 2.6251, + "epoch": 0.12835357243716464, + "grad_norm": 5.269679865296767, + "learning_rate": 1.999146752802793e-05, + "loss": 1.2032, "step": 909 }, { - "epoch": 0.19081568462990145, - "grad_norm": 5.239952560216145, - "learning_rate": 1.994101607813403e-05, - "loss": 2.4506, + "epoch": 0.12849477548715052, + "grad_norm": 5.6333335099989, + "learning_rate": 1.999140445061302e-05, + "loss": 1.066, "step": 910 }, { - "epoch": 0.1910253721954288, - "grad_norm": 6.112448725212283, - "learning_rate": 1.9940770298937736e-05, - "loss": 2.5548, + "epoch": 0.1286359785371364, + "grad_norm": 5.041319873023169, + "learning_rate": 1.9991341141002185e-05, + "loss": 1.1441, "step": 911 }, { - "epoch": 0.19123505976095617, - "grad_norm": 5.191368930630846, - "learning_rate": 1.994052401025972e-05, - "loss": 2.2849, + "epoch": 0.12877718158712229, + "grad_norm": 5.367812889439325, + "learning_rate": 1.9991277599196897e-05, + "loss": 0.9939, "step": 912 }, { - "epoch": 0.19144474732648353, - "grad_norm": 5.377069654303603, - "learning_rate": 1.994027721211261e-05, - "loss": 2.2073, + "epoch": 0.12891838463710817, + "grad_norm": 5.078656371145898, + "learning_rate": 1.999121382519863e-05, + "loss": 1.0985, "step": 913 }, { - "epoch": 0.1916544348920109, - "grad_norm": 5.269656183035728, - "learning_rate": 1.9940029904509054e-05, - "loss": 2.5896, + "epoch": 0.12905958768709405, + "grad_norm": 4.946862885553495, + "learning_rate": 1.999114981900887e-05, + "loss": 1.061, "step": 914 }, { - "epoch": 0.19186412245753826, - "grad_norm": 4.582472713273976, - "learning_rate": 1.9939782087461724e-05, - "loss": 2.3606, + "epoch": 0.12920079073707993, + "grad_norm": 5.180711918536928, + "learning_rate": 1.9991085580629103e-05, + "loss": 1.0385, "step": 915 }, { - "epoch": 0.19207381002306564, - "grad_norm": 6.011572748683017, - "learning_rate": 1.9939533760983322e-05, - "loss": 2.3621, + "epoch": 0.1293419937870658, + "grad_norm": 4.780963636377455, + "learning_rate": 1.9991021110060825e-05, + "loss": 1.0859, "step": 916 }, { - "epoch": 0.192283497588593, - "grad_norm": 4.860293072710145, - "learning_rate": 1.9939284925086576e-05, - "loss": 2.4226, + "epoch": 0.12948319683705167, + "grad_norm": 4.735577384078431, + "learning_rate": 1.9990956407305525e-05, + "loss": 0.8622, "step": 917 }, { - "epoch": 0.19249318515412037, - "grad_norm": 6.224550963894682, - "learning_rate": 1.9939035579784236e-05, - "loss": 2.4929, + "epoch": 0.12962439988703756, + "grad_norm": 5.5036610425201, + "learning_rate": 1.999089147236472e-05, + "loss": 1.1861, "step": 918 }, { - "epoch": 0.19270287271964773, - "grad_norm": 4.734580779678761, - "learning_rate": 1.993878572508909e-05, - "loss": 2.2896, + "epoch": 0.12976560293702344, + "grad_norm": 4.773065186470791, + "learning_rate": 1.9990826305239906e-05, + "loss": 1.0559, "step": 919 }, { - "epoch": 0.1929125602851751, - "grad_norm": 5.821405318413932, - "learning_rate": 1.993853536101393e-05, - "loss": 2.631, + "epoch": 0.12990680598700932, + "grad_norm": 5.345862295352198, + "learning_rate": 1.9990760905932605e-05, + "loss": 1.2572, "step": 920 }, { - "epoch": 0.19312224785070245, - "grad_norm": 5.630603319863494, - "learning_rate": 1.9938284487571604e-05, - "loss": 2.3614, + "epoch": 0.1300480090369952, + "grad_norm": 4.559381469417303, + "learning_rate": 1.9990695274444338e-05, + "loss": 1.0719, "step": 921 }, { - "epoch": 0.1933319354162298, - "grad_norm": 5.0940768877114895, - "learning_rate": 1.993803310477495e-05, - "loss": 2.204, + "epoch": 0.1301892120869811, + "grad_norm": 5.665120960887968, + "learning_rate": 1.9990629410776624e-05, + "loss": 1.3774, "step": 922 }, { - "epoch": 0.19354162298175717, - "grad_norm": 4.922087843773558, - "learning_rate": 1.993778121263687e-05, - "loss": 2.6305, + "epoch": 0.13033041513696697, + "grad_norm": 5.172174638994669, + "learning_rate": 1.9990563314930997e-05, + "loss": 1.015, "step": 923 }, { - "epoch": 0.19375131054728453, - "grad_norm": 5.201118970695738, - "learning_rate": 1.9937528811170267e-05, - "loss": 2.6807, + "epoch": 0.13047161818695283, + "grad_norm": 5.499930974785996, + "learning_rate": 1.9990496986908994e-05, + "loss": 1.3084, "step": 924 }, { - "epoch": 0.19396099811281192, - "grad_norm": 5.338934484350384, - "learning_rate": 1.9937275900388076e-05, - "loss": 2.6133, + "epoch": 0.1306128212369387, + "grad_norm": 5.197619345991682, + "learning_rate": 1.9990430426712156e-05, + "loss": 1.218, "step": 925 }, { - "epoch": 0.19417068567833928, - "grad_norm": 5.395068409146181, - "learning_rate": 1.9937022480303256e-05, - "loss": 2.5248, + "epoch": 0.1307540242869246, + "grad_norm": 6.460598377532906, + "learning_rate": 1.9990363634342032e-05, + "loss": 1.2714, "step": 926 }, { - "epoch": 0.19438037324386664, - "grad_norm": 5.4413205997191705, - "learning_rate": 1.9936768550928804e-05, - "loss": 2.3095, + "epoch": 0.13089522733691047, + "grad_norm": 5.379512170317948, + "learning_rate": 1.9990296609800167e-05, + "loss": 1.0327, "step": 927 }, { - "epoch": 0.194590060809394, - "grad_norm": 4.60626361060752, - "learning_rate": 1.9936514112277727e-05, - "loss": 2.4582, + "epoch": 0.13103643038689636, + "grad_norm": 5.388522971829105, + "learning_rate": 1.9990229353088123e-05, + "loss": 1.3501, "step": 928 }, { - "epoch": 0.19479974837492137, - "grad_norm": 4.91685168909383, - "learning_rate": 1.9936259164363066e-05, - "loss": 2.6594, + "epoch": 0.13117763343688224, + "grad_norm": 5.224493527208887, + "learning_rate": 1.9990161864207466e-05, + "loss": 1.2567, "step": 929 }, { - "epoch": 0.19500943594044873, - "grad_norm": 4.854375241178789, - "learning_rate": 1.9936003707197893e-05, - "loss": 2.116, + "epoch": 0.13131883648686812, + "grad_norm": 4.6317562806682675, + "learning_rate": 1.9990094143159763e-05, + "loss": 0.9792, "step": 930 }, { - "epoch": 0.1952191235059761, - "grad_norm": 5.3419706462791074, - "learning_rate": 1.9935747740795295e-05, - "loss": 2.2324, + "epoch": 0.131460039536854, + "grad_norm": 4.761843862735333, + "learning_rate": 1.9990026189946584e-05, + "loss": 1.1116, "step": 931 }, { - "epoch": 0.19542881107150345, - "grad_norm": 5.256376420547868, - "learning_rate": 1.9935491265168392e-05, - "loss": 2.5482, + "epoch": 0.1316012425868399, + "grad_norm": 4.9942741246143125, + "learning_rate": 1.9989958004569514e-05, + "loss": 1.2391, "step": 932 }, { - "epoch": 0.1956384986370308, - "grad_norm": 4.901918345493721, - "learning_rate": 1.9935234280330333e-05, - "loss": 2.3966, + "epoch": 0.13174244563682574, + "grad_norm": 5.331731136576249, + "learning_rate": 1.998988958703013e-05, + "loss": 1.1209, "step": 933 }, { - "epoch": 0.1958481862025582, - "grad_norm": 5.317936673143983, - "learning_rate": 1.993497678629428e-05, - "loss": 2.5429, + "epoch": 0.13188364868681163, + "grad_norm": 6.039204116458024, + "learning_rate": 1.9989820937330028e-05, + "loss": 1.4043, "step": 934 }, { - "epoch": 0.19605787376808556, - "grad_norm": 5.315767067422777, - "learning_rate": 1.993471878307344e-05, - "loss": 2.1342, + "epoch": 0.1320248517367975, + "grad_norm": 5.120888595994629, + "learning_rate": 1.99897520554708e-05, + "loss": 1.0491, "step": 935 }, { - "epoch": 0.19626756133361292, - "grad_norm": 5.876888515032318, - "learning_rate": 1.9934460270681028e-05, - "loss": 2.1805, + "epoch": 0.1321660547867834, + "grad_norm": 4.754581901564395, + "learning_rate": 1.998968294145405e-05, + "loss": 1.1176, "step": 936 }, { - "epoch": 0.19647724889914028, - "grad_norm": 6.06976386362381, - "learning_rate": 1.99342012491303e-05, - "loss": 2.7713, + "epoch": 0.13230725783676928, + "grad_norm": 5.195528747894677, + "learning_rate": 1.9989613595281384e-05, + "loss": 0.963, "step": 937 }, { - "epoch": 0.19668693646466764, - "grad_norm": 5.042156333179805, - "learning_rate": 1.9933941718434523e-05, - "loss": 2.6455, + "epoch": 0.13244846088675516, + "grad_norm": 5.6163949327877365, + "learning_rate": 1.9989544016954414e-05, + "loss": 1.406, "step": 938 }, { - "epoch": 0.196896624030195, - "grad_norm": 5.373087123649382, - "learning_rate": 1.9933681678607005e-05, - "loss": 2.4848, + "epoch": 0.13258966393674104, + "grad_norm": 5.192517204151998, + "learning_rate": 1.998947420647475e-05, + "loss": 0.9567, "step": 939 }, { - "epoch": 0.19710631159572237, - "grad_norm": 4.8283300788311205, - "learning_rate": 1.9933421129661075e-05, - "loss": 2.5254, + "epoch": 0.13273086698672693, + "grad_norm": 4.939326278914683, + "learning_rate": 1.9989404163844026e-05, + "loss": 1.0999, "step": 940 }, { - "epoch": 0.19731599916124973, - "grad_norm": 5.615108274453707, - "learning_rate": 1.993316007161008e-05, - "loss": 2.6872, + "epoch": 0.13287207003671278, + "grad_norm": 4.4363610143865, + "learning_rate": 1.9989333889063856e-05, + "loss": 0.9798, "step": 941 }, { - "epoch": 0.1975256867267771, - "grad_norm": 5.632846177763661, - "learning_rate": 1.9932898504467407e-05, - "loss": 2.2772, + "epoch": 0.13301327308669866, + "grad_norm": 4.8234058295639795, + "learning_rate": 1.9989263382135882e-05, + "loss": 0.9433, "step": 942 }, { - "epoch": 0.19773537429230448, - "grad_norm": 4.9900070371932355, - "learning_rate": 1.9932636428246456e-05, - "loss": 2.3392, + "epoch": 0.13315447613668455, + "grad_norm": 5.2426868079866225, + "learning_rate": 1.9989192643061745e-05, + "loss": 1.183, "step": 943 }, { - "epoch": 0.19794506185783184, - "grad_norm": 5.332409359202788, - "learning_rate": 1.993237384296066e-05, - "loss": 2.5759, + "epoch": 0.13329567918667043, + "grad_norm": 5.63390095990124, + "learning_rate": 1.998912167184308e-05, + "loss": 1.3909, "step": 944 }, { - "epoch": 0.1981547494233592, - "grad_norm": 5.618227414596692, - "learning_rate": 1.9932110748623476e-05, - "loss": 2.4152, + "epoch": 0.1334368822366563, + "grad_norm": 4.7800251715278606, + "learning_rate": 1.9989050468481544e-05, + "loss": 1.0927, "step": 945 }, { - "epoch": 0.19836443698888656, - "grad_norm": 5.2710668257843665, - "learning_rate": 1.9931847145248393e-05, - "loss": 2.5688, + "epoch": 0.1335780852866422, + "grad_norm": 5.059596253692547, + "learning_rate": 1.998897903297879e-05, + "loss": 1.1203, "step": 946 }, { - "epoch": 0.19857412455441392, - "grad_norm": 4.813334400142518, - "learning_rate": 1.9931583032848918e-05, - "loss": 2.3557, + "epoch": 0.13371928833662808, + "grad_norm": 5.462436930269701, + "learning_rate": 1.9988907365336478e-05, + "loss": 1.0374, "step": 947 }, { - "epoch": 0.19878381211994128, - "grad_norm": 5.216202596722822, - "learning_rate": 1.9931318411438586e-05, - "loss": 2.4534, + "epoch": 0.13386049138661396, + "grad_norm": 5.383252482072445, + "learning_rate": 1.998883546555627e-05, + "loss": 1.2318, "step": 948 }, { - "epoch": 0.19899349968546864, - "grad_norm": 5.175036990798288, - "learning_rate": 1.9931053281030957e-05, - "loss": 2.3328, + "epoch": 0.13400169443659984, + "grad_norm": 4.691362619313088, + "learning_rate": 1.9988763333639843e-05, + "loss": 1.1946, "step": 949 }, { - "epoch": 0.199203187250996, - "grad_norm": 5.29469629139264, - "learning_rate": 1.9930787641639628e-05, - "loss": 2.4553, + "epoch": 0.1341428974865857, + "grad_norm": 4.928880192779834, + "learning_rate": 1.9988690969588867e-05, + "loss": 1.0886, "step": 950 }, { - "epoch": 0.19941287481652337, - "grad_norm": 4.6110997078811735, - "learning_rate": 1.9930521493278205e-05, - "loss": 2.4536, + "epoch": 0.13428410053657158, + "grad_norm": 4.3687379111943745, + "learning_rate": 1.9988618373405025e-05, + "loss": 0.903, "step": 951 }, { - "epoch": 0.19962256238205076, - "grad_norm": 5.529826725346696, - "learning_rate": 1.9930254835960335e-05, - "loss": 2.3811, + "epoch": 0.13442530358655747, + "grad_norm": 5.125574725864024, + "learning_rate": 1.998854554509001e-05, + "loss": 1.2622, "step": 952 }, { - "epoch": 0.19983224994757812, - "grad_norm": 5.170745811515311, - "learning_rate": 1.992998766969968e-05, - "loss": 2.4378, + "epoch": 0.13456650663654335, + "grad_norm": 4.614140815104223, + "learning_rate": 1.998847248464551e-05, + "loss": 1.1588, "step": 953 }, { - "epoch": 0.20004193751310548, - "grad_norm": 4.459697190515876, - "learning_rate": 1.9929719994509934e-05, - "loss": 2.2087, + "epoch": 0.13470770968652923, + "grad_norm": 4.128127436169854, + "learning_rate": 1.998839919207322e-05, + "loss": 1.0075, "step": 954 }, { - "epoch": 0.20025162507863284, - "grad_norm": 5.618558737353918, - "learning_rate": 1.9929451810404816e-05, - "loss": 2.3138, + "epoch": 0.13484891273651511, + "grad_norm": 4.394625209204543, + "learning_rate": 1.9988325667374848e-05, + "loss": 1.1011, "step": 955 }, { - "epoch": 0.2004613126441602, - "grad_norm": 5.088797994398359, - "learning_rate": 1.992918311739807e-05, - "loss": 2.4355, + "epoch": 0.134990115786501, + "grad_norm": 4.9631838776976425, + "learning_rate": 1.9988251910552103e-05, + "loss": 1.2591, "step": 956 }, { - "epoch": 0.20067100020968756, - "grad_norm": 5.130162001818529, - "learning_rate": 1.9928913915503473e-05, - "loss": 1.9568, + "epoch": 0.13513131883648688, + "grad_norm": 5.0553501367064415, + "learning_rate": 1.998817792160669e-05, + "loss": 1.2266, "step": 957 }, { - "epoch": 0.20088068777521492, - "grad_norm": 6.77220147315865, - "learning_rate": 1.992864420473481e-05, - "loss": 2.1486, + "epoch": 0.13527252188647274, + "grad_norm": 4.965229263011208, + "learning_rate": 1.9988103700540345e-05, + "loss": 1.0737, "step": 958 }, { - "epoch": 0.20109037534074228, - "grad_norm": 5.489317132045333, - "learning_rate": 1.9928373985105914e-05, - "loss": 2.4805, + "epoch": 0.13541372493645862, + "grad_norm": 4.823682018357297, + "learning_rate": 1.9988029247354778e-05, + "loss": 1.2301, "step": 959 }, { - "epoch": 0.20130006290626964, - "grad_norm": 5.65637728609791, - "learning_rate": 1.9928103256630632e-05, - "loss": 2.7277, + "epoch": 0.1355549279864445, + "grad_norm": 4.993761047777017, + "learning_rate": 1.9987954562051724e-05, + "loss": 1.0941, "step": 960 }, { - "epoch": 0.20150975047179703, - "grad_norm": 5.604981170941377, - "learning_rate": 1.9927832019322838e-05, - "loss": 2.3853, + "epoch": 0.13569613103643038, + "grad_norm": 5.579508206335968, + "learning_rate": 1.998787964463292e-05, + "loss": 1.1355, "step": 961 }, { - "epoch": 0.2017194380373244, - "grad_norm": 6.045773713913119, - "learning_rate": 1.9927560273196436e-05, - "loss": 2.4956, + "epoch": 0.13583733408641627, + "grad_norm": 5.927369532705596, + "learning_rate": 1.9987804495100112e-05, + "loss": 1.1492, "step": 962 }, { - "epoch": 0.20192912560285176, - "grad_norm": 5.032703277139353, - "learning_rate": 1.9927288018265346e-05, - "loss": 2.4831, + "epoch": 0.13597853713640215, + "grad_norm": 4.344311792671202, + "learning_rate": 1.9987729113455036e-05, + "loss": 1.0834, "step": 963 }, { - "epoch": 0.20213881316837912, - "grad_norm": 5.870126491171813, - "learning_rate": 1.9927015254543533e-05, - "loss": 2.6288, + "epoch": 0.13611974018638803, + "grad_norm": 5.393506333320731, + "learning_rate": 1.998765349969945e-05, + "loss": 1.0183, "step": 964 }, { - "epoch": 0.20234850073390648, - "grad_norm": 4.842696739861761, - "learning_rate": 1.9926741982044967e-05, - "loss": 2.3388, + "epoch": 0.13626094323637392, + "grad_norm": 4.877987090652875, + "learning_rate": 1.998757765383511e-05, + "loss": 1.0941, "step": 965 }, { - "epoch": 0.20255818829943384, - "grad_norm": 5.156002782003122, - "learning_rate": 1.992646820078366e-05, - "loss": 2.427, + "epoch": 0.1364021462863598, + "grad_norm": 5.249919120932146, + "learning_rate": 1.9987501575863776e-05, + "loss": 1.1639, "step": 966 }, { - "epoch": 0.2027678758649612, - "grad_norm": 5.932095944618149, - "learning_rate": 1.992619391077364e-05, - "loss": 2.3469, + "epoch": 0.13654334933634565, + "grad_norm": 5.798203926989949, + "learning_rate": 1.9987425265787222e-05, + "loss": 1.2088, "step": 967 }, { - "epoch": 0.20297756343048856, - "grad_norm": 6.844332611420199, - "learning_rate": 1.9925919112028968e-05, - "loss": 2.4075, + "epoch": 0.13668455238633154, + "grad_norm": 6.22724404473044, + "learning_rate": 1.9987348723607218e-05, + "loss": 1.2981, "step": 968 }, { - "epoch": 0.20318725099601595, - "grad_norm": 5.4721063254887214, - "learning_rate": 1.992564380456372e-05, - "loss": 2.4533, + "epoch": 0.13682575543631742, + "grad_norm": 5.448868754906509, + "learning_rate": 1.9987271949325543e-05, + "loss": 1.0919, "step": 969 }, { - "epoch": 0.2033969385615433, - "grad_norm": 5.689594420915548, - "learning_rate": 1.992536798839202e-05, - "loss": 2.5801, + "epoch": 0.1369669584863033, + "grad_norm": 4.71134074672123, + "learning_rate": 1.998719494294398e-05, + "loss": 1.062, "step": 970 }, { - "epoch": 0.20360662612707067, - "grad_norm": 5.303921923868487, - "learning_rate": 1.9925091663527988e-05, - "loss": 2.4352, + "epoch": 0.13710816153628919, + "grad_norm": 4.841239063395571, + "learning_rate": 1.998711770446432e-05, + "loss": 1.0859, "step": 971 }, { - "epoch": 0.20381631369259803, - "grad_norm": 6.102298979386814, - "learning_rate": 1.99248148299858e-05, - "loss": 2.2759, + "epoch": 0.13724936458627507, + "grad_norm": 5.146262195727475, + "learning_rate": 1.9987040233888355e-05, + "loss": 1.2393, "step": 972 }, { - "epoch": 0.2040260012581254, - "grad_norm": 5.10592501269951, - "learning_rate": 1.9924537487779637e-05, - "loss": 2.4277, + "epoch": 0.13739056763626095, + "grad_norm": 5.114334190310484, + "learning_rate": 1.998696253121789e-05, + "loss": 1.1822, "step": 973 }, { - "epoch": 0.20423568882365276, - "grad_norm": 5.44142907343923, - "learning_rate": 1.992425963692371e-05, - "loss": 2.3429, + "epoch": 0.13753177068624683, + "grad_norm": 4.794117383524985, + "learning_rate": 1.998688459645473e-05, + "loss": 1.238, "step": 974 }, { - "epoch": 0.20444537638918012, - "grad_norm": 5.018214761519992, - "learning_rate": 1.9923981277432268e-05, - "loss": 2.4903, + "epoch": 0.1376729737362327, + "grad_norm": 4.287235871492822, + "learning_rate": 1.9986806429600684e-05, + "loss": 1.0582, "step": 975 }, { - "epoch": 0.20465506395470748, - "grad_norm": 5.1264355271054605, - "learning_rate": 1.9923702409319574e-05, - "loss": 2.4897, + "epoch": 0.13781417678621857, + "grad_norm": 5.86960034355485, + "learning_rate": 1.998672803065757e-05, + "loss": 1.1929, "step": 976 }, { - "epoch": 0.20486475152023484, - "grad_norm": 5.616942379959226, - "learning_rate": 1.992342303259992e-05, - "loss": 2.4862, + "epoch": 0.13795537983620446, + "grad_norm": 4.519127485751294, + "learning_rate": 1.998664939962721e-05, + "loss": 1.2046, "step": 977 }, { - "epoch": 0.20507443908576223, - "grad_norm": 6.721297698976598, - "learning_rate": 1.9923143147287622e-05, - "loss": 2.5596, + "epoch": 0.13809658288619034, + "grad_norm": 4.67139988786646, + "learning_rate": 1.998657053651143e-05, + "loss": 1.1493, "step": 978 }, { - "epoch": 0.2052841266512896, - "grad_norm": 5.94528255614887, - "learning_rate": 1.9922862753397024e-05, - "loss": 2.3787, + "epoch": 0.13823778593617622, + "grad_norm": 5.299227266277092, + "learning_rate": 1.9986491441312064e-05, + "loss": 1.2819, "step": 979 }, { - "epoch": 0.20549381421681695, - "grad_norm": 5.3440472880217165, - "learning_rate": 1.9922581850942505e-05, - "loss": 2.6357, + "epoch": 0.1383789889861621, + "grad_norm": 4.706529851370187, + "learning_rate": 1.998641211403095e-05, + "loss": 1.1905, "step": 980 }, { - "epoch": 0.2057035017823443, - "grad_norm": 5.226061565508953, - "learning_rate": 1.9922300439938454e-05, - "loss": 2.3324, + "epoch": 0.138520192036148, + "grad_norm": 5.103760687920829, + "learning_rate": 1.998633255466993e-05, + "loss": 0.9903, "step": 981 }, { - "epoch": 0.20591318934787167, - "grad_norm": 5.866211480937162, - "learning_rate": 1.9922018520399295e-05, - "loss": 2.3069, + "epoch": 0.13866139508613387, + "grad_norm": 4.803457248565893, + "learning_rate": 1.9986252763230856e-05, + "loss": 1.0923, "step": 982 }, { - "epoch": 0.20612287691339903, - "grad_norm": 5.194306073907077, - "learning_rate": 1.992173609233948e-05, - "loss": 2.5197, + "epoch": 0.13880259813611975, + "grad_norm": 5.68141816842776, + "learning_rate": 1.998617273971558e-05, + "loss": 1.2047, "step": 983 }, { - "epoch": 0.2063325644789264, - "grad_norm": 5.893924553144362, - "learning_rate": 1.9921453155773483e-05, - "loss": 2.2331, + "epoch": 0.1389438011861056, + "grad_norm": 4.826803219845096, + "learning_rate": 1.998609248412596e-05, + "loss": 0.9226, "step": 984 }, { - "epoch": 0.20654225204445376, - "grad_norm": 6.2542895594839925, - "learning_rate": 1.9921169710715798e-05, - "loss": 2.7147, + "epoch": 0.1390850042360915, + "grad_norm": 5.220878720177872, + "learning_rate": 1.9986011996463865e-05, + "loss": 1.2218, "step": 985 }, { - "epoch": 0.20675193960998112, - "grad_norm": 5.412600130038455, - "learning_rate": 1.9920885757180963e-05, - "loss": 2.5721, + "epoch": 0.13922620728607737, + "grad_norm": 5.4265888114952485, + "learning_rate": 1.9985931276731162e-05, + "loss": 1.1696, "step": 986 }, { - "epoch": 0.2069616271755085, - "grad_norm": 5.249332644559337, - "learning_rate": 1.9920601295183525e-05, - "loss": 2.5272, + "epoch": 0.13936741033606326, + "grad_norm": 5.902399316085023, + "learning_rate": 1.998585032492973e-05, + "loss": 1.1828, "step": 987 }, { - "epoch": 0.20717131474103587, - "grad_norm": 5.310076871818566, - "learning_rate": 1.9920316324738066e-05, - "loss": 2.3711, + "epoch": 0.13950861338604914, + "grad_norm": 4.663341571879512, + "learning_rate": 1.998576914106145e-05, + "loss": 1.1453, "step": 988 }, { - "epoch": 0.20738100230656323, - "grad_norm": 6.120712586398684, - "learning_rate": 1.9920030845859188e-05, - "loss": 2.5763, + "epoch": 0.13964981643603502, + "grad_norm": 5.1009069846630535, + "learning_rate": 1.9985687725128208e-05, + "loss": 1.1245, "step": 989 }, { - "epoch": 0.2075906898720906, - "grad_norm": 5.84672651053916, - "learning_rate": 1.9919744858561522e-05, - "loss": 2.417, + "epoch": 0.1397910194860209, + "grad_norm": 5.193301142220486, + "learning_rate": 1.9985606077131895e-05, + "loss": 1.2048, "step": 990 }, { - "epoch": 0.20780037743761795, - "grad_norm": 6.859888984723797, - "learning_rate": 1.991945836285973e-05, - "loss": 2.609, + "epoch": 0.1399322225360068, + "grad_norm": 4.837039022757927, + "learning_rate": 1.9985524197074412e-05, + "loss": 1.0964, "step": 991 }, { - "epoch": 0.2080100650031453, - "grad_norm": 5.198070603555374, - "learning_rate": 1.991917135876849e-05, - "loss": 2.3804, + "epoch": 0.14007342558599264, + "grad_norm": 5.303187748834104, + "learning_rate": 1.9985442084957657e-05, + "loss": 1.0946, "step": 992 }, { - "epoch": 0.20821975256867267, - "grad_norm": 5.384222366960085, - "learning_rate": 1.9918883846302514e-05, - "loss": 2.4152, + "epoch": 0.14021462863597853, + "grad_norm": 5.031949316489782, + "learning_rate": 1.9985359740783538e-05, + "loss": 0.9783, "step": 993 }, { - "epoch": 0.20842944013420003, - "grad_norm": 5.68017806619844, - "learning_rate": 1.991859582547654e-05, - "loss": 2.3473, + "epoch": 0.1403558316859644, + "grad_norm": 6.128873408688188, + "learning_rate": 1.9985277164553972e-05, + "loss": 1.2135, "step": 994 }, { - "epoch": 0.2086391276997274, - "grad_norm": 5.304686557561822, - "learning_rate": 1.9918307296305323e-05, - "loss": 2.562, + "epoch": 0.1404970347359503, + "grad_norm": 5.2262161075540225, + "learning_rate": 1.9985194356270877e-05, + "loss": 1.2272, "step": 995 }, { - "epoch": 0.20884881526525478, - "grad_norm": 5.467332092462531, - "learning_rate": 1.9918018258803657e-05, - "loss": 2.4205, + "epoch": 0.14063823778593618, + "grad_norm": 5.4524975848746005, + "learning_rate": 1.9985111315936177e-05, + "loss": 1.086, "step": 996 }, { - "epoch": 0.20905850283078214, - "grad_norm": 4.769902789990944, - "learning_rate": 1.9917728712986355e-05, - "loss": 2.2886, + "epoch": 0.14077944083592206, + "grad_norm": 4.7661745359885215, + "learning_rate": 1.9985028043551804e-05, + "loss": 1.1571, "step": 997 }, { - "epoch": 0.2092681903963095, - "grad_norm": 6.794217532194218, - "learning_rate": 1.9917438658868255e-05, - "loss": 2.6853, + "epoch": 0.14092064388590794, + "grad_norm": 5.342198708993227, + "learning_rate": 1.998494453911969e-05, + "loss": 1.1877, "step": 998 }, { - "epoch": 0.20947787796183687, - "grad_norm": 5.454906469970778, - "learning_rate": 1.991714809646422e-05, - "loss": 2.2207, + "epoch": 0.14106184693589383, + "grad_norm": 5.801971382713709, + "learning_rate": 1.998486080264178e-05, + "loss": 1.3002, "step": 999 }, { - "epoch": 0.20968756552736423, - "grad_norm": 6.298957621995344, - "learning_rate": 1.9916857025789146e-05, - "loss": 2.6666, + "epoch": 0.1412030499858797, + "grad_norm": 5.454876186146265, + "learning_rate": 1.9984776834120015e-05, + "loss": 1.1621, "step": 1000 }, { - "epoch": 0.2098972530928916, - "grad_norm": 5.521826411995796, - "learning_rate": 1.9916565446857946e-05, - "loss": 2.7545, + "epoch": 0.14134425303586556, + "grad_norm": 4.813788366382928, + "learning_rate": 1.998469263355635e-05, + "loss": 0.9452, "step": 1001 }, { - "epoch": 0.21010694065841895, - "grad_norm": 5.757874625011884, - "learning_rate": 1.9916273359685576e-05, - "loss": 2.5922, + "epoch": 0.14148545608585145, + "grad_norm": 5.624256910985407, + "learning_rate": 1.9984608200952736e-05, + "loss": 1.0643, "step": 1002 }, { - "epoch": 0.2103166282239463, - "grad_norm": 5.27902083093314, - "learning_rate": 1.991598076428699e-05, - "loss": 2.2553, + "epoch": 0.14162665913583733, + "grad_norm": 4.719082834886343, + "learning_rate": 1.9984523536311143e-05, + "loss": 0.9886, "step": 1003 }, { - "epoch": 0.21052631578947367, - "grad_norm": 5.498085477839137, - "learning_rate": 1.9915687660677192e-05, - "loss": 2.6383, + "epoch": 0.1417678621858232, + "grad_norm": 4.741326545885201, + "learning_rate": 1.9984438639633534e-05, + "loss": 1.0628, "step": 1004 }, { - "epoch": 0.21073600335500106, - "grad_norm": 5.113704941603106, - "learning_rate": 1.9915394048871205e-05, - "loss": 2.4952, + "epoch": 0.1419090652358091, + "grad_norm": 6.709670649714525, + "learning_rate": 1.998435351092188e-05, + "loss": 1.2321, "step": 1005 }, { - "epoch": 0.21094569092052842, - "grad_norm": 5.072259979149427, - "learning_rate": 1.9915099928884078e-05, - "loss": 2.4678, + "epoch": 0.14205026828579498, + "grad_norm": 4.747265385337215, + "learning_rate": 1.998426815017817e-05, + "loss": 1.1405, "step": 1006 }, { - "epoch": 0.21115537848605578, - "grad_norm": 4.884467389239115, - "learning_rate": 1.991480530073088e-05, - "loss": 2.4964, + "epoch": 0.14219147133578086, + "grad_norm": 4.955184824852184, + "learning_rate": 1.998418255740437e-05, + "loss": 1.1816, "step": 1007 }, { - "epoch": 0.21136506605158314, - "grad_norm": 4.836556286618052, - "learning_rate": 1.9914510164426715e-05, - "loss": 2.5517, + "epoch": 0.14233267438576674, + "grad_norm": 4.948725132811143, + "learning_rate": 1.9984096732602485e-05, + "loss": 1.1544, "step": 1008 }, { - "epoch": 0.2115747536171105, - "grad_norm": 4.7968385662798285, - "learning_rate": 1.9914214519986704e-05, - "loss": 2.6311, + "epoch": 0.1424738774357526, + "grad_norm": 5.005213721677638, + "learning_rate": 1.9984010675774504e-05, + "loss": 1.3052, "step": 1009 }, { - "epoch": 0.21178444118263787, - "grad_norm": 5.082890017057909, - "learning_rate": 1.9913918367426008e-05, - "loss": 2.485, + "epoch": 0.14261508048573848, + "grad_norm": 4.985660029015741, + "learning_rate": 1.9983924386922427e-05, + "loss": 1.0762, "step": 1010 }, { - "epoch": 0.21199412874816523, - "grad_norm": 5.643927808534393, - "learning_rate": 1.99136217067598e-05, - "loss": 2.5945, + "epoch": 0.14275628353572437, + "grad_norm": 4.927306637627592, + "learning_rate": 1.998383786604826e-05, + "loss": 1.1855, "step": 1011 }, { - "epoch": 0.2122038163136926, - "grad_norm": 5.84315180999882, - "learning_rate": 1.9913324538003287e-05, - "loss": 2.4666, + "epoch": 0.14289748658571025, + "grad_norm": 4.409473994268329, + "learning_rate": 1.998375111315401e-05, + "loss": 1.0704, "step": 1012 }, { - "epoch": 0.21241350387921995, - "grad_norm": 4.746601787110454, - "learning_rate": 1.9913026861171697e-05, - "loss": 2.2797, + "epoch": 0.14303868963569613, + "grad_norm": 4.739180242718452, + "learning_rate": 1.9983664128241694e-05, + "loss": 1.1632, "step": 1013 }, { - "epoch": 0.21262319144474734, - "grad_norm": 5.932084622281802, - "learning_rate": 1.9912728676280283e-05, - "loss": 2.659, + "epoch": 0.14317989268568201, + "grad_norm": 5.03655566069825, + "learning_rate": 1.998357691131334e-05, + "loss": 1.275, "step": 1014 }, { - "epoch": 0.2128328790102747, - "grad_norm": 6.344257633623337, - "learning_rate": 1.991242998334434e-05, - "loss": 2.6808, + "epoch": 0.1433210957356679, + "grad_norm": 4.923270709707517, + "learning_rate": 1.998348946237097e-05, + "loss": 1.0444, "step": 1015 }, { - "epoch": 0.21304256657580206, - "grad_norm": 4.894288146442785, - "learning_rate": 1.991213078237916e-05, - "loss": 2.3413, + "epoch": 0.14346229878565378, + "grad_norm": 5.7277676659111165, + "learning_rate": 1.998340178141661e-05, + "loss": 1.0806, "step": 1016 }, { - "epoch": 0.21325225414132942, - "grad_norm": 5.068525800195981, - "learning_rate": 1.9911831073400087e-05, - "loss": 2.1377, + "epoch": 0.14360350183563966, + "grad_norm": 5.334532052127538, + "learning_rate": 1.998331386845231e-05, + "loss": 1.1856, "step": 1017 }, { - "epoch": 0.21346194170685678, - "grad_norm": 5.214427846582535, - "learning_rate": 1.9911530856422483e-05, - "loss": 2.1781, + "epoch": 0.14374470488562552, + "grad_norm": 4.974947805606638, + "learning_rate": 1.9983225723480104e-05, + "loss": 0.9759, "step": 1018 }, { - "epoch": 0.21367162927238414, - "grad_norm": 5.408891363357328, - "learning_rate": 1.9911230131461728e-05, - "loss": 2.4674, + "epoch": 0.1438859079356114, + "grad_norm": 5.465656018484248, + "learning_rate": 1.9983137346502046e-05, + "loss": 1.0451, "step": 1019 }, { - "epoch": 0.2138813168379115, - "grad_norm": 5.8909554529528645, - "learning_rate": 1.991092889853324e-05, - "loss": 2.795, + "epoch": 0.14402711098559728, + "grad_norm": 4.889948656561684, + "learning_rate": 1.9983048737520186e-05, + "loss": 1.068, "step": 1020 }, { - "epoch": 0.21409100440343887, - "grad_norm": 5.385242429979247, - "learning_rate": 1.991062715765246e-05, - "loss": 2.4117, + "epoch": 0.14416831403558317, + "grad_norm": 5.804464843060851, + "learning_rate": 1.9982959896536588e-05, + "loss": 1.1558, "step": 1021 }, { - "epoch": 0.21430069196896623, - "grad_norm": 6.775863301077143, - "learning_rate": 1.9910324908834847e-05, - "loss": 2.6616, + "epoch": 0.14430951708556905, + "grad_norm": 4.819953150693295, + "learning_rate": 1.998287082355331e-05, + "loss": 1.0911, "step": 1022 }, { - "epoch": 0.21451037953449362, - "grad_norm": 5.6366557314701415, - "learning_rate": 1.9910022152095893e-05, - "loss": 2.6525, + "epoch": 0.14445072013555493, + "grad_norm": 4.27568049613761, + "learning_rate": 1.9982781518572424e-05, + "loss": 0.9496, "step": 1023 }, { - "epoch": 0.21472006710002098, - "grad_norm": 5.928054903499012, - "learning_rate": 1.9909718887451116e-05, - "loss": 2.4277, + "epoch": 0.14459192318554082, + "grad_norm": 5.155309453242915, + "learning_rate": 1.998269198159601e-05, + "loss": 1.074, "step": 1024 }, { - "epoch": 0.21492975466554834, - "grad_norm": 5.6545899514519675, - "learning_rate": 1.990941511491606e-05, - "loss": 2.5467, + "epoch": 0.1447331262355267, + "grad_norm": 5.09547458259276, + "learning_rate": 1.9982602212626144e-05, + "loss": 1.2055, "step": 1025 }, { - "epoch": 0.2151394422310757, - "grad_norm": 5.915372301589685, - "learning_rate": 1.990911083450629e-05, - "loss": 2.6486, + "epoch": 0.14487432928551255, + "grad_norm": 5.4946745608607985, + "learning_rate": 1.9982512211664914e-05, + "loss": 1.1038, "step": 1026 }, { - "epoch": 0.21534912979660306, - "grad_norm": 4.910707852843951, - "learning_rate": 1.9908806046237402e-05, - "loss": 2.4959, + "epoch": 0.14501553233549844, + "grad_norm": 4.844896733765715, + "learning_rate": 1.998242197871441e-05, + "loss": 0.9975, "step": 1027 }, { - "epoch": 0.21555881736213042, - "grad_norm": 4.7234529764044595, - "learning_rate": 1.990850075012502e-05, - "loss": 2.5617, + "epoch": 0.14515673538548432, + "grad_norm": 5.9709915805171745, + "learning_rate": 1.9982331513776733e-05, + "loss": 1.4021, "step": 1028 }, { - "epoch": 0.21576850492765778, - "grad_norm": 4.807494910872457, - "learning_rate": 1.990819494618479e-05, - "loss": 2.4391, + "epoch": 0.1452979384354702, + "grad_norm": 5.475157761291001, + "learning_rate": 1.9982240816853983e-05, + "loss": 1.2389, "step": 1029 }, { - "epoch": 0.21597819249318514, - "grad_norm": 4.977852264588288, - "learning_rate": 1.9907888634432387e-05, - "loss": 2.3121, + "epoch": 0.14543914148545609, + "grad_norm": 4.8643699153987, + "learning_rate": 1.9982149887948264e-05, + "loss": 0.9482, "step": 1030 }, { - "epoch": 0.2161878800587125, - "grad_norm": 4.752309202791071, - "learning_rate": 1.9907581814883504e-05, - "loss": 2.4005, + "epoch": 0.14558034453544197, + "grad_norm": 4.365670520441248, + "learning_rate": 1.9982058727061692e-05, + "loss": 0.9553, "step": 1031 }, { - "epoch": 0.2163975676242399, - "grad_norm": 5.340413713610933, - "learning_rate": 1.9907274487553866e-05, - "loss": 2.1291, + "epoch": 0.14572154758542785, + "grad_norm": 5.20439111539994, + "learning_rate": 1.998196733419639e-05, + "loss": 1.0998, "step": 1032 }, { - "epoch": 0.21660725518976726, - "grad_norm": 4.8443014728879845, - "learning_rate": 1.990696665245923e-05, - "loss": 2.2789, + "epoch": 0.14586275063541373, + "grad_norm": 4.683631406535726, + "learning_rate": 1.9981875709354478e-05, + "loss": 1.234, "step": 1033 }, { - "epoch": 0.21681694275529462, - "grad_norm": 5.018147690157275, - "learning_rate": 1.990665830961537e-05, - "loss": 2.5965, + "epoch": 0.14600395368539962, + "grad_norm": 4.816067248130286, + "learning_rate": 1.998178385253808e-05, + "loss": 1.2131, "step": 1034 }, { - "epoch": 0.21702663032082198, - "grad_norm": 5.450430771157589, - "learning_rate": 1.990634945903809e-05, - "loss": 2.382, + "epoch": 0.14614515673538547, + "grad_norm": 5.18864985856515, + "learning_rate": 1.998169176374934e-05, + "loss": 1.1972, "step": 1035 }, { - "epoch": 0.21723631788634934, - "grad_norm": 5.629604652990529, - "learning_rate": 1.9906040100743217e-05, - "loss": 2.4539, + "epoch": 0.14628635978537136, + "grad_norm": 4.8246602370808755, + "learning_rate": 1.9981599442990397e-05, + "loss": 0.9288, "step": 1036 }, { - "epoch": 0.2174460054518767, - "grad_norm": 5.258814896738817, - "learning_rate": 1.990573023474661e-05, - "loss": 2.2414, + "epoch": 0.14642756283535724, + "grad_norm": 4.208838681132935, + "learning_rate": 1.998150689026339e-05, + "loss": 0.9546, "step": 1037 }, { - "epoch": 0.21765569301740406, - "grad_norm": 5.374829820500496, - "learning_rate": 1.990541986106415e-05, - "loss": 2.6661, + "epoch": 0.14656876588534312, + "grad_norm": 4.955894406623324, + "learning_rate": 1.9981414105570473e-05, + "loss": 1.0658, "step": 1038 }, { - "epoch": 0.21786538058293142, - "grad_norm": 5.300374494757614, - "learning_rate": 1.9905108979711736e-05, - "loss": 2.2204, + "epoch": 0.146709968935329, + "grad_norm": 4.68067412598076, + "learning_rate": 1.9981321088913806e-05, + "loss": 1.0163, "step": 1039 }, { - "epoch": 0.21807506814845878, - "grad_norm": 4.9667622078988165, - "learning_rate": 1.9904797590705312e-05, - "loss": 2.4237, + "epoch": 0.1468511719853149, + "grad_norm": 4.645297238272814, + "learning_rate": 1.9981227840295544e-05, + "loss": 1.0526, "step": 1040 }, { - "epoch": 0.21828475571398617, - "grad_norm": 5.3780683496462895, - "learning_rate": 1.990448569406083e-05, - "loss": 2.5063, + "epoch": 0.14699237503530077, + "grad_norm": 4.7469964863101834, + "learning_rate": 1.998113435971786e-05, + "loss": 1.0682, "step": 1041 }, { - "epoch": 0.21849444327951353, - "grad_norm": 5.407520357169475, - "learning_rate": 1.990417328979428e-05, - "loss": 2.2237, + "epoch": 0.14713357808528665, + "grad_norm": 5.000522242682167, + "learning_rate": 1.9981040647182923e-05, + "loss": 0.9545, "step": 1042 }, { - "epoch": 0.2187041308450409, - "grad_norm": 5.134979884921702, - "learning_rate": 1.9903860377921667e-05, - "loss": 2.2159, + "epoch": 0.1472747811352725, + "grad_norm": 5.042863383463851, + "learning_rate": 1.998094670269291e-05, + "loss": 1.1831, "step": 1043 }, { - "epoch": 0.21891381841056826, - "grad_norm": 5.185963481755813, - "learning_rate": 1.9903546958459033e-05, - "loss": 2.5709, + "epoch": 0.1474159841852584, + "grad_norm": 4.275336733477554, + "learning_rate": 1.998085252625001e-05, + "loss": 0.8503, "step": 1044 }, { - "epoch": 0.21912350597609562, - "grad_norm": 6.417992882326232, - "learning_rate": 1.9903233031422445e-05, - "loss": 2.5104, + "epoch": 0.14755718723524427, + "grad_norm": 4.550947022277699, + "learning_rate": 1.9980758117856403e-05, + "loss": 1.1488, "step": 1045 }, { - "epoch": 0.21933319354162298, - "grad_norm": 7.015523503573994, - "learning_rate": 1.9902918596827985e-05, - "loss": 2.7682, + "epoch": 0.14769839028523016, + "grad_norm": 5.06680443540758, + "learning_rate": 1.9980663477514294e-05, + "loss": 1.1539, "step": 1046 }, { - "epoch": 0.21954288110715034, - "grad_norm": 5.238605408520289, - "learning_rate": 1.990260365469177e-05, - "loss": 2.685, + "epoch": 0.14783959333521604, + "grad_norm": 5.713185343523915, + "learning_rate": 1.998056860522587e-05, + "loss": 1.2296, "step": 1047 }, { - "epoch": 0.2197525686726777, - "grad_norm": 5.2584784374393685, - "learning_rate": 1.9902288205029944e-05, - "loss": 2.5484, + "epoch": 0.14798079638520192, + "grad_norm": 5.230852325398315, + "learning_rate": 1.9980473500993346e-05, + "loss": 1.1401, "step": 1048 }, { - "epoch": 0.21996225623820506, - "grad_norm": 5.017678731434627, - "learning_rate": 1.9901972247858673e-05, - "loss": 2.1928, + "epoch": 0.1481219994351878, + "grad_norm": 3.9325487650392126, + "learning_rate": 1.9980378164818926e-05, + "loss": 1.0065, "step": 1049 }, { - "epoch": 0.22017194380373245, - "grad_norm": 6.009470586969942, - "learning_rate": 1.990165578319415e-05, - "loss": 2.3864, + "epoch": 0.1482632024851737, + "grad_norm": 4.674932051075275, + "learning_rate": 1.9980282596704828e-05, + "loss": 1.2407, "step": 1050 }, { - "epoch": 0.2203816313692598, - "grad_norm": 5.444071213140894, - "learning_rate": 1.9901338811052593e-05, - "loss": 2.2969, + "epoch": 0.14840440553515957, + "grad_norm": 4.694106343490261, + "learning_rate": 1.998018679665327e-05, + "loss": 1.1867, "step": 1051 }, { - "epoch": 0.22059131893478717, - "grad_norm": 5.615432192759512, - "learning_rate": 1.990102133145025e-05, - "loss": 2.5327, + "epoch": 0.14854560858514543, + "grad_norm": 5.383007842176398, + "learning_rate": 1.9980090764666486e-05, + "loss": 1.193, "step": 1052 }, { - "epoch": 0.22080100650031453, - "grad_norm": 5.255182920505702, - "learning_rate": 1.9900703344403392e-05, - "loss": 2.3714, + "epoch": 0.1486868116351313, + "grad_norm": 4.840170984239393, + "learning_rate": 1.9979994500746697e-05, + "loss": 1.0282, "step": 1053 }, { - "epoch": 0.2210106940658419, - "grad_norm": 5.413996056497158, - "learning_rate": 1.9900384849928316e-05, - "loss": 2.5327, + "epoch": 0.1488280146851172, + "grad_norm": 5.101448686942334, + "learning_rate": 1.997989800489615e-05, + "loss": 1.1701, "step": 1054 }, { - "epoch": 0.22122038163136926, - "grad_norm": 6.066541748074999, - "learning_rate": 1.9900065848041344e-05, - "loss": 2.4643, + "epoch": 0.14896921773510308, + "grad_norm": 4.815828081728059, + "learning_rate": 1.9979801277117082e-05, + "loss": 1.1164, "step": 1055 }, { - "epoch": 0.22143006919689662, - "grad_norm": 5.326762180328677, - "learning_rate": 1.9899746338758824e-05, - "loss": 2.3716, + "epoch": 0.14911042078508896, + "grad_norm": 4.783152538415447, + "learning_rate": 1.9979704317411745e-05, + "loss": 1.3569, "step": 1056 }, { - "epoch": 0.22163975676242398, - "grad_norm": 5.802056536507036, - "learning_rate": 1.989942632209714e-05, - "loss": 2.5801, + "epoch": 0.14925162383507484, + "grad_norm": 4.879324034262992, + "learning_rate": 1.9979607125782387e-05, + "loss": 1.1476, "step": 1057 }, { - "epoch": 0.22184944432795134, - "grad_norm": 5.151459060862107, - "learning_rate": 1.9899105798072682e-05, - "loss": 2.3164, + "epoch": 0.14939282688506073, + "grad_norm": 4.548052852951136, + "learning_rate": 1.997950970223127e-05, + "loss": 1.2485, "step": 1058 }, { - "epoch": 0.22205913189347873, - "grad_norm": 6.18651319712188, - "learning_rate": 1.9898784766701884e-05, - "loss": 2.5993, + "epoch": 0.1495340299350466, + "grad_norm": 5.058620602455277, + "learning_rate": 1.9979412046760657e-05, + "loss": 1.2437, "step": 1059 }, { - "epoch": 0.2222688194590061, - "grad_norm": 4.982734095422201, - "learning_rate": 1.9898463228001203e-05, - "loss": 2.5444, + "epoch": 0.14967523298503246, + "grad_norm": 4.165882301703776, + "learning_rate": 1.9979314159372815e-05, + "loss": 1.1035, "step": 1060 }, { - "epoch": 0.22247850702453345, - "grad_norm": 4.850342014040275, - "learning_rate": 1.9898141181987106e-05, - "loss": 2.4433, + "epoch": 0.14981643603501835, + "grad_norm": 4.231504717921089, + "learning_rate": 1.9979216040070026e-05, + "loss": 1.045, "step": 1061 }, { - "epoch": 0.2226881945900608, - "grad_norm": 6.324858012002737, - "learning_rate": 1.989781862867611e-05, - "loss": 2.4208, + "epoch": 0.14995763908500423, + "grad_norm": 4.500094370397637, + "learning_rate": 1.9979117688854565e-05, + "loss": 1.1506, "step": 1062 }, { - "epoch": 0.22289788215558817, - "grad_norm": 5.616595221771015, - "learning_rate": 1.989749556808474e-05, - "loss": 2.4745, + "epoch": 0.1500988421349901, + "grad_norm": 5.022187789468775, + "learning_rate": 1.9979019105728717e-05, + "loss": 1.0845, "step": 1063 }, { - "epoch": 0.22310756972111553, - "grad_norm": 5.380188697827271, - "learning_rate": 1.989717200022956e-05, - "loss": 2.1131, + "epoch": 0.150240045184976, + "grad_norm": 4.388156715038051, + "learning_rate": 1.9978920290694776e-05, + "loss": 1.0058, "step": 1064 }, { - "epoch": 0.2233172572866429, - "grad_norm": 5.450797630408192, - "learning_rate": 1.9896847925127146e-05, - "loss": 2.3037, + "epoch": 0.15038124823496188, + "grad_norm": 4.49583224495091, + "learning_rate": 1.9978821243755035e-05, + "loss": 1.0972, "step": 1065 }, { - "epoch": 0.22352694485217026, - "grad_norm": 5.457348114452025, - "learning_rate": 1.989652334279411e-05, - "loss": 2.1687, + "epoch": 0.15052245128494776, + "grad_norm": 5.150998331665217, + "learning_rate": 1.99787219649118e-05, + "loss": 1.2336, "step": 1066 }, { - "epoch": 0.22373663241769762, - "grad_norm": 6.046739047053696, - "learning_rate": 1.989619825324709e-05, - "loss": 2.4515, + "epoch": 0.15066365433493364, + "grad_norm": 4.5903759486498155, + "learning_rate": 1.9978622454167376e-05, + "loss": 1.0926, "step": 1067 }, { - "epoch": 0.223946319983225, - "grad_norm": 5.711326213587446, - "learning_rate": 1.9895872656502746e-05, - "loss": 2.6118, + "epoch": 0.15080485738491953, + "grad_norm": 5.901878520313315, + "learning_rate": 1.9978522711524076e-05, + "loss": 1.2474, "step": 1068 }, { - "epoch": 0.22415600754875237, - "grad_norm": 5.008363290924573, - "learning_rate": 1.989554655257776e-05, - "loss": 2.2721, + "epoch": 0.15094606043490538, + "grad_norm": 5.524109606050706, + "learning_rate": 1.9978422736984216e-05, + "loss": 1.0601, "step": 1069 }, { - "epoch": 0.22436569511427973, - "grad_norm": 5.674063647957833, - "learning_rate": 1.9895219941488858e-05, - "loss": 2.4694, + "epoch": 0.15108726348489127, + "grad_norm": 5.9551718005021606, + "learning_rate": 1.997832253055012e-05, + "loss": 1.2725, "step": 1070 }, { - "epoch": 0.2245753826798071, - "grad_norm": 5.015264460456114, - "learning_rate": 1.9894892823252765e-05, - "loss": 2.4531, + "epoch": 0.15122846653487715, + "grad_norm": 4.989541845809095, + "learning_rate": 1.997822209222412e-05, + "loss": 1.2202, "step": 1071 }, { - "epoch": 0.22478507024533445, - "grad_norm": 5.46331736480284, - "learning_rate": 1.9894565197886258e-05, - "loss": 2.4283, + "epoch": 0.15136966958486303, + "grad_norm": 5.276690780495154, + "learning_rate": 1.9978121422008547e-05, + "loss": 1.2666, "step": 1072 }, { - "epoch": 0.2249947578108618, - "grad_norm": 5.329346418566365, - "learning_rate": 1.989423706540612e-05, - "loss": 2.4312, + "epoch": 0.15151087263484891, + "grad_norm": 4.789907089313249, + "learning_rate": 1.9978020519905742e-05, + "loss": 1.1662, "step": 1073 }, { - "epoch": 0.22520444537638917, - "grad_norm": 6.0640851345305755, - "learning_rate": 1.9893908425829173e-05, - "loss": 2.4175, + "epoch": 0.1516520756848348, + "grad_norm": 7.601629566967476, + "learning_rate": 1.997791938591805e-05, + "loss": 1.1854, "step": 1074 }, { - "epoch": 0.22541413294191653, - "grad_norm": 5.615423527558611, - "learning_rate": 1.989357927917226e-05, - "loss": 2.4979, + "epoch": 0.15179327873482068, + "grad_norm": 6.0086804548275285, + "learning_rate": 1.9977818020047816e-05, + "loss": 1.4079, "step": 1075 }, { - "epoch": 0.2256238205074439, - "grad_norm": 5.795191410457589, - "learning_rate": 1.9893249625452244e-05, - "loss": 2.283, + "epoch": 0.15193448178480656, + "grad_norm": 4.326979328470599, + "learning_rate": 1.9977716422297404e-05, + "loss": 1.0989, "step": 1076 }, { - "epoch": 0.22583350807297128, - "grad_norm": 5.402480276096938, - "learning_rate": 1.989291946468603e-05, - "loss": 2.3371, + "epoch": 0.15207568483479242, + "grad_norm": 5.097828981266428, + "learning_rate": 1.997761459266917e-05, + "loss": 1.1963, "step": 1077 }, { - "epoch": 0.22604319563849865, - "grad_norm": 6.238517874384495, - "learning_rate": 1.989258879689054e-05, - "loss": 2.315, + "epoch": 0.1522168878847783, + "grad_norm": 4.7327716238405335, + "learning_rate": 1.9977512531165484e-05, + "loss": 1.3076, "step": 1078 }, { - "epoch": 0.226252883204026, - "grad_norm": 5.274754239395247, - "learning_rate": 1.9892257622082706e-05, - "loss": 2.4908, + "epoch": 0.15235809093476418, + "grad_norm": 4.041750148542854, + "learning_rate": 1.9977410237788715e-05, + "loss": 0.9088, "step": 1079 }, { - "epoch": 0.22646257076955337, - "grad_norm": 5.864569440688154, - "learning_rate": 1.9891925940279518e-05, - "loss": 2.1208, + "epoch": 0.15249929398475007, + "grad_norm": 5.703759166864815, + "learning_rate": 1.997730771254124e-05, + "loss": 1.2683, "step": 1080 }, { - "epoch": 0.22667225833508073, - "grad_norm": 5.378517977996407, - "learning_rate": 1.9891593751497967e-05, - "loss": 2.554, + "epoch": 0.15264049703473595, + "grad_norm": 4.953506936126508, + "learning_rate": 1.9977204955425443e-05, + "loss": 1.0767, "step": 1081 }, { - "epoch": 0.2268819459006081, - "grad_norm": 5.439393863834567, - "learning_rate": 1.9891261055755084e-05, - "loss": 2.3881, + "epoch": 0.15278170008472183, + "grad_norm": 4.829695878897943, + "learning_rate": 1.9977101966443713e-05, + "loss": 1.146, "step": 1082 }, { - "epoch": 0.22709163346613545, - "grad_norm": 5.511717855127642, - "learning_rate": 1.989092785306791e-05, - "loss": 2.3734, + "epoch": 0.15292290313470772, + "grad_norm": 4.620173504468806, + "learning_rate": 1.997699874559844e-05, + "loss": 1.0712, "step": 1083 }, { - "epoch": 0.2273013210316628, - "grad_norm": 4.680462502286909, - "learning_rate": 1.9890594143453533e-05, - "loss": 2.4086, + "epoch": 0.1530641061846936, + "grad_norm": 4.977907880105714, + "learning_rate": 1.9976895292892028e-05, + "loss": 1.0521, "step": 1084 }, { - "epoch": 0.22751100859719017, - "grad_norm": 5.5267447915425825, - "learning_rate": 1.9890259926929048e-05, - "loss": 2.4705, + "epoch": 0.15320530923467948, + "grad_norm": 5.566691571022359, + "learning_rate": 1.9976791608326876e-05, + "loss": 1.4203, "step": 1085 }, { - "epoch": 0.22772069616271756, - "grad_norm": 6.274532441087503, - "learning_rate": 1.9889925203511594e-05, - "loss": 2.8928, + "epoch": 0.15334651228466534, + "grad_norm": 4.78538990233984, + "learning_rate": 1.9976687691905394e-05, + "loss": 0.9861, "step": 1086 }, { - "epoch": 0.22793038372824492, - "grad_norm": 5.796756491259425, - "learning_rate": 1.9889589973218315e-05, - "loss": 2.0499, + "epoch": 0.15348771533465122, + "grad_norm": 5.695630859346557, + "learning_rate": 1.997658354363e-05, + "loss": 1.2845, "step": 1087 }, { - "epoch": 0.22814007129377228, - "grad_norm": 5.498948459659436, - "learning_rate": 1.98892542360664e-05, - "loss": 2.5408, + "epoch": 0.1536289183846371, + "grad_norm": 5.890929271497539, + "learning_rate": 1.9976479163503112e-05, + "loss": 1.1156, "step": 1088 }, { - "epoch": 0.22834975885929965, - "grad_norm": 6.00016785085112, - "learning_rate": 1.988891799207305e-05, - "loss": 2.4997, + "epoch": 0.15377012143462299, + "grad_norm": 5.596829149931767, + "learning_rate": 1.9976374551527156e-05, + "loss": 1.2213, "step": 1089 }, { - "epoch": 0.228559446424827, - "grad_norm": 5.387316315703425, - "learning_rate": 1.9888581241255505e-05, - "loss": 2.4935, + "epoch": 0.15391132448460887, + "grad_norm": 4.313428149548073, + "learning_rate": 1.997626970770457e-05, + "loss": 1.1139, "step": 1090 }, { - "epoch": 0.22876913399035437, - "grad_norm": 5.2308199820132995, - "learning_rate": 1.9888243983631018e-05, - "loss": 2.6946, + "epoch": 0.15405252753459475, + "grad_norm": 5.2196992029204266, + "learning_rate": 1.997616463203778e-05, + "loss": 1.0761, "step": 1091 }, { - "epoch": 0.22897882155588173, - "grad_norm": 5.243182635403196, - "learning_rate": 1.988790621921688e-05, - "loss": 2.3642, + "epoch": 0.15419373058458063, + "grad_norm": 4.786288220678975, + "learning_rate": 1.997605932452923e-05, + "loss": 1.1584, "step": 1092 }, { - "epoch": 0.2291885091214091, - "grad_norm": 6.810486904566175, - "learning_rate": 1.9887567948030397e-05, - "loss": 2.3179, + "epoch": 0.15433493363456652, + "grad_norm": 4.1452058579049895, + "learning_rate": 1.9975953785181373e-05, + "loss": 1.1304, "step": 1093 }, { - "epoch": 0.22939819668693645, - "grad_norm": 5.156717603786679, - "learning_rate": 1.9887229170088903e-05, - "loss": 2.2929, + "epoch": 0.15447613668455237, + "grad_norm": 5.520870451267546, + "learning_rate": 1.9975848013996658e-05, + "loss": 1.313, "step": 1094 }, { - "epoch": 0.22960788425246384, - "grad_norm": 5.589558908681318, - "learning_rate": 1.988688988540977e-05, - "loss": 2.3762, + "epoch": 0.15461733973453826, + "grad_norm": 5.327997661783379, + "learning_rate": 1.9975742010977544e-05, + "loss": 1.3395, "step": 1095 }, { - "epoch": 0.2298175718179912, - "grad_norm": 5.858290454627939, - "learning_rate": 1.988655009401038e-05, - "loss": 2.5275, + "epoch": 0.15475854278452414, + "grad_norm": 5.023970991530368, + "learning_rate": 1.9975635776126492e-05, + "loss": 1.2949, "step": 1096 }, { - "epoch": 0.23002725938351856, - "grad_norm": 5.949439503505859, - "learning_rate": 1.9886209795908153e-05, - "loss": 2.3339, + "epoch": 0.15489974583451002, + "grad_norm": 4.581170786574883, + "learning_rate": 1.9975529309445972e-05, + "loss": 1.0555, "step": 1097 }, { - "epoch": 0.23023694694904592, - "grad_norm": 5.460743579538918, - "learning_rate": 1.9885868991120527e-05, - "loss": 2.3334, + "epoch": 0.1550409488844959, + "grad_norm": 5.300415428719428, + "learning_rate": 1.9975422610938463e-05, + "loss": 1.1372, "step": 1098 }, { - "epoch": 0.23044663451457328, - "grad_norm": 5.963284293167345, - "learning_rate": 1.9885527679664968e-05, - "loss": 2.3996, + "epoch": 0.1551821519344818, + "grad_norm": 5.146995178214368, + "learning_rate": 1.9975315680606436e-05, + "loss": 1.1885, "step": 1099 }, { - "epoch": 0.23065632208010065, - "grad_norm": 5.99662899042123, - "learning_rate": 1.9885185861558972e-05, - "loss": 2.3276, + "epoch": 0.15532335498446767, + "grad_norm": 4.3958111063798295, + "learning_rate": 1.9975208518452384e-05, + "loss": 1.0613, "step": 1100 }, { - "epoch": 0.230866009645628, - "grad_norm": 6.246734905317924, - "learning_rate": 1.9884843536820053e-05, - "loss": 2.3025, + "epoch": 0.15546455803445355, + "grad_norm": 4.496637765971624, + "learning_rate": 1.9975101124478794e-05, + "loss": 1.0108, "step": 1101 }, { - "epoch": 0.23107569721115537, - "grad_norm": 6.629480689689256, - "learning_rate": 1.9884500705465758e-05, - "loss": 2.5753, + "epoch": 0.15560576108443944, + "grad_norm": 5.195064630750615, + "learning_rate": 1.997499349868816e-05, + "loss": 1.2129, "step": 1102 }, { - "epoch": 0.23128538477668276, - "grad_norm": 5.588705383470157, - "learning_rate": 1.988415736751366e-05, - "loss": 2.6218, + "epoch": 0.1557469641344253, + "grad_norm": 6.025793699254552, + "learning_rate": 1.997488564108298e-05, + "loss": 1.2281, "step": 1103 }, { - "epoch": 0.23149507234221012, - "grad_norm": 6.03275764601439, - "learning_rate": 1.9883813522981354e-05, - "loss": 2.3798, + "epoch": 0.15588816718441117, + "grad_norm": 4.2671990513538605, + "learning_rate": 1.9974777551665773e-05, + "loss": 0.9592, "step": 1104 }, { - "epoch": 0.23170475990773748, - "grad_norm": 5.945400602844596, - "learning_rate": 1.988346917188646e-05, - "loss": 2.5885, + "epoch": 0.15602937023439706, + "grad_norm": 5.099057981736808, + "learning_rate": 1.9974669230439043e-05, + "loss": 1.0767, "step": 1105 }, { - "epoch": 0.23191444747326484, - "grad_norm": 5.6174688882209205, - "learning_rate": 1.988312431424663e-05, - "loss": 2.1999, + "epoch": 0.15617057328438294, + "grad_norm": 4.43202272681864, + "learning_rate": 1.9974560677405307e-05, + "loss": 0.993, "step": 1106 }, { - "epoch": 0.2321241350387922, - "grad_norm": 5.4661783558100785, - "learning_rate": 1.988277895007954e-05, - "loss": 2.275, + "epoch": 0.15631177633436882, + "grad_norm": 4.220344609414598, + "learning_rate": 1.9974451892567086e-05, + "loss": 0.9865, "step": 1107 }, { - "epoch": 0.23233382260431956, - "grad_norm": 5.0527309781561875, - "learning_rate": 1.9882433079402883e-05, - "loss": 2.097, + "epoch": 0.1564529793843547, + "grad_norm": 5.143021275270448, + "learning_rate": 1.9974342875926913e-05, + "loss": 1.5237, "step": 1108 }, { - "epoch": 0.23254351016984692, - "grad_norm": 5.272169916430745, - "learning_rate": 1.9882086702234392e-05, - "loss": 2.2594, + "epoch": 0.1565941824343406, + "grad_norm": 4.738749555939336, + "learning_rate": 1.997423362748732e-05, + "loss": 1.1025, "step": 1109 }, { - "epoch": 0.23275319773537428, - "grad_norm": 5.638774706877664, - "learning_rate": 1.988173981859182e-05, - "loss": 2.0905, + "epoch": 0.15673538548432647, + "grad_norm": 5.05114651810557, + "learning_rate": 1.997412414725084e-05, + "loss": 1.2272, "step": 1110 }, { - "epoch": 0.23296288530090165, - "grad_norm": 5.095121898086692, - "learning_rate": 1.9881392428492942e-05, - "loss": 2.4484, + "epoch": 0.15687658853431233, + "grad_norm": 4.591643963880788, + "learning_rate": 1.9974014435220027e-05, + "loss": 1.0934, "step": 1111 }, { - "epoch": 0.23317257286642903, - "grad_norm": 5.312797842547137, - "learning_rate": 1.9881044531955565e-05, - "loss": 2.203, + "epoch": 0.1570177915842982, + "grad_norm": 5.062834204920063, + "learning_rate": 1.9973904491397426e-05, + "loss": 1.174, "step": 1112 }, { - "epoch": 0.2333822604319564, - "grad_norm": 6.2745768185297965, - "learning_rate": 1.9880696128997517e-05, - "loss": 2.5075, + "epoch": 0.1571589946342841, + "grad_norm": 4.678185160298189, + "learning_rate": 1.9973794315785587e-05, + "loss": 1.0912, "step": 1113 }, { - "epoch": 0.23359194799748376, - "grad_norm": 5.5122408090759984, - "learning_rate": 1.988034721963665e-05, - "loss": 2.2074, + "epoch": 0.15730019768426998, + "grad_norm": 4.19405399220832, + "learning_rate": 1.997368390838708e-05, + "loss": 0.8624, "step": 1114 }, { - "epoch": 0.23380163556301112, - "grad_norm": 6.0371519336847905, - "learning_rate": 1.987999780389086e-05, - "loss": 2.2155, + "epoch": 0.15744140073425586, + "grad_norm": 5.145176571186488, + "learning_rate": 1.9973573269204466e-05, + "loss": 1.1022, "step": 1115 }, { - "epoch": 0.23401132312853848, - "grad_norm": 6.317337843256815, - "learning_rate": 1.9879647881778043e-05, - "loss": 2.4839, + "epoch": 0.15758260378424174, + "grad_norm": 4.930982695701197, + "learning_rate": 1.9973462398240316e-05, + "loss": 0.9899, "step": 1116 }, { - "epoch": 0.23422101069406584, - "grad_norm": 6.208222649238208, - "learning_rate": 1.9879297453316136e-05, - "loss": 2.533, + "epoch": 0.15772380683422763, + "grad_norm": 4.837922958625512, + "learning_rate": 1.997335129549721e-05, + "loss": 1.1217, "step": 1117 }, { - "epoch": 0.2344306982595932, - "grad_norm": 6.2122656465287625, - "learning_rate": 1.9878946518523104e-05, - "loss": 2.5666, + "epoch": 0.1578650098842135, + "grad_norm": 6.508780539481619, + "learning_rate": 1.997323996097772e-05, + "loss": 1.2971, "step": 1118 }, { - "epoch": 0.23464038582512056, - "grad_norm": 7.153919656271692, - "learning_rate": 1.9878595077416924e-05, - "loss": 2.7303, + "epoch": 0.1580062129341994, + "grad_norm": 5.974016503996492, + "learning_rate": 1.9973128394684448e-05, + "loss": 1.2914, "step": 1119 }, { - "epoch": 0.23485007339064792, - "grad_norm": 4.792929169826411, - "learning_rate": 1.9878243130015613e-05, - "loss": 2.486, + "epoch": 0.15814741598418525, + "grad_norm": 4.603512649117855, + "learning_rate": 1.9973016596619973e-05, + "loss": 1.0364, "step": 1120 }, { - "epoch": 0.2350597609561753, - "grad_norm": 5.41165104539102, - "learning_rate": 1.987789067633721e-05, - "loss": 2.2663, + "epoch": 0.15828861903417113, + "grad_norm": 4.522512186367647, + "learning_rate": 1.9972904566786903e-05, + "loss": 0.8583, "step": 1121 }, { - "epoch": 0.23526944852170267, - "grad_norm": 5.223460485397531, - "learning_rate": 1.9877537716399774e-05, - "loss": 2.478, + "epoch": 0.158429822084157, + "grad_norm": 5.649589325592691, + "learning_rate": 1.997279230518784e-05, + "loss": 1.197, "step": 1122 }, { - "epoch": 0.23547913608723003, - "grad_norm": 5.729524446536079, - "learning_rate": 1.9877184250221403e-05, - "loss": 2.5641, + "epoch": 0.1585710251341429, + "grad_norm": 4.552292209312444, + "learning_rate": 1.9972679811825384e-05, + "loss": 1.1165, "step": 1123 }, { - "epoch": 0.2356888236527574, - "grad_norm": 5.502387336399626, - "learning_rate": 1.9876830277820208e-05, - "loss": 2.2782, + "epoch": 0.15871222818412878, + "grad_norm": 6.400696905377842, + "learning_rate": 1.9972567086702163e-05, + "loss": 1.2852, "step": 1124 }, { - "epoch": 0.23589851121828476, - "grad_norm": 6.15686932207332, - "learning_rate": 1.987647579921433e-05, - "loss": 2.4573, + "epoch": 0.15885343123411466, + "grad_norm": 4.825929786172961, + "learning_rate": 1.9972454129820786e-05, + "loss": 1.2004, "step": 1125 }, { - "epoch": 0.23610819878381212, - "grad_norm": 4.567698842648741, - "learning_rate": 1.9876120814421934e-05, - "loss": 2.1701, + "epoch": 0.15899463428410054, + "grad_norm": 5.270462972565587, + "learning_rate": 1.997234094118388e-05, + "loss": 1.039, "step": 1126 }, { - "epoch": 0.23631788634933948, - "grad_norm": 6.289496308078909, - "learning_rate": 1.987576532346122e-05, - "loss": 2.2438, + "epoch": 0.15913583733408643, + "grad_norm": 4.117872923603862, + "learning_rate": 1.997222752079408e-05, + "loss": 0.8892, "step": 1127 }, { - "epoch": 0.23652757391486684, - "grad_norm": 5.452527689465011, - "learning_rate": 1.9875409326350404e-05, - "loss": 2.3893, + "epoch": 0.15927704038407228, + "grad_norm": 4.944608927893909, + "learning_rate": 1.9972113868654016e-05, + "loss": 1.2317, "step": 1128 }, { - "epoch": 0.2367372614803942, - "grad_norm": 5.708237255565833, - "learning_rate": 1.9875052823107733e-05, - "loss": 2.2636, + "epoch": 0.15941824343405817, + "grad_norm": 4.8139383272839105, + "learning_rate": 1.9971999984766337e-05, + "loss": 1.1767, "step": 1129 }, { - "epoch": 0.2369469490459216, - "grad_norm": 5.504553428880151, - "learning_rate": 1.9874695813751475e-05, - "loss": 2.1595, + "epoch": 0.15955944648404405, + "grad_norm": 4.459582085684538, + "learning_rate": 1.9971885869133683e-05, + "loss": 1.1833, "step": 1130 }, { - "epoch": 0.23715663661144895, - "grad_norm": 5.605450790489481, - "learning_rate": 1.9874338298299933e-05, - "loss": 2.753, + "epoch": 0.15970064953402993, + "grad_norm": 4.958432377251127, + "learning_rate": 1.9971771521758707e-05, + "loss": 1.2543, "step": 1131 }, { - "epoch": 0.2373663241769763, - "grad_norm": 5.367842452443219, - "learning_rate": 1.9873980276771426e-05, - "loss": 2.316, + "epoch": 0.15984185258401581, + "grad_norm": 4.996952775789927, + "learning_rate": 1.9971656942644068e-05, + "loss": 1.1167, "step": 1132 }, { - "epoch": 0.23757601174250367, - "grad_norm": 5.517744481439908, - "learning_rate": 1.98736217491843e-05, - "loss": 2.4459, + "epoch": 0.1599830556340017, + "grad_norm": 5.470769268862384, + "learning_rate": 1.997154213179243e-05, + "loss": 1.3827, "step": 1133 }, { - "epoch": 0.23778569930803103, - "grad_norm": 5.3123463391071875, - "learning_rate": 1.987326271555694e-05, - "loss": 2.4406, + "epoch": 0.16012425868398758, + "grad_norm": 5.240352881796299, + "learning_rate": 1.9971427089206458e-05, + "loss": 1.1858, "step": 1134 }, { - "epoch": 0.2379953868735584, - "grad_norm": 6.113216911836581, - "learning_rate": 1.987290317590774e-05, - "loss": 2.1981, + "epoch": 0.16026546173397346, + "grad_norm": 4.41487799239553, + "learning_rate": 1.9971311814888823e-05, + "loss": 0.9955, "step": 1135 }, { - "epoch": 0.23820507443908576, - "grad_norm": 5.499884763202649, - "learning_rate": 1.9872543130255128e-05, - "loss": 2.6627, + "epoch": 0.16040666478395935, + "grad_norm": 4.629404400225735, + "learning_rate": 1.997119630884221e-05, + "loss": 1.0966, "step": 1136 }, { - "epoch": 0.23841476200461312, - "grad_norm": 5.274258796880021, - "learning_rate": 1.9872182578617558e-05, - "loss": 2.204, + "epoch": 0.1605478678339452, + "grad_norm": 4.735890302037721, + "learning_rate": 1.9971080571069303e-05, + "loss": 1.0419, "step": 1137 }, { - "epoch": 0.23862444957014048, - "grad_norm": 5.028270158500906, - "learning_rate": 1.987182152101351e-05, - "loss": 2.4063, + "epoch": 0.16068907088393108, + "grad_norm": 5.46902641288739, + "learning_rate": 1.9970964601572788e-05, + "loss": 1.0326, "step": 1138 }, { - "epoch": 0.23883413713566787, - "grad_norm": 4.953113826278815, - "learning_rate": 1.9871459957461482e-05, - "loss": 2.2782, + "epoch": 0.16083027393391697, + "grad_norm": 4.921918149183649, + "learning_rate": 1.9970848400355364e-05, + "loss": 1.0783, "step": 1139 }, { - "epoch": 0.23904382470119523, - "grad_norm": 4.955249712641666, - "learning_rate": 1.9871097887980016e-05, - "loss": 2.5487, + "epoch": 0.16097147698390285, + "grad_norm": 4.5532972752435095, + "learning_rate": 1.9970731967419724e-05, + "loss": 1.0725, "step": 1140 }, { - "epoch": 0.2392535122667226, - "grad_norm": 5.997073655406304, - "learning_rate": 1.9870735312587658e-05, - "loss": 2.2155, + "epoch": 0.16111268003388873, + "grad_norm": 4.656200162759009, + "learning_rate": 1.9970615302768586e-05, + "loss": 1.147, "step": 1141 }, { - "epoch": 0.23946319983224995, - "grad_norm": 5.856533857079827, - "learning_rate": 1.9870372231302996e-05, - "loss": 2.3308, + "epoch": 0.16125388308387462, + "grad_norm": 6.030633098850729, + "learning_rate": 1.997049840640465e-05, + "loss": 1.3959, "step": 1142 }, { - "epoch": 0.2396728873977773, - "grad_norm": 5.44256584284782, - "learning_rate": 1.987000864414464e-05, - "loss": 2.1604, + "epoch": 0.1613950861338605, + "grad_norm": 5.7090161370207255, + "learning_rate": 1.9970381278330638e-05, + "loss": 1.3324, "step": 1143 }, { - "epoch": 0.23988257496330467, - "grad_norm": 5.495169713569408, - "learning_rate": 1.986964455113122e-05, - "loss": 2.2905, + "epoch": 0.16153628918384638, + "grad_norm": 3.5516607599886356, + "learning_rate": 1.9970263918549274e-05, + "loss": 0.858, "step": 1144 }, { - "epoch": 0.24009226252883203, - "grad_norm": 6.117013045146031, - "learning_rate": 1.98692799522814e-05, - "loss": 2.4982, + "epoch": 0.16167749223383224, + "grad_norm": 5.072042007315354, + "learning_rate": 1.9970146327063276e-05, + "loss": 1.2115, "step": 1145 }, { - "epoch": 0.2403019500943594, - "grad_norm": 6.494218601389671, - "learning_rate": 1.9868914847613867e-05, - "loss": 2.5917, + "epoch": 0.16181869528381812, + "grad_norm": 5.9786361230868375, + "learning_rate": 1.9970028503875387e-05, + "loss": 1.3568, "step": 1146 }, { - "epoch": 0.24051163765988676, - "grad_norm": 5.347069593173759, - "learning_rate": 1.986854923714733e-05, - "loss": 2.1281, + "epoch": 0.161959898333804, + "grad_norm": 4.82441265177681, + "learning_rate": 1.996991044898834e-05, + "loss": 1.1887, "step": 1147 }, { - "epoch": 0.24072132522541415, - "grad_norm": 7.068169917508162, - "learning_rate": 1.9868183120900527e-05, - "loss": 2.3845, + "epoch": 0.1621011013837899, + "grad_norm": 4.156615645020029, + "learning_rate": 1.996979216240488e-05, + "loss": 0.9075, "step": 1148 }, { - "epoch": 0.2409310127909415, - "grad_norm": 6.15783802810978, - "learning_rate": 1.9867816498892228e-05, - "loss": 2.4638, + "epoch": 0.16224230443377577, + "grad_norm": 5.295656274068003, + "learning_rate": 1.996967364412776e-05, + "loss": 1.0972, "step": 1149 }, { - "epoch": 0.24114070035646887, - "grad_norm": 8.244312691717331, - "learning_rate": 1.9867449371141214e-05, - "loss": 2.6692, + "epoch": 0.16238350748376165, + "grad_norm": 4.992705844666568, + "learning_rate": 1.9969554894159723e-05, + "loss": 1.0895, "step": 1150 }, { - "epoch": 0.24135038792199623, - "grad_norm": 5.68659057279483, - "learning_rate": 1.9867081737666306e-05, - "loss": 2.374, + "epoch": 0.16252471053374754, + "grad_norm": 4.381287639430764, + "learning_rate": 1.996943591250354e-05, + "loss": 1.2319, "step": 1151 }, { - "epoch": 0.2415600754875236, - "grad_norm": 5.929399972630398, - "learning_rate": 1.9866713598486345e-05, - "loss": 2.3455, + "epoch": 0.16266591358373342, + "grad_norm": 4.882908726919229, + "learning_rate": 1.996931669916197e-05, + "loss": 1.125, "step": 1152 }, { - "epoch": 0.24176976305305095, - "grad_norm": 5.787811580602739, - "learning_rate": 1.98663449536202e-05, - "loss": 2.2595, + "epoch": 0.1628071166337193, + "grad_norm": 4.248085213469589, + "learning_rate": 1.9969197254137786e-05, + "loss": 1.0392, "step": 1153 }, { - "epoch": 0.2419794506185783, - "grad_norm": 5.848226090100022, - "learning_rate": 1.9865975803086765e-05, - "loss": 2.4017, + "epoch": 0.16294831968370516, + "grad_norm": 5.018658833090311, + "learning_rate": 1.9969077577433763e-05, + "loss": 1.0778, "step": 1154 }, { - "epoch": 0.24218913818410567, - "grad_norm": 6.082148243489996, - "learning_rate": 1.9865606146904957e-05, - "loss": 2.5488, + "epoch": 0.16308952273369104, + "grad_norm": 3.9889336820297325, + "learning_rate": 1.9968957669052687e-05, + "loss": 0.9441, "step": 1155 }, { - "epoch": 0.24239882574963303, - "grad_norm": 7.3803871732556185, - "learning_rate": 1.9865235985093724e-05, - "loss": 2.6939, + "epoch": 0.16323072578367692, + "grad_norm": 6.153782683434731, + "learning_rate": 1.9968837528997333e-05, + "loss": 1.2275, "step": 1156 }, { - "epoch": 0.24260851331516042, - "grad_norm": 5.28203187147514, - "learning_rate": 1.9864865317672034e-05, - "loss": 2.257, + "epoch": 0.1633719288336628, + "grad_norm": 5.634623491013958, + "learning_rate": 1.9968717157270502e-05, + "loss": 1.2337, "step": 1157 }, { - "epoch": 0.24281820088068778, - "grad_norm": 5.423373224566631, - "learning_rate": 1.986449414465889e-05, - "loss": 2.6038, + "epoch": 0.1635131318836487, + "grad_norm": 4.422171742775482, + "learning_rate": 1.9968596553874993e-05, + "loss": 0.9681, "step": 1158 }, { - "epoch": 0.24302788844621515, - "grad_norm": 5.801273387364715, - "learning_rate": 1.9864122466073308e-05, - "loss": 2.5051, + "epoch": 0.16365433493363457, + "grad_norm": 5.157700831331247, + "learning_rate": 1.99684757188136e-05, + "loss": 1.1761, "step": 1159 }, { - "epoch": 0.2432375760117425, - "grad_norm": 5.379339333505808, - "learning_rate": 1.9863750281934345e-05, - "loss": 2.3499, + "epoch": 0.16379553798362045, + "grad_norm": 5.992449393622337, + "learning_rate": 1.9968354652089142e-05, + "loss": 1.1279, "step": 1160 }, { - "epoch": 0.24344726357726987, - "grad_norm": 5.362395437761733, - "learning_rate": 1.986337759226107e-05, - "loss": 2.4641, + "epoch": 0.16393674103360634, + "grad_norm": 4.916631406032586, + "learning_rate": 1.996823335370442e-05, + "loss": 1.1057, "step": 1161 }, { - "epoch": 0.24365695114279723, - "grad_norm": 5.560355618138492, - "learning_rate": 1.9863004397072586e-05, - "loss": 2.0889, + "epoch": 0.1640779440835922, + "grad_norm": 5.608265764309042, + "learning_rate": 1.9968111823662268e-05, + "loss": 1.102, "step": 1162 }, { - "epoch": 0.2438666387083246, - "grad_norm": 5.137534652210816, - "learning_rate": 1.9862630696388023e-05, - "loss": 2.387, + "epoch": 0.16421914713357808, + "grad_norm": 4.426081705158, + "learning_rate": 1.9967990061965497e-05, + "loss": 1.2122, "step": 1163 }, { - "epoch": 0.24407632627385195, - "grad_norm": 5.117646251200553, - "learning_rate": 1.986225649022653e-05, - "loss": 2.2911, + "epoch": 0.16436035018356396, + "grad_norm": 5.34358330556191, + "learning_rate": 1.996786806861694e-05, + "loss": 1.2047, "step": 1164 }, { - "epoch": 0.2442860138393793, - "grad_norm": 4.929722910793912, - "learning_rate": 1.9861881778607283e-05, - "loss": 2.112, + "epoch": 0.16450155323354984, + "grad_norm": 4.50661736599926, + "learning_rate": 1.996774584361944e-05, + "loss": 1.0098, "step": 1165 }, { - "epoch": 0.2444957014049067, - "grad_norm": 4.924007160384863, - "learning_rate": 1.9861506561549494e-05, - "loss": 2.4487, + "epoch": 0.16464275628353572, + "grad_norm": 4.909272011581947, + "learning_rate": 1.9967623386975826e-05, + "loss": 1.1622, "step": 1166 }, { - "epoch": 0.24470538897043406, - "grad_norm": 5.2735753090830375, - "learning_rate": 1.986113083907239e-05, - "loss": 2.1672, + "epoch": 0.1647839593335216, + "grad_norm": 4.780828325639308, + "learning_rate": 1.996750069868895e-05, + "loss": 1.0284, "step": 1167 }, { - "epoch": 0.24491507653596142, - "grad_norm": 6.608704847772229, - "learning_rate": 1.9860754611195226e-05, - "loss": 2.3328, + "epoch": 0.1649251623835075, + "grad_norm": 5.718198109586679, + "learning_rate": 1.9967377778761667e-05, + "loss": 1.0667, "step": 1168 }, { - "epoch": 0.24512476410148878, - "grad_norm": 5.1966977918464865, - "learning_rate": 1.9860377877937287e-05, - "loss": 2.3562, + "epoch": 0.16506636543349337, + "grad_norm": 4.781998095392276, + "learning_rate": 1.9967254627196823e-05, + "loss": 1.1192, "step": 1169 }, { - "epoch": 0.24533445166701615, - "grad_norm": 4.948102841994688, - "learning_rate": 1.986000063931788e-05, - "loss": 2.3373, + "epoch": 0.16520756848347926, + "grad_norm": 4.158974695290931, + "learning_rate": 1.996713124399729e-05, + "loss": 0.9703, "step": 1170 }, { - "epoch": 0.2455441392325435, - "grad_norm": 5.2097377981363895, - "learning_rate": 1.9859622895356335e-05, - "loss": 2.393, + "epoch": 0.1653487715334651, + "grad_norm": 4.774755573005021, + "learning_rate": 1.996700762916593e-05, + "loss": 1.1202, "step": 1171 }, { - "epoch": 0.24575382679807087, - "grad_norm": 5.941671184348702, - "learning_rate": 1.9859244646072017e-05, - "loss": 2.5533, + "epoch": 0.165489974583451, + "grad_norm": 4.602605691369339, + "learning_rate": 1.9966883782705617e-05, + "loss": 1.0959, "step": 1172 }, { - "epoch": 0.24596351436359823, - "grad_norm": 5.190811034661953, - "learning_rate": 1.9858865891484313e-05, - "loss": 2.5423, + "epoch": 0.16563117763343688, + "grad_norm": 4.470840325697791, + "learning_rate": 1.996675970461923e-05, + "loss": 1.105, "step": 1173 }, { - "epoch": 0.2461732019291256, - "grad_norm": 5.241397669095447, - "learning_rate": 1.9858486631612633e-05, - "loss": 2.309, + "epoch": 0.16577238068342276, + "grad_norm": 5.155838892675553, + "learning_rate": 1.996663539490965e-05, + "loss": 1.1275, "step": 1174 }, { - "epoch": 0.24638288949465298, - "grad_norm": 6.071879430448749, - "learning_rate": 1.985810686647641e-05, - "loss": 2.1952, + "epoch": 0.16591358373340864, + "grad_norm": 4.493211582275356, + "learning_rate": 1.9966510853579767e-05, + "loss": 1.1345, "step": 1175 }, { - "epoch": 0.24659257706018034, - "grad_norm": 5.415756123932901, - "learning_rate": 1.985772659609512e-05, - "loss": 2.2649, + "epoch": 0.16605478678339453, + "grad_norm": 5.103456949154394, + "learning_rate": 1.996638608063248e-05, + "loss": 1.0466, "step": 1176 }, { - "epoch": 0.2468022646257077, - "grad_norm": 5.591952572905724, - "learning_rate": 1.985734582048824e-05, - "loss": 2.5287, + "epoch": 0.1661959898333804, + "grad_norm": 4.664538175951263, + "learning_rate": 1.996626107607068e-05, + "loss": 0.9554, "step": 1177 }, { - "epoch": 0.24701195219123506, - "grad_norm": 6.186606283769733, - "learning_rate": 1.9856964539675287e-05, - "loss": 2.4627, + "epoch": 0.1663371928833663, + "grad_norm": 4.860685825944272, + "learning_rate": 1.996613583989728e-05, + "loss": 1.161, "step": 1178 }, { - "epoch": 0.24722163975676242, - "grad_norm": 5.648617180496344, - "learning_rate": 1.9856582753675808e-05, - "loss": 2.4207, + "epoch": 0.16647839593335215, + "grad_norm": 4.638587678007743, + "learning_rate": 1.9966010372115186e-05, + "loss": 1.0527, "step": 1179 }, { - "epoch": 0.24743132732228978, - "grad_norm": 5.721334461306422, - "learning_rate": 1.985620046250937e-05, - "loss": 2.4322, + "epoch": 0.16661959898333803, + "grad_norm": 5.2318313887716945, + "learning_rate": 1.9965884672727313e-05, + "loss": 1.2447, "step": 1180 }, { - "epoch": 0.24764101488781715, - "grad_norm": 5.8129998608713755, - "learning_rate": 1.985581766619556e-05, - "loss": 2.5754, + "epoch": 0.1667608020333239, + "grad_norm": 4.671149961172834, + "learning_rate": 1.9965758741736587e-05, + "loss": 1.0432, "step": 1181 }, { - "epoch": 0.2478507024533445, - "grad_norm": 5.6367792272000035, - "learning_rate": 1.9855434364754003e-05, - "loss": 2.3282, + "epoch": 0.1669020050833098, + "grad_norm": 3.892770963817775, + "learning_rate": 1.996563257914593e-05, + "loss": 0.8379, "step": 1182 }, { - "epoch": 0.24806039001887187, - "grad_norm": 7.218380738491035, - "learning_rate": 1.985505055820434e-05, - "loss": 2.4668, + "epoch": 0.16704320813329568, + "grad_norm": 4.972522787507057, + "learning_rate": 1.9965506184958277e-05, + "loss": 1.1638, "step": 1183 }, { - "epoch": 0.24827007758439926, - "grad_norm": 5.177707975673539, - "learning_rate": 1.9854666246566244e-05, - "loss": 2.2937, + "epoch": 0.16718441118328156, + "grad_norm": 5.236804181725235, + "learning_rate": 1.9965379559176562e-05, + "loss": 1.2589, "step": 1184 }, { - "epoch": 0.24847976514992662, - "grad_norm": 5.572773400010812, - "learning_rate": 1.985428142985941e-05, - "loss": 2.4316, + "epoch": 0.16732561423326744, + "grad_norm": 5.159160002524021, + "learning_rate": 1.9965252701803733e-05, + "loss": 1.0586, "step": 1185 }, { - "epoch": 0.24868945271545398, - "grad_norm": 5.404683091715184, - "learning_rate": 1.9853896108103565e-05, - "loss": 2.4322, + "epoch": 0.16746681728325333, + "grad_norm": 5.193207175371085, + "learning_rate": 1.996512561284273e-05, + "loss": 1.0912, "step": 1186 }, { - "epoch": 0.24889914028098134, - "grad_norm": 5.4586373140361, - "learning_rate": 1.9853510281318446e-05, - "loss": 2.3618, + "epoch": 0.1676080203332392, + "grad_norm": 5.088875793184004, + "learning_rate": 1.996499829229652e-05, + "loss": 1.208, "step": 1187 }, { - "epoch": 0.2491088278465087, - "grad_norm": 5.712605667669462, - "learning_rate": 1.9853123949523843e-05, - "loss": 2.1057, + "epoch": 0.16774922338322507, + "grad_norm": 4.1768913153132, + "learning_rate": 1.9964870740168046e-05, + "loss": 1.1677, "step": 1188 }, { - "epoch": 0.24931851541203606, - "grad_norm": 5.840087749174537, - "learning_rate": 1.9852737112739545e-05, - "loss": 2.4032, + "epoch": 0.16789042643321095, + "grad_norm": 4.720334313188313, + "learning_rate": 1.996474295646028e-05, + "loss": 1.0759, "step": 1189 }, { - "epoch": 0.24952820297756342, - "grad_norm": 5.38535609309615, - "learning_rate": 1.985234977098538e-05, - "loss": 2.2993, + "epoch": 0.16803162948319683, + "grad_norm": 4.463351861093201, + "learning_rate": 1.9964614941176194e-05, + "loss": 1.0423, "step": 1190 }, { - "epoch": 0.24973789054309078, - "grad_norm": 5.348506150096231, - "learning_rate": 1.98519619242812e-05, - "loss": 2.4278, + "epoch": 0.16817283253318271, + "grad_norm": 4.6140940656716145, + "learning_rate": 1.9964486694318758e-05, + "loss": 1.1759, "step": 1191 }, { - "epoch": 0.24994757810861815, - "grad_norm": 5.583716426867162, - "learning_rate": 1.9851573572646888e-05, - "loss": 2.1251, + "epoch": 0.1683140355831686, + "grad_norm": 4.860198883725041, + "learning_rate": 1.9964358215890955e-05, + "loss": 1.0864, "step": 1192 }, { - "epoch": 0.25015726567414553, - "grad_norm": 5.956034460626029, - "learning_rate": 1.9851184716102342e-05, - "loss": 2.5705, + "epoch": 0.16845523863315448, + "grad_norm": 5.676714072235111, + "learning_rate": 1.9964229505895768e-05, + "loss": 1.2349, "step": 1193 }, { - "epoch": 0.2503669532396729, - "grad_norm": 6.671375126114211, - "learning_rate": 1.985079535466749e-05, - "loss": 2.319, + "epoch": 0.16859644168314036, + "grad_norm": 4.325881150491777, + "learning_rate": 1.9964100564336197e-05, + "loss": 0.9979, "step": 1194 }, { - "epoch": 0.25057664080520026, - "grad_norm": 5.926760972556911, - "learning_rate": 1.9850405488362296e-05, - "loss": 2.5532, + "epoch": 0.16873764473312625, + "grad_norm": 5.135060794473869, + "learning_rate": 1.9963971391215226e-05, + "loss": 1.1304, "step": 1195 }, { - "epoch": 0.2507863283707276, - "grad_norm": 5.020992536505515, - "learning_rate": 1.9850015117206736e-05, - "loss": 2.6251, + "epoch": 0.1688788477831121, + "grad_norm": 4.43911971813641, + "learning_rate": 1.9963841986535864e-05, + "loss": 1.0748, "step": 1196 }, { - "epoch": 0.250996015936255, - "grad_norm": 5.177380738976003, - "learning_rate": 1.9849624241220814e-05, - "loss": 2.3906, + "epoch": 0.16902005083309798, + "grad_norm": 4.536829356448458, + "learning_rate": 1.996371235030112e-05, + "loss": 1.3027, "step": 1197 }, { - "epoch": 0.25120570350178234, - "grad_norm": 5.439352919616085, - "learning_rate": 1.984923286042457e-05, - "loss": 2.3691, + "epoch": 0.16916125388308387, + "grad_norm": 5.955702804116423, + "learning_rate": 1.9963582482514003e-05, + "loss": 1.0215, "step": 1198 }, { - "epoch": 0.2514153910673097, - "grad_norm": 5.064151809568982, - "learning_rate": 1.9848840974838054e-05, - "loss": 2.1341, + "epoch": 0.16930245693306975, + "grad_norm": 4.4124999689209865, + "learning_rate": 1.996345238317753e-05, + "loss": 0.9698, "step": 1199 }, { - "epoch": 0.25162507863283706, - "grad_norm": 5.837547826130716, - "learning_rate": 1.984844858448136e-05, - "loss": 2.5517, + "epoch": 0.16944365998305563, + "grad_norm": 5.3875102071102585, + "learning_rate": 1.996332205229473e-05, + "loss": 1.2229, "step": 1200 }, { - "epoch": 0.2518347661983644, - "grad_norm": 5.41671745046474, - "learning_rate": 1.9848055689374594e-05, - "loss": 2.139, + "epoch": 0.16958486303304152, + "grad_norm": 4.623236606991806, + "learning_rate": 1.9963191489868624e-05, + "loss": 0.9886, "step": 1201 }, { - "epoch": 0.2520444537638918, - "grad_norm": 5.5614150019148045, - "learning_rate": 1.9847662289537893e-05, - "loss": 2.1161, + "epoch": 0.1697260660830274, + "grad_norm": 4.334738100442265, + "learning_rate": 1.996306069590225e-05, + "loss": 1.0788, "step": 1202 }, { - "epoch": 0.25225414132941915, - "grad_norm": 4.887067996724922, - "learning_rate": 1.984726838499142e-05, - "loss": 2.4905, + "epoch": 0.16986726913301328, + "grad_norm": 4.796742345483506, + "learning_rate": 1.996292967039865e-05, + "loss": 1.1337, "step": 1203 }, { - "epoch": 0.2524638288949465, - "grad_norm": 5.266908558874635, - "learning_rate": 1.9846873975755363e-05, - "loss": 2.2074, + "epoch": 0.17000847218299917, + "grad_norm": 4.839822157330316, + "learning_rate": 1.996279841336087e-05, + "loss": 1.0515, "step": 1204 }, { - "epoch": 0.25267351646047387, - "grad_norm": 5.700358033778161, - "learning_rate": 1.9846479061849936e-05, - "loss": 2.3268, + "epoch": 0.17014967523298502, + "grad_norm": 5.264923200250912, + "learning_rate": 1.996266692479196e-05, + "loss": 1.1155, "step": 1205 }, { - "epoch": 0.2528832040260013, - "grad_norm": 5.033509154854469, - "learning_rate": 1.984608364329538e-05, - "loss": 2.3735, + "epoch": 0.1702908782829709, + "grad_norm": 3.5624932963091394, + "learning_rate": 1.9962535204694964e-05, + "loss": 0.9227, "step": 1206 }, { - "epoch": 0.25309289159152865, - "grad_norm": 6.0183316637739335, - "learning_rate": 1.9845687720111958e-05, - "loss": 2.4834, + "epoch": 0.1704320813329568, + "grad_norm": 4.818957797529313, + "learning_rate": 1.996240325307296e-05, + "loss": 1.0933, "step": 1207 }, { - "epoch": 0.253302579157056, - "grad_norm": 5.190534882363171, - "learning_rate": 1.9845291292319967e-05, - "loss": 2.2527, + "epoch": 0.17057328438294267, + "grad_norm": 5.15095130623406, + "learning_rate": 1.9962271069929e-05, + "loss": 1.4231, "step": 1208 }, { - "epoch": 0.25351226672258337, - "grad_norm": 5.543751449817277, - "learning_rate": 1.9844894359939716e-05, - "loss": 2.6291, + "epoch": 0.17071448743292855, + "grad_norm": 4.4859995335573135, + "learning_rate": 1.996213865526617e-05, + "loss": 0.9422, "step": 1209 }, { - "epoch": 0.25372195428811073, - "grad_norm": 5.435129714033043, - "learning_rate": 1.984449692299156e-05, - "loss": 2.3272, + "epoch": 0.17085569048291444, + "grad_norm": 4.965232098293266, + "learning_rate": 1.9962006009087535e-05, + "loss": 1.0033, "step": 1210 }, { - "epoch": 0.2539316418536381, - "grad_norm": 4.831754248434429, - "learning_rate": 1.984409898149586e-05, - "loss": 2.4798, + "epoch": 0.17099689353290032, + "grad_norm": 4.429831296158428, + "learning_rate": 1.9961873131396185e-05, + "loss": 1.0522, "step": 1211 }, { - "epoch": 0.25414132941916545, - "grad_norm": 6.5695152689212835, - "learning_rate": 1.984370053547301e-05, - "loss": 2.3515, + "epoch": 0.1711380965828862, + "grad_norm": 4.2902686003066846, + "learning_rate": 1.9961740022195202e-05, + "loss": 1.0232, "step": 1212 }, { - "epoch": 0.2543510169846928, - "grad_norm": 5.897750792729059, - "learning_rate": 1.984330158494344e-05, - "loss": 2.4192, + "epoch": 0.17127929963287206, + "grad_norm": 4.637063025708788, + "learning_rate": 1.9961606681487685e-05, + "loss": 1.0737, "step": 1213 }, { - "epoch": 0.2545607045502202, - "grad_norm": 5.942370235426671, - "learning_rate": 1.984290212992759e-05, - "loss": 2.3976, + "epoch": 0.17142050268285794, + "grad_norm": 4.9649691032247985, + "learning_rate": 1.9961473109276735e-05, + "loss": 1.2268, "step": 1214 }, { - "epoch": 0.25477039211574753, - "grad_norm": 6.193064285016414, - "learning_rate": 1.9842502170445933e-05, - "loss": 2.4846, + "epoch": 0.17156170573284382, + "grad_norm": 5.346696112627389, + "learning_rate": 1.9961339305565447e-05, + "loss": 1.128, "step": 1215 }, { - "epoch": 0.2549800796812749, - "grad_norm": 5.876130410960825, - "learning_rate": 1.9842101706518974e-05, - "loss": 2.3774, + "epoch": 0.1717029087828297, + "grad_norm": 5.051514125497087, + "learning_rate": 1.9961205270356937e-05, + "loss": 1.2226, "step": 1216 }, { - "epoch": 0.25518976724680226, - "grad_norm": 5.719231147536945, - "learning_rate": 1.9841700738167225e-05, - "loss": 2.5691, + "epoch": 0.1718441118328156, + "grad_norm": 4.3335417860770855, + "learning_rate": 1.996107100365432e-05, + "loss": 0.9637, "step": 1217 }, { - "epoch": 0.2553994548123296, - "grad_norm": 5.595717421108977, - "learning_rate": 1.984129926541125e-05, - "loss": 2.1098, + "epoch": 0.17198531488280147, + "grad_norm": 4.314035724736223, + "learning_rate": 1.996093650546071e-05, + "loss": 0.8998, "step": 1218 }, { - "epoch": 0.255609142377857, - "grad_norm": 6.179802493545814, - "learning_rate": 1.984089728827161e-05, - "loss": 2.3443, + "epoch": 0.17212651793278735, + "grad_norm": 4.462396544399696, + "learning_rate": 1.996080177577924e-05, + "loss": 1.1061, "step": 1219 }, { - "epoch": 0.25581882994338434, - "grad_norm": 4.68647201531337, - "learning_rate": 1.9840494806768922e-05, - "loss": 2.2537, + "epoch": 0.17226772098277324, + "grad_norm": 4.554901065008981, + "learning_rate": 1.9960666814613043e-05, + "loss": 1.1939, "step": 1220 }, { - "epoch": 0.2560285175089117, - "grad_norm": 5.582522111083153, - "learning_rate": 1.984009182092381e-05, - "loss": 2.3954, + "epoch": 0.17240892403275912, + "grad_norm": 5.451877302293623, + "learning_rate": 1.9960531621965246e-05, + "loss": 1.0017, "step": 1221 }, { - "epoch": 0.25623820507443906, - "grad_norm": 6.359146106136524, - "learning_rate": 1.983968833075692e-05, - "loss": 1.7707, + "epoch": 0.17255012708274498, + "grad_norm": 4.649639678605315, + "learning_rate": 1.9960396197838997e-05, + "loss": 1.2605, "step": 1222 }, { - "epoch": 0.2564478926399664, - "grad_norm": 6.3285506849987785, - "learning_rate": 1.9839284336288937e-05, - "loss": 2.5248, + "epoch": 0.17269133013273086, + "grad_norm": 4.879921813476367, + "learning_rate": 1.9960260542237438e-05, + "loss": 1.1538, "step": 1223 }, { - "epoch": 0.25665758020549384, - "grad_norm": 6.151720134525134, - "learning_rate": 1.9838879837540567e-05, - "loss": 2.288, + "epoch": 0.17283253318271674, + "grad_norm": 4.3017424577008105, + "learning_rate": 1.996012465516373e-05, + "loss": 1.0651, "step": 1224 }, { - "epoch": 0.2568672677710212, - "grad_norm": 5.621075483528397, - "learning_rate": 1.9838474834532546e-05, - "loss": 2.5878, + "epoch": 0.17297373623270262, + "grad_norm": 4.692640802351377, + "learning_rate": 1.9959988536621026e-05, + "loss": 1.1819, "step": 1225 }, { - "epoch": 0.25707695533654856, - "grad_norm": 5.215333990206144, - "learning_rate": 1.983806932728562e-05, - "loss": 2.4143, + "epoch": 0.1731149392826885, + "grad_norm": 4.242932972997732, + "learning_rate": 1.9959852186612492e-05, + "loss": 1.2351, "step": 1226 }, { - "epoch": 0.2572866429020759, - "grad_norm": 4.954922257318685, - "learning_rate": 1.9837663315820578e-05, - "loss": 2.4693, + "epoch": 0.1732561423326744, + "grad_norm": 4.199971960819896, + "learning_rate": 1.995971560514129e-05, + "loss": 0.93, "step": 1227 }, { - "epoch": 0.2574963304676033, - "grad_norm": 5.326597655728064, - "learning_rate": 1.9837256800158232e-05, - "loss": 2.3845, + "epoch": 0.17339734538266027, + "grad_norm": 5.544480469175915, + "learning_rate": 1.9959578792210604e-05, + "loss": 1.2355, "step": 1228 }, { - "epoch": 0.25770601803313065, - "grad_norm": 5.266881696393896, - "learning_rate": 1.9836849780319413e-05, - "loss": 2.1453, + "epoch": 0.17353854843264616, + "grad_norm": 4.230432303297978, + "learning_rate": 1.9959441747823603e-05, + "loss": 0.8558, "step": 1229 }, { - "epoch": 0.257915705598658, - "grad_norm": 5.151379809070329, - "learning_rate": 1.9836442256324977e-05, - "loss": 2.3946, + "epoch": 0.173679751482632, + "grad_norm": 3.86338852818337, + "learning_rate": 1.9959304471983477e-05, + "loss": 0.7717, "step": 1230 }, { - "epoch": 0.25812539316418537, - "grad_norm": 5.5708711932153765, - "learning_rate": 1.9836034228195817e-05, - "loss": 2.6474, + "epoch": 0.1738209545326179, + "grad_norm": 5.192953040524915, + "learning_rate": 1.9959166964693417e-05, + "loss": 1.3743, "step": 1231 }, { - "epoch": 0.25833508072971273, - "grad_norm": 5.4759966606263575, - "learning_rate": 1.9835625695952846e-05, - "loss": 2.3367, + "epoch": 0.17396215758260378, + "grad_norm": 4.870700849269062, + "learning_rate": 1.995902922595662e-05, + "loss": 1.2319, "step": 1232 }, { - "epoch": 0.2585447682952401, - "grad_norm": 4.919886897100189, - "learning_rate": 1.9835216659616997e-05, - "loss": 2.5269, + "epoch": 0.17410336063258966, + "grad_norm": 4.533206029446299, + "learning_rate": 1.9958891255776284e-05, + "loss": 1.0641, "step": 1233 }, { - "epoch": 0.25875445586076745, - "grad_norm": 6.027754044675275, - "learning_rate": 1.9834807119209233e-05, - "loss": 2.5167, + "epoch": 0.17424456368257554, + "grad_norm": 4.8705735075571726, + "learning_rate": 1.995875305415561e-05, + "loss": 1.1214, "step": 1234 }, { - "epoch": 0.2589641434262948, - "grad_norm": 4.84284369619074, - "learning_rate": 1.983439707475055e-05, - "loss": 2.2038, + "epoch": 0.17438576673256143, + "grad_norm": 5.073610120108678, + "learning_rate": 1.9958614621097826e-05, + "loss": 1.3175, "step": 1235 }, { - "epoch": 0.2591738309918222, - "grad_norm": 5.097040535475538, - "learning_rate": 1.9833986526261958e-05, - "loss": 2.3842, + "epoch": 0.1745269697825473, + "grad_norm": 5.303017697231334, + "learning_rate": 1.9958475956606133e-05, + "loss": 1.2074, "step": 1236 }, { - "epoch": 0.25938351855734953, - "grad_norm": 5.346301675262585, - "learning_rate": 1.9833575473764505e-05, - "loss": 2.5197, + "epoch": 0.1746681728325332, + "grad_norm": 5.169586306770147, + "learning_rate": 1.995833706068376e-05, + "loss": 1.2317, "step": 1237 }, { - "epoch": 0.2595932061228769, - "grad_norm": 6.08034660642355, - "learning_rate": 1.9833163917279247e-05, - "loss": 2.3006, + "epoch": 0.17480937588251907, + "grad_norm": 4.919302063301337, + "learning_rate": 1.995819793333394e-05, + "loss": 1.2109, "step": 1238 }, { - "epoch": 0.25980289368840426, - "grad_norm": 5.718078908081992, - "learning_rate": 1.9832751856827288e-05, - "loss": 2.4086, + "epoch": 0.17495057893250493, + "grad_norm": 4.609082575319201, + "learning_rate": 1.9958058574559893e-05, + "loss": 1.0625, "step": 1239 }, { - "epoch": 0.2600125812539316, - "grad_norm": 4.6728544594966, - "learning_rate": 1.983233929242974e-05, - "loss": 2.3108, + "epoch": 0.1750917819824908, + "grad_norm": 4.6944231225839586, + "learning_rate": 1.995791898436487e-05, + "loss": 1.014, "step": 1240 }, { - "epoch": 0.260222268819459, - "grad_norm": 5.1393477710538145, - "learning_rate": 1.983192622410775e-05, - "loss": 2.2975, + "epoch": 0.1752329850324767, + "grad_norm": 4.6586299999591905, + "learning_rate": 1.9957779162752105e-05, + "loss": 1.079, "step": 1241 }, { - "epoch": 0.2604319563849864, - "grad_norm": 5.366751190147697, - "learning_rate": 1.9831512651882493e-05, - "loss": 2.2786, + "epoch": 0.17537418808246258, + "grad_norm": 4.470951048820083, + "learning_rate": 1.9957639109724858e-05, + "loss": 1.1884, "step": 1242 }, { - "epoch": 0.26064164395051376, - "grad_norm": 5.330911591631136, - "learning_rate": 1.9831098575775157e-05, - "loss": 2.0923, + "epoch": 0.17551539113244846, + "grad_norm": 4.310139549114599, + "learning_rate": 1.9957498825286374e-05, + "loss": 0.9912, "step": 1243 }, { - "epoch": 0.2608513315160411, - "grad_norm": 5.584312081436149, - "learning_rate": 1.9830683995806966e-05, - "loss": 2.4355, + "epoch": 0.17565659418243434, + "grad_norm": 5.447714868694457, + "learning_rate": 1.995735830943992e-05, + "loss": 1.2984, "step": 1244 }, { - "epoch": 0.2610610190815685, - "grad_norm": 5.320522162777732, - "learning_rate": 1.983026891199917e-05, - "loss": 2.3724, + "epoch": 0.17579779723242023, + "grad_norm": 4.500836203978812, + "learning_rate": 1.9957217562188763e-05, + "loss": 1.0362, "step": 1245 }, { - "epoch": 0.26127070664709584, - "grad_norm": 5.35383690653178, - "learning_rate": 1.9829853324373045e-05, - "loss": 2.3875, + "epoch": 0.1759390002824061, + "grad_norm": 4.989481499385495, + "learning_rate": 1.9957076583536166e-05, + "loss": 1.1578, "step": 1246 }, { - "epoch": 0.2614803942126232, - "grad_norm": 6.08381273936834, - "learning_rate": 1.9829437232949888e-05, - "loss": 2.2762, + "epoch": 0.176080203332392, + "grad_norm": 4.532421271108147, + "learning_rate": 1.9956935373485406e-05, + "loss": 1.0343, "step": 1247 }, { - "epoch": 0.26169008177815056, - "grad_norm": 5.679294095719883, - "learning_rate": 1.9829020637751022e-05, - "loss": 2.3114, + "epoch": 0.17622140638237785, + "grad_norm": 4.872221326425936, + "learning_rate": 1.9956793932039774e-05, + "loss": 1.1878, "step": 1248 }, { - "epoch": 0.2618997693436779, - "grad_norm": 6.360855883203795, - "learning_rate": 1.9828603538797804e-05, - "loss": 2.7159, + "epoch": 0.17636260943236373, + "grad_norm": 4.2504954569185145, + "learning_rate": 1.9956652259202548e-05, + "loss": 1.0258, "step": 1249 }, { - "epoch": 0.2621094569092053, - "grad_norm": 5.385428800829001, - "learning_rate": 1.9828185936111604e-05, - "loss": 2.2438, + "epoch": 0.17650381248234961, + "grad_norm": 4.941910920948673, + "learning_rate": 1.9956510354977022e-05, + "loss": 1.1637, "step": 1250 }, { - "epoch": 0.26231914447473265, - "grad_norm": 6.279653511183181, - "learning_rate": 1.9827767829713834e-05, - "loss": 2.32, + "epoch": 0.1766450155323355, + "grad_norm": 5.084381829744562, + "learning_rate": 1.99563682193665e-05, + "loss": 1.1971, "step": 1251 }, { - "epoch": 0.26252883204026, - "grad_norm": 6.096860363715155, - "learning_rate": 1.9827349219625912e-05, - "loss": 2.5817, + "epoch": 0.17678621858232138, + "grad_norm": 4.105186947940933, + "learning_rate": 1.9956225852374275e-05, + "loss": 0.8818, "step": 1252 }, { - "epoch": 0.26273851960578737, - "grad_norm": 6.0660160469466735, - "learning_rate": 1.98269301058693e-05, - "loss": 2.5179, + "epoch": 0.17692742163230726, + "grad_norm": 4.071406483525072, + "learning_rate": 1.9956083254003667e-05, + "loss": 0.8964, "step": 1253 }, { - "epoch": 0.26294820717131473, - "grad_norm": 5.6939034161962745, - "learning_rate": 1.9826510488465477e-05, - "loss": 1.7898, + "epoch": 0.17706862468229315, + "grad_norm": 4.62430982559045, + "learning_rate": 1.995594042425798e-05, + "loss": 1.0882, "step": 1254 }, { - "epoch": 0.2631578947368421, - "grad_norm": 6.157895608774056, - "learning_rate": 1.9826090367435946e-05, - "loss": 2.2776, + "epoch": 0.17720982773227903, + "grad_norm": 5.699316862550439, + "learning_rate": 1.9955797363140536e-05, + "loss": 1.2869, "step": 1255 }, { - "epoch": 0.26336758230236945, - "grad_norm": 5.111559533735894, - "learning_rate": 1.9825669742802244e-05, - "loss": 2.4436, + "epoch": 0.17735103078226488, + "grad_norm": 4.6441839273500625, + "learning_rate": 1.9955654070654664e-05, + "loss": 1.2183, "step": 1256 }, { - "epoch": 0.2635772698678968, - "grad_norm": 5.657181849764752, - "learning_rate": 1.9825248614585923e-05, - "loss": 2.3923, + "epoch": 0.17749223383225077, + "grad_norm": 5.798970056058559, + "learning_rate": 1.995551054680369e-05, + "loss": 1.2501, "step": 1257 }, { - "epoch": 0.2637869574334242, - "grad_norm": 5.932671455166495, - "learning_rate": 1.982482698280857e-05, - "loss": 2.621, + "epoch": 0.17763343688223665, + "grad_norm": 4.527253270046071, + "learning_rate": 1.995536679159095e-05, + "loss": 0.9936, "step": 1258 }, { - "epoch": 0.26399664499895154, - "grad_norm": 5.265155833769554, - "learning_rate": 1.9824404847491796e-05, - "loss": 2.3326, + "epoch": 0.17777463993222253, + "grad_norm": 4.633787085996182, + "learning_rate": 1.9955222805019786e-05, + "loss": 1.1555, "step": 1259 }, { - "epoch": 0.26420633256447895, - "grad_norm": 6.437380784687752, - "learning_rate": 1.982398220865723e-05, - "loss": 2.6448, + "epoch": 0.17791584298220842, + "grad_norm": 4.589039110290919, + "learning_rate": 1.995507858709354e-05, + "loss": 0.9787, "step": 1260 }, { - "epoch": 0.2644160201300063, - "grad_norm": 5.617705409317104, - "learning_rate": 1.982355906632654e-05, - "loss": 2.5222, + "epoch": 0.1780570460321943, + "grad_norm": 5.2790049115244635, + "learning_rate": 1.9954934137815568e-05, + "loss": 1.1364, "step": 1261 }, { - "epoch": 0.2646257076955337, - "grad_norm": 5.895967274786925, - "learning_rate": 1.982313542052141e-05, - "loss": 2.3592, + "epoch": 0.17819824908218018, + "grad_norm": 4.534838459934027, + "learning_rate": 1.995478945718923e-05, + "loss": 1.2378, "step": 1262 }, { - "epoch": 0.26483539526106104, - "grad_norm": 5.72419044529412, - "learning_rate": 1.9822711271263554e-05, - "loss": 2.4786, + "epoch": 0.17833945213216607, + "grad_norm": 4.9154977673551645, + "learning_rate": 1.995464454521788e-05, + "loss": 1.2292, "step": 1263 }, { - "epoch": 0.2650450828265884, - "grad_norm": 5.591249549080251, - "learning_rate": 1.9822286618574704e-05, - "loss": 2.2958, + "epoch": 0.17848065518215195, + "grad_norm": 3.98779088434465, + "learning_rate": 1.9954499401904893e-05, + "loss": 0.8715, "step": 1264 }, { - "epoch": 0.26525477039211576, - "grad_norm": 6.832124623565176, - "learning_rate": 1.9821861462476636e-05, - "loss": 2.1979, + "epoch": 0.1786218582321378, + "grad_norm": 5.142652047003303, + "learning_rate": 1.9954354027253635e-05, + "loss": 1.4004, "step": 1265 }, { - "epoch": 0.2654644579576431, - "grad_norm": 4.79977835068797, - "learning_rate": 1.9821435802991126e-05, - "loss": 2.453, + "epoch": 0.1787630612821237, + "grad_norm": 5.004151334023995, + "learning_rate": 1.995420842126749e-05, + "loss": 0.9918, "step": 1266 }, { - "epoch": 0.2656741455231705, - "grad_norm": 4.640605793316747, - "learning_rate": 1.982100964014e-05, - "loss": 2.274, + "epoch": 0.17890426433210957, + "grad_norm": 4.361361301082337, + "learning_rate": 1.9954062583949842e-05, + "loss": 1.0382, "step": 1267 }, { - "epoch": 0.26588383308869784, - "grad_norm": 5.093378486241985, - "learning_rate": 1.9820582973945095e-05, - "loss": 2.5203, + "epoch": 0.17904546738209545, + "grad_norm": 4.7034889403971825, + "learning_rate": 1.9953916515304077e-05, + "loss": 1.2482, "step": 1268 }, { - "epoch": 0.2660935206542252, - "grad_norm": 6.586494062817634, - "learning_rate": 1.9820155804428278e-05, - "loss": 2.7188, + "epoch": 0.17918667043208134, + "grad_norm": 6.232633458423675, + "learning_rate": 1.9953770215333593e-05, + "loss": 1.2757, "step": 1269 }, { - "epoch": 0.26630320821975256, - "grad_norm": 5.253871062667889, - "learning_rate": 1.9819728131611447e-05, - "loss": 2.46, + "epoch": 0.17932787348206722, + "grad_norm": 5.040964662575827, + "learning_rate": 1.9953623684041786e-05, + "loss": 1.0132, "step": 1270 }, { - "epoch": 0.2665128957852799, - "grad_norm": 6.115208491414063, - "learning_rate": 1.9819299955516515e-05, - "loss": 2.1893, + "epoch": 0.1794690765320531, + "grad_norm": 5.044052772782454, + "learning_rate": 1.995347692143206e-05, + "loss": 1.1451, "step": 1271 }, { - "epoch": 0.2667225833508073, - "grad_norm": 6.137026561760813, - "learning_rate": 1.981887127616543e-05, - "loss": 2.5991, + "epoch": 0.17961027958203898, + "grad_norm": 5.20990423713552, + "learning_rate": 1.9953329927507833e-05, + "loss": 1.3044, "step": 1272 }, { - "epoch": 0.26693227091633465, - "grad_norm": 5.452575290487976, - "learning_rate": 1.9818442093580164e-05, - "loss": 2.5141, + "epoch": 0.17975148263202484, + "grad_norm": 4.254999650470814, + "learning_rate": 1.9953182702272514e-05, + "loss": 0.9758, "step": 1273 }, { - "epoch": 0.267141958481862, - "grad_norm": 6.113570782322421, - "learning_rate": 1.9818012407782713e-05, - "loss": 2.4623, + "epoch": 0.17989268568201072, + "grad_norm": 6.026971312928143, + "learning_rate": 1.995303524572953e-05, + "loss": 1.3211, "step": 1274 }, { - "epoch": 0.26735164604738937, - "grad_norm": 4.749434782189213, - "learning_rate": 1.9817582218795094e-05, - "loss": 2.4898, + "epoch": 0.1800338887319966, + "grad_norm": 4.595563752221218, + "learning_rate": 1.9952887557882304e-05, + "loss": 1.1039, "step": 1275 }, { - "epoch": 0.26756133361291673, - "grad_norm": 6.4045008635310205, - "learning_rate": 1.981715152663936e-05, - "loss": 2.6487, + "epoch": 0.1801750917819825, + "grad_norm": 4.496045674765187, + "learning_rate": 1.995273963873427e-05, + "loss": 1.2038, "step": 1276 }, { - "epoch": 0.2677710211784441, - "grad_norm": 5.03275443583643, - "learning_rate": 1.9816720331337582e-05, - "loss": 2.3014, + "epoch": 0.18031629483196837, + "grad_norm": 4.655447861840876, + "learning_rate": 1.9952591488288868e-05, + "loss": 0.974, "step": 1277 }, { - "epoch": 0.2679807087439715, - "grad_norm": 5.073883475047928, - "learning_rate": 1.9816288632911864e-05, - "loss": 2.2536, + "epoch": 0.18045749788195425, + "grad_norm": 4.15308438518539, + "learning_rate": 1.9952443106549535e-05, + "loss": 1.099, "step": 1278 }, { - "epoch": 0.26819039630949887, - "grad_norm": 5.937809635860017, - "learning_rate": 1.9815856431384323e-05, - "loss": 2.4236, + "epoch": 0.18059870093194014, + "grad_norm": 6.190142892845365, + "learning_rate": 1.995229449351972e-05, + "loss": 1.289, "step": 1279 }, { - "epoch": 0.26840008387502623, - "grad_norm": 6.636803411490492, - "learning_rate": 1.981542372677712e-05, - "loss": 2.3035, + "epoch": 0.18073990398192602, + "grad_norm": 5.358189285530809, + "learning_rate": 1.9952145649202885e-05, + "loss": 1.1363, "step": 1280 }, { - "epoch": 0.2686097714405536, - "grad_norm": 5.197509258886102, - "learning_rate": 1.9814990519112424e-05, - "loss": 2.3859, + "epoch": 0.1808811070319119, + "grad_norm": 5.1677095117282414, + "learning_rate": 1.9951996573602476e-05, + "loss": 1.2044, "step": 1281 }, { - "epoch": 0.26881945900608095, - "grad_norm": 5.997065176229385, - "learning_rate": 1.9814556808412444e-05, - "loss": 2.3246, + "epoch": 0.18102231008189776, + "grad_norm": 5.009382872473553, + "learning_rate": 1.995184726672197e-05, + "loss": 1.066, "step": 1282 }, { - "epoch": 0.2690291465716083, - "grad_norm": 5.838210602998649, - "learning_rate": 1.98141225946994e-05, - "loss": 2.635, + "epoch": 0.18116351313188364, + "grad_norm": 4.693743290558771, + "learning_rate": 1.995169772856483e-05, + "loss": 1.1668, "step": 1283 }, { - "epoch": 0.2692388341371357, - "grad_norm": 7.133731490626786, - "learning_rate": 1.9813687877995556e-05, - "loss": 2.5888, + "epoch": 0.18130471618186952, + "grad_norm": 4.80827540853195, + "learning_rate": 1.9951547959134535e-05, + "loss": 1.1819, "step": 1284 }, { - "epoch": 0.26944852170266304, - "grad_norm": 6.212250850778944, - "learning_rate": 1.9813252658323186e-05, - "loss": 1.9985, + "epoch": 0.1814459192318554, + "grad_norm": 4.298919866577047, + "learning_rate": 1.9951397958434556e-05, + "loss": 0.9337, "step": 1285 }, { - "epoch": 0.2696582092681904, - "grad_norm": 5.386211045262769, - "learning_rate": 1.9812816935704596e-05, - "loss": 2.1854, + "epoch": 0.1815871222818413, + "grad_norm": 4.375509210194846, + "learning_rate": 1.9951247726468393e-05, + "loss": 1.0347, "step": 1286 }, { - "epoch": 0.26986789683371776, - "grad_norm": 6.303945142855519, - "learning_rate": 1.981238071016212e-05, - "loss": 2.6064, + "epoch": 0.18172832533182717, + "grad_norm": 4.196236919986424, + "learning_rate": 1.995109726323953e-05, + "loss": 0.9568, "step": 1287 }, { - "epoch": 0.2700775843992451, - "grad_norm": 5.3162656673676585, - "learning_rate": 1.981194398171811e-05, - "loss": 2.2494, + "epoch": 0.18186952838181306, + "grad_norm": 4.492160719422738, + "learning_rate": 1.9950946568751458e-05, + "loss": 0.9087, "step": 1288 }, { - "epoch": 0.2702872719647725, - "grad_norm": 5.020536335740002, - "learning_rate": 1.981150675039496e-05, - "loss": 2.4484, + "epoch": 0.18201073143179894, + "grad_norm": 4.682166600476827, + "learning_rate": 1.995079564300769e-05, + "loss": 1.1577, "step": 1289 }, { - "epoch": 0.27049695953029984, - "grad_norm": 6.1420272246550915, - "learning_rate": 1.9811069016215066e-05, - "loss": 2.1911, + "epoch": 0.1821519344817848, + "grad_norm": 4.767562446301597, + "learning_rate": 1.995064448601173e-05, + "loss": 1.2279, "step": 1290 }, { - "epoch": 0.2707066470958272, - "grad_norm": 5.87394210429339, - "learning_rate": 1.981063077920087e-05, - "loss": 2.2371, + "epoch": 0.18229313753177068, + "grad_norm": 4.563497077376157, + "learning_rate": 1.9950493097767086e-05, + "loss": 1.1248, "step": 1291 }, { - "epoch": 0.27091633466135456, - "grad_norm": 6.244711660978756, - "learning_rate": 1.981019203937483e-05, - "loss": 2.4066, + "epoch": 0.18243434058175656, + "grad_norm": 4.719289082275079, + "learning_rate": 1.9950341478277282e-05, + "loss": 1.4356, "step": 1292 }, { - "epoch": 0.2711260222268819, - "grad_norm": 6.165865589693254, - "learning_rate": 1.9809752796759434e-05, - "loss": 2.5739, + "epoch": 0.18257554363174244, + "grad_norm": 4.016594252551786, + "learning_rate": 1.9950189627545837e-05, + "loss": 0.8753, "step": 1293 }, { - "epoch": 0.2713357097924093, - "grad_norm": 6.88735453147843, - "learning_rate": 1.9809313051377196e-05, - "loss": 2.4636, + "epoch": 0.18271674668172833, + "grad_norm": 4.894590333283925, + "learning_rate": 1.9950037545576288e-05, + "loss": 1.1642, "step": 1294 }, { - "epoch": 0.2715453973579367, - "grad_norm": 5.810667529576484, - "learning_rate": 1.9808872803250647e-05, - "loss": 2.5927, + "epoch": 0.1828579497317142, + "grad_norm": 4.415399911984575, + "learning_rate": 1.994988523237216e-05, + "loss": 1.1803, "step": 1295 }, { - "epoch": 0.27175508492346406, - "grad_norm": 5.0610716016591635, - "learning_rate": 1.9808432052402356e-05, - "loss": 2.2261, + "epoch": 0.1829991527817001, + "grad_norm": 4.1549054885743235, + "learning_rate": 1.9949732687936992e-05, + "loss": 0.9826, "step": 1296 }, { - "epoch": 0.2719647724889914, - "grad_norm": 5.488967514234256, - "learning_rate": 1.980799079885491e-05, - "loss": 2.3501, + "epoch": 0.18314035583168597, + "grad_norm": 5.0533212810800725, + "learning_rate": 1.9949579912274337e-05, + "loss": 1.1916, "step": 1297 }, { - "epoch": 0.2721744600545188, - "grad_norm": 5.810833357151884, - "learning_rate": 1.980754904263092e-05, - "loss": 2.3486, + "epoch": 0.18328155888167186, + "grad_norm": 5.533817274245665, + "learning_rate": 1.994942690538774e-05, + "loss": 1.165, "step": 1298 }, { - "epoch": 0.27238414762004615, - "grad_norm": 5.096805833624817, - "learning_rate": 1.980710678375304e-05, - "loss": 2.1315, + "epoch": 0.1834227619316577, + "grad_norm": 4.655854771267515, + "learning_rate": 1.9949273667280764e-05, + "loss": 1.0441, "step": 1299 }, { - "epoch": 0.2725938351855735, - "grad_norm": 6.130130225751224, - "learning_rate": 1.980666402224392e-05, - "loss": 2.4333, + "epoch": 0.1835639649816436, + "grad_norm": 3.778265624584894, + "learning_rate": 1.9949120197956956e-05, + "loss": 0.8498, "step": 1300 }, { - "epoch": 0.27280352275110087, - "grad_norm": 5.476543252682569, - "learning_rate": 1.9806220758126263e-05, - "loss": 2.2098, + "epoch": 0.18370516803162948, + "grad_norm": 5.091137737813793, + "learning_rate": 1.99489664974199e-05, + "loss": 1.1958, "step": 1301 }, { - "epoch": 0.27301321031662823, - "grad_norm": 5.8557984383039665, - "learning_rate": 1.9805776991422788e-05, - "loss": 2.3436, + "epoch": 0.18384637108161536, + "grad_norm": 4.49639533349355, + "learning_rate": 1.9948812565673153e-05, + "loss": 0.997, "step": 1302 }, { - "epoch": 0.2732228978821556, - "grad_norm": 4.345406756206948, - "learning_rate": 1.980533272215623e-05, - "loss": 2.281, + "epoch": 0.18398757413160124, + "grad_norm": 4.875794611584804, + "learning_rate": 1.9948658402720303e-05, + "loss": 1.279, "step": 1303 }, { - "epoch": 0.27343258544768295, - "grad_norm": 6.569083837710685, - "learning_rate": 1.9804887950349366e-05, - "loss": 2.6885, + "epoch": 0.18412877718158713, + "grad_norm": 4.796974470574435, + "learning_rate": 1.9948504008564928e-05, + "loss": 1.2298, "step": 1304 }, { - "epoch": 0.2736422730132103, - "grad_norm": 4.824719829443905, - "learning_rate": 1.9804442676024986e-05, - "loss": 2.2335, + "epoch": 0.184269980231573, + "grad_norm": 4.069420107108959, + "learning_rate": 1.994834938321061e-05, + "loss": 0.9937, "step": 1305 }, { - "epoch": 0.2738519605787377, - "grad_norm": 5.610564787907202, - "learning_rate": 1.9803996899205916e-05, - "loss": 2.4043, + "epoch": 0.1844111832815589, + "grad_norm": 5.135791535008867, + "learning_rate": 1.9948194526660956e-05, + "loss": 1.3049, "step": 1306 }, { - "epoch": 0.27406164814426504, - "grad_norm": 5.262691627518141, - "learning_rate": 1.9803550619915e-05, - "loss": 2.3232, + "epoch": 0.18455238633154475, + "grad_norm": 4.713189212916467, + "learning_rate": 1.9948039438919554e-05, + "loss": 1.2788, "step": 1307 }, { - "epoch": 0.2742713357097924, - "grad_norm": 5.963194042368016, - "learning_rate": 1.9803103838175112e-05, - "loss": 2.2867, + "epoch": 0.18469358938153063, + "grad_norm": 5.515181633149785, + "learning_rate": 1.9947884119990012e-05, + "loss": 1.3322, "step": 1308 }, { - "epoch": 0.27448102327531976, - "grad_norm": 5.317531294803183, - "learning_rate": 1.980265655400915e-05, - "loss": 2.299, + "epoch": 0.18483479243151651, + "grad_norm": 5.402063884682159, + "learning_rate": 1.9947728569875936e-05, + "loss": 1.0901, "step": 1309 }, { - "epoch": 0.2746907108408471, - "grad_norm": 5.461498148664218, - "learning_rate": 1.9802208767440033e-05, - "loss": 2.2299, + "epoch": 0.1849759954815024, + "grad_norm": 4.173255440490058, + "learning_rate": 1.994757278858095e-05, + "loss": 0.9413, "step": 1310 }, { - "epoch": 0.2749003984063745, - "grad_norm": 5.339881268528538, - "learning_rate": 1.9801760478490722e-05, - "loss": 2.5616, + "epoch": 0.18511719853148828, + "grad_norm": 5.311433751864453, + "learning_rate": 1.9947416776108665e-05, + "loss": 1.2074, "step": 1311 }, { - "epoch": 0.27511008597190184, - "grad_norm": 5.073466426714328, - "learning_rate": 1.9801311687184182e-05, - "loss": 2.3906, + "epoch": 0.18525840158147416, + "grad_norm": 4.6885898969286215, + "learning_rate": 1.994726053246271e-05, + "loss": 1.2217, "step": 1312 }, { - "epoch": 0.27531977353742926, - "grad_norm": 5.755291216600122, - "learning_rate": 1.980086239354342e-05, - "loss": 2.4414, + "epoch": 0.18539960463146005, + "grad_norm": 5.384508304446427, + "learning_rate": 1.9947104057646716e-05, + "loss": 1.1769, "step": 1313 }, { - "epoch": 0.2755294611029566, - "grad_norm": 5.095370180899617, - "learning_rate": 1.9800412597591462e-05, - "loss": 2.4344, + "epoch": 0.18554080768144593, + "grad_norm": 4.938337074046915, + "learning_rate": 1.9946947351664324e-05, + "loss": 1.1609, "step": 1314 }, { - "epoch": 0.275739148668484, - "grad_norm": 5.8522030111540575, - "learning_rate": 1.9799962299351364e-05, - "loss": 2.3958, + "epoch": 0.1856820107314318, + "grad_norm": 4.348731252736528, + "learning_rate": 1.9946790414519167e-05, + "loss": 0.9727, "step": 1315 }, { - "epoch": 0.27594883623401134, - "grad_norm": 5.027363512027019, - "learning_rate": 1.9799511498846193e-05, - "loss": 2.2256, + "epoch": 0.18582321378141767, + "grad_norm": 4.345858913635626, + "learning_rate": 1.99466332462149e-05, + "loss": 1.161, "step": 1316 }, { - "epoch": 0.2761585237995387, - "grad_norm": 4.66448394610109, - "learning_rate": 1.9799060196099066e-05, - "loss": 2.2794, + "epoch": 0.18596441683140355, + "grad_norm": 4.7088288301731085, + "learning_rate": 1.9946475846755166e-05, + "loss": 1.1086, "step": 1317 }, { - "epoch": 0.27636821136506606, - "grad_norm": 5.49077036740618, - "learning_rate": 1.979860839113311e-05, - "loss": 2.386, + "epoch": 0.18610561988138943, + "grad_norm": 4.6905140970151304, + "learning_rate": 1.9946318216143633e-05, + "loss": 1.0715, "step": 1318 }, { - "epoch": 0.2765778989305934, - "grad_norm": 6.534615289486747, - "learning_rate": 1.9798156083971474e-05, - "loss": 2.3167, + "epoch": 0.18624682293137532, + "grad_norm": 4.77404387780695, + "learning_rate": 1.994616035438396e-05, + "loss": 1.2865, "step": 1319 }, { - "epoch": 0.2767875864961208, - "grad_norm": 4.908776706378267, - "learning_rate": 1.9797703274637348e-05, - "loss": 2.2464, + "epoch": 0.1863880259813612, + "grad_norm": 4.586524909983407, + "learning_rate": 1.9946002261479817e-05, + "loss": 1.1532, "step": 1320 }, { - "epoch": 0.27699727406164815, - "grad_norm": 5.154659062106493, - "learning_rate": 1.9797249963153936e-05, - "loss": 2.4426, + "epoch": 0.18652922903134708, + "grad_norm": 4.30123606743428, + "learning_rate": 1.9945843937434875e-05, + "loss": 0.9626, "step": 1321 }, { - "epoch": 0.2772069616271755, - "grad_norm": 5.135014652381726, - "learning_rate": 1.9796796149544472e-05, - "loss": 2.4183, + "epoch": 0.18667043208133297, + "grad_norm": 5.120312756155164, + "learning_rate": 1.9945685382252816e-05, + "loss": 1.1421, "step": 1322 }, { - "epoch": 0.27741664919270287, - "grad_norm": 5.512941845904786, - "learning_rate": 1.979634183383221e-05, - "loss": 2.2915, + "epoch": 0.18681163513131885, + "grad_norm": 5.058646208404037, + "learning_rate": 1.994552659593732e-05, + "loss": 1.2529, "step": 1323 }, { - "epoch": 0.27762633675823023, - "grad_norm": 5.316121042051358, - "learning_rate": 1.979588701604044e-05, - "loss": 2.5303, + "epoch": 0.1869528381813047, + "grad_norm": 4.608940549326115, + "learning_rate": 1.9945367578492085e-05, + "loss": 0.9914, "step": 1324 }, { - "epoch": 0.2778360243237576, - "grad_norm": 5.832968606183224, - "learning_rate": 1.9795431696192475e-05, - "loss": 2.3015, + "epoch": 0.1870940412312906, + "grad_norm": 5.069868504857425, + "learning_rate": 1.9945208329920802e-05, + "loss": 1.153, "step": 1325 }, { - "epoch": 0.27804571188928495, - "grad_norm": 5.184487745454246, - "learning_rate": 1.979497587431164e-05, - "loss": 2.3555, + "epoch": 0.18723524428127647, + "grad_norm": 4.399188099857936, + "learning_rate": 1.994504885022717e-05, + "loss": 0.9999, "step": 1326 }, { - "epoch": 0.2782553994548123, - "grad_norm": 5.232871375189829, - "learning_rate": 1.9794519550421305e-05, - "loss": 2.2517, + "epoch": 0.18737644733126235, + "grad_norm": 4.930740486047185, + "learning_rate": 1.9944889139414904e-05, + "loss": 1.1089, "step": 1327 }, { - "epoch": 0.2784650870203397, - "grad_norm": 5.5910032617212, - "learning_rate": 1.9794062724544855e-05, - "loss": 2.0663, + "epoch": 0.18751765038124824, + "grad_norm": 5.3975213730845, + "learning_rate": 1.9944729197487702e-05, + "loss": 1.1878, "step": 1328 }, { - "epoch": 0.27867477458586704, - "grad_norm": 5.753176372874745, - "learning_rate": 1.9793605396705706e-05, - "loss": 2.4483, + "epoch": 0.18765885343123412, + "grad_norm": 4.821977108353716, + "learning_rate": 1.994456902444929e-05, + "loss": 1.2141, "step": 1329 }, { - "epoch": 0.2788844621513944, - "grad_norm": 6.848631648726224, - "learning_rate": 1.979314756692729e-05, - "loss": 2.5071, + "epoch": 0.18780005648122, + "grad_norm": 5.018021630479794, + "learning_rate": 1.9944408620303393e-05, + "loss": 1.0732, "step": 1330 }, { - "epoch": 0.2790941497169218, - "grad_norm": 5.706418835675507, - "learning_rate": 1.9792689235233075e-05, - "loss": 2.1219, + "epoch": 0.18794125953120588, + "grad_norm": 4.954923731787435, + "learning_rate": 1.994424798505373e-05, + "loss": 1.1086, "step": 1331 }, { - "epoch": 0.2793038372824492, - "grad_norm": 5.833301559624962, - "learning_rate": 1.9792230401646556e-05, - "loss": 2.4436, + "epoch": 0.18808246258119177, + "grad_norm": 5.214749913426401, + "learning_rate": 1.994408711870404e-05, + "loss": 1.3411, "step": 1332 }, { - "epoch": 0.27951352484797654, - "grad_norm": 5.532161417154169, - "learning_rate": 1.9791771066191244e-05, - "loss": 2.4784, + "epoch": 0.18822366563117762, + "grad_norm": 4.390996133137107, + "learning_rate": 1.994392602125806e-05, + "loss": 1.1271, "step": 1333 }, { - "epoch": 0.2797232124135039, - "grad_norm": 5.951944046329034, - "learning_rate": 1.9791311228890682e-05, - "loss": 2.3727, + "epoch": 0.1883648686811635, + "grad_norm": 4.394339174489535, + "learning_rate": 1.9943764692719535e-05, + "loss": 0.9938, "step": 1334 }, { - "epoch": 0.27993289997903126, - "grad_norm": 5.629058746810909, - "learning_rate": 1.9790850889768436e-05, - "loss": 2.635, + "epoch": 0.1885060717311494, + "grad_norm": 4.210998121060827, + "learning_rate": 1.9943603133092208e-05, + "loss": 0.9607, "step": 1335 }, { - "epoch": 0.2801425875445586, - "grad_norm": 6.3772229118586505, - "learning_rate": 1.9790390048848104e-05, - "loss": 2.4768, + "epoch": 0.18864727478113527, + "grad_norm": 4.380465402443947, + "learning_rate": 1.9943441342379843e-05, + "loss": 0.9735, "step": 1336 }, { - "epoch": 0.280352275110086, - "grad_norm": 6.9499510630635974, - "learning_rate": 1.9789928706153297e-05, - "loss": 2.392, + "epoch": 0.18878847783112115, + "grad_norm": 4.38393020161247, + "learning_rate": 1.9943279320586194e-05, + "loss": 1.0468, "step": 1337 }, { - "epoch": 0.28056196267561334, - "grad_norm": 5.175022399981712, - "learning_rate": 1.9789466861707666e-05, - "loss": 2.5344, + "epoch": 0.18892968088110704, + "grad_norm": 4.58730758032554, + "learning_rate": 1.994311706771503e-05, + "loss": 1.1235, "step": 1338 }, { - "epoch": 0.2807716502411407, - "grad_norm": 5.267937924448901, - "learning_rate": 1.978900451553488e-05, - "loss": 2.3307, + "epoch": 0.18907088393109292, + "grad_norm": 4.698073968088262, + "learning_rate": 1.9942954583770116e-05, + "loss": 1.0193, "step": 1339 }, { - "epoch": 0.28098133780666806, - "grad_norm": 5.876642090105805, - "learning_rate": 1.9788541667658638e-05, - "loss": 2.4386, + "epoch": 0.1892120869810788, + "grad_norm": 4.403201393664041, + "learning_rate": 1.9942791868755233e-05, + "loss": 1.093, "step": 1340 }, { - "epoch": 0.2811910253721954, - "grad_norm": 6.409002022908709, - "learning_rate": 1.9788078318102652e-05, - "loss": 2.3743, + "epoch": 0.18935329003106466, + "grad_norm": 5.162686923005779, + "learning_rate": 1.994262892267416e-05, + "loss": 1.3125, "step": 1341 }, { - "epoch": 0.2814007129377228, - "grad_norm": 6.38620051212082, - "learning_rate": 1.9787614466890678e-05, - "loss": 2.7258, + "epoch": 0.18949449308105054, + "grad_norm": 5.377391322826577, + "learning_rate": 1.9942465745530687e-05, + "loss": 1.181, "step": 1342 }, { - "epoch": 0.28161040050325015, - "grad_norm": 5.79571050887188, - "learning_rate": 1.9787150114046487e-05, - "loss": 2.4201, + "epoch": 0.18963569613103642, + "grad_norm": 4.564608012748201, + "learning_rate": 1.99423023373286e-05, + "loss": 1.0372, "step": 1343 }, { - "epoch": 0.2818200880687775, - "grad_norm": 6.101610185361124, - "learning_rate": 1.9786685259593877e-05, - "loss": 2.3834, + "epoch": 0.1897768991810223, + "grad_norm": 5.1526342850790074, + "learning_rate": 1.99421386980717e-05, + "loss": 1.2143, "step": 1344 }, { - "epoch": 0.28202977563430487, - "grad_norm": 5.823266309508754, - "learning_rate": 1.9786219903556675e-05, - "loss": 2.3987, + "epoch": 0.1899181022310082, + "grad_norm": 4.890502070791986, + "learning_rate": 1.9941974827763797e-05, + "loss": 1.0797, "step": 1345 }, { - "epoch": 0.28223946319983223, - "grad_norm": 5.769019618170738, - "learning_rate": 1.9785754045958727e-05, - "loss": 2.3386, + "epoch": 0.19005930528099407, + "grad_norm": 4.296482830134802, + "learning_rate": 1.994181072640869e-05, + "loss": 0.9844, "step": 1346 }, { - "epoch": 0.2824491507653596, - "grad_norm": 5.496938248960908, - "learning_rate": 1.9785287686823912e-05, - "loss": 2.27, + "epoch": 0.19020050833097996, + "grad_norm": 4.3273981564878055, + "learning_rate": 1.9941646394010194e-05, + "loss": 1.1888, "step": 1347 }, { - "epoch": 0.28265883833088695, - "grad_norm": 7.040263575470214, - "learning_rate": 1.9784820826176132e-05, - "loss": 2.7362, + "epoch": 0.19034171138096584, + "grad_norm": 5.159120481243468, + "learning_rate": 1.9941481830572126e-05, + "loss": 1.2561, "step": 1348 }, { - "epoch": 0.28286852589641437, - "grad_norm": 5.237667922569154, - "learning_rate": 1.9784353464039314e-05, - "loss": 2.3929, + "epoch": 0.19048291443095172, + "grad_norm": 3.8535509932377887, + "learning_rate": 1.994131703609832e-05, + "loss": 0.9681, "step": 1349 }, { - "epoch": 0.28307821346194173, - "grad_norm": 6.12180871962915, - "learning_rate": 1.9783885600437412e-05, - "loss": 2.4483, + "epoch": 0.19062411748093758, + "grad_norm": 5.636729918625148, + "learning_rate": 1.9941152010592594e-05, + "loss": 1.2005, "step": 1350 }, { - "epoch": 0.2832879010274691, - "grad_norm": 5.841388344868005, - "learning_rate": 1.97834172353944e-05, - "loss": 2.4532, + "epoch": 0.19076532053092346, + "grad_norm": 4.234432154714101, + "learning_rate": 1.9940986754058792e-05, + "loss": 1.0525, "step": 1351 }, { - "epoch": 0.28349758859299645, - "grad_norm": 6.830689426606595, - "learning_rate": 1.978294836893429e-05, - "loss": 2.5169, + "epoch": 0.19090652358090934, + "grad_norm": 4.45955658381661, + "learning_rate": 1.994082126650075e-05, + "loss": 1.0848, "step": 1352 }, { - "epoch": 0.2837072761585238, - "grad_norm": 5.406962651623831, - "learning_rate": 1.9782479001081107e-05, - "loss": 2.4342, + "epoch": 0.19104772663089523, + "grad_norm": 4.495511389432425, + "learning_rate": 1.9940655547922314e-05, + "loss": 1.169, "step": 1353 }, { - "epoch": 0.2839169637240512, - "grad_norm": 5.287942940600494, - "learning_rate": 1.978200913185891e-05, - "loss": 2.1069, + "epoch": 0.1911889296808811, + "grad_norm": 3.966821731365161, + "learning_rate": 1.9940489598327336e-05, + "loss": 0.9496, "step": 1354 }, { - "epoch": 0.28412665128957854, - "grad_norm": 5.480016943633358, - "learning_rate": 1.9781538761291777e-05, - "loss": 2.5337, + "epoch": 0.191330132730867, + "grad_norm": 4.988245421127051, + "learning_rate": 1.9940323417719677e-05, + "loss": 1.2218, "step": 1355 }, { - "epoch": 0.2843363388551059, - "grad_norm": 5.063895214655192, - "learning_rate": 1.9781067889403815e-05, - "loss": 2.154, + "epoch": 0.19147133578085287, + "grad_norm": 5.077921266949037, + "learning_rate": 1.994015700610319e-05, + "loss": 1.0569, "step": 1356 }, { - "epoch": 0.28454602642063326, - "grad_norm": 5.439974558336936, - "learning_rate": 1.9780596516219165e-05, - "loss": 2.4046, + "epoch": 0.19161253883083876, + "grad_norm": 4.256113929917655, + "learning_rate": 1.9939990363481748e-05, + "loss": 1.0854, "step": 1357 }, { - "epoch": 0.2847557139861606, - "grad_norm": 5.823023622451933, - "learning_rate": 1.9780124641761976e-05, - "loss": 2.3336, + "epoch": 0.1917537418808246, + "grad_norm": 4.380369823664638, + "learning_rate": 1.9939823489859226e-05, + "loss": 1.2463, "step": 1358 }, { - "epoch": 0.284965401551688, - "grad_norm": 5.030387248568188, - "learning_rate": 1.9779652266056435e-05, - "loss": 2.1636, + "epoch": 0.1918949449308105, + "grad_norm": 5.8405042242999725, + "learning_rate": 1.9939656385239493e-05, + "loss": 1.1355, "step": 1359 }, { - "epoch": 0.28517508911721534, - "grad_norm": 5.725925329443499, - "learning_rate": 1.9779179389126758e-05, - "loss": 2.3717, + "epoch": 0.19203614798079638, + "grad_norm": 4.076259785178609, + "learning_rate": 1.993948904962644e-05, + "loss": 1.1287, "step": 1360 }, { - "epoch": 0.2853847766827427, - "grad_norm": 5.757928748102482, - "learning_rate": 1.977870601099717e-05, - "loss": 2.4127, + "epoch": 0.19217735103078226, + "grad_norm": 3.9368530576347145, + "learning_rate": 1.9939321483023958e-05, + "loss": 0.8798, "step": 1361 }, { - "epoch": 0.28559446424827006, - "grad_norm": 5.375997260570959, - "learning_rate": 1.9778232131691947e-05, - "loss": 2.4381, + "epoch": 0.19231855408076814, + "grad_norm": 4.780702129696289, + "learning_rate": 1.9939153685435932e-05, + "loss": 1.1728, "step": 1362 }, { - "epoch": 0.2858041518137974, - "grad_norm": 5.5983631364185875, - "learning_rate": 1.977775775123536e-05, - "loss": 2.3248, + "epoch": 0.19245975713075403, + "grad_norm": 5.237488985157414, + "learning_rate": 1.993898565686627e-05, + "loss": 1.2618, "step": 1363 }, { - "epoch": 0.2860138393793248, - "grad_norm": 5.207252969281748, - "learning_rate": 1.9777282869651734e-05, - "loss": 2.3333, + "epoch": 0.1926009601807399, + "grad_norm": 4.486296177813556, + "learning_rate": 1.9938817397318872e-05, + "loss": 1.0266, "step": 1364 }, { - "epoch": 0.28622352694485215, - "grad_norm": 5.679604890805039, - "learning_rate": 1.9776807486965404e-05, - "loss": 2.5376, + "epoch": 0.1927421632307258, + "grad_norm": 4.884647580743861, + "learning_rate": 1.993864890679765e-05, + "loss": 1.2699, "step": 1365 }, { - "epoch": 0.2864332145103795, - "grad_norm": 5.16206243131396, - "learning_rate": 1.9776331603200728e-05, - "loss": 2.1735, + "epoch": 0.19288336628071168, + "grad_norm": 5.06937477857426, + "learning_rate": 1.993848018530652e-05, + "loss": 1.1504, "step": 1366 }, { - "epoch": 0.2866429020759069, - "grad_norm": 4.679696428349341, - "learning_rate": 1.9775855218382104e-05, - "loss": 2.3827, + "epoch": 0.19302456933069753, + "grad_norm": 5.2310621854171435, + "learning_rate": 1.9938311232849403e-05, + "loss": 1.196, "step": 1367 }, { - "epoch": 0.2868525896414343, - "grad_norm": 5.840075579797279, - "learning_rate": 1.9775378332533948e-05, - "loss": 2.2297, + "epoch": 0.19316577238068341, + "grad_norm": 4.445733762014927, + "learning_rate": 1.9938142049430223e-05, + "loss": 1.1346, "step": 1368 }, { - "epoch": 0.28706227720696165, - "grad_norm": 6.893413814083934, - "learning_rate": 1.9774900945680694e-05, - "loss": 2.0948, + "epoch": 0.1933069754306693, + "grad_norm": 4.961584455490589, + "learning_rate": 1.9937972635052918e-05, + "loss": 1.1495, "step": 1369 }, { - "epoch": 0.287271964772489, - "grad_norm": 5.296062084816024, - "learning_rate": 1.9774423057846812e-05, - "loss": 2.35, + "epoch": 0.19344817848065518, + "grad_norm": 4.6009485669165695, + "learning_rate": 1.9937802989721417e-05, + "loss": 1.1562, "step": 1370 }, { - "epoch": 0.28748165233801637, - "grad_norm": 5.75696701292084, - "learning_rate": 1.97739446690568e-05, - "loss": 2.524, + "epoch": 0.19358938153064106, + "grad_norm": 4.786603149168164, + "learning_rate": 1.9937633113439667e-05, + "loss": 1.1402, "step": 1371 }, { - "epoch": 0.28769133990354373, - "grad_norm": 6.774835984463509, - "learning_rate": 1.9773465779335168e-05, - "loss": 2.251, + "epoch": 0.19373058458062695, + "grad_norm": 5.215685371857311, + "learning_rate": 1.9937463006211617e-05, + "loss": 1.2459, "step": 1372 }, { - "epoch": 0.2879010274690711, - "grad_norm": 5.918354258554908, - "learning_rate": 1.9772986388706466e-05, - "loss": 2.6306, + "epoch": 0.19387178763061283, + "grad_norm": 4.219953271400533, + "learning_rate": 1.9937292668041216e-05, + "loss": 1.0828, "step": 1373 }, { - "epoch": 0.28811071503459845, - "grad_norm": 5.484237689896832, - "learning_rate": 1.9772506497195256e-05, - "loss": 2.3988, + "epoch": 0.1940129906805987, + "grad_norm": 4.439440428967038, + "learning_rate": 1.9937122098932428e-05, + "loss": 0.9913, "step": 1374 }, { - "epoch": 0.2883204026001258, - "grad_norm": 6.18271467119657, - "learning_rate": 1.9772026104826146e-05, - "loss": 2.269, + "epoch": 0.19415419373058457, + "grad_norm": 4.157030725787838, + "learning_rate": 1.993695129888921e-05, + "loss": 1.0894, "step": 1375 }, { - "epoch": 0.2885300901656532, - "grad_norm": 5.073049190896248, - "learning_rate": 1.9771545211623743e-05, - "loss": 2.3343, + "epoch": 0.19429539678057045, + "grad_norm": 34.570734153991495, + "learning_rate": 1.993678026791554e-05, + "loss": 1.2165, "step": 1376 }, { - "epoch": 0.28873977773118054, - "grad_norm": 5.521289749311444, - "learning_rate": 1.9771063817612704e-05, - "loss": 2.4625, + "epoch": 0.19443659983055633, + "grad_norm": 4.359256269584173, + "learning_rate": 1.9936609006015383e-05, + "loss": 1.0305, "step": 1377 }, { - "epoch": 0.2889494652967079, - "grad_norm": 5.010207279435596, - "learning_rate": 1.9770581922817695e-05, - "loss": 2.2395, + "epoch": 0.19457780288054222, + "grad_norm": 4.502427167387434, + "learning_rate": 1.9936437513192728e-05, + "loss": 1.1464, "step": 1378 }, { - "epoch": 0.28915915286223526, - "grad_norm": 5.677074069196393, - "learning_rate": 1.9770099527263417e-05, - "loss": 2.5001, + "epoch": 0.1947190059305281, + "grad_norm": 4.256541823699716, + "learning_rate": 1.9936265789451554e-05, + "loss": 1.236, "step": 1379 }, { - "epoch": 0.2893688404277626, - "grad_norm": 6.312993090262843, - "learning_rate": 1.9769616630974594e-05, - "loss": 2.3592, + "epoch": 0.19486020898051398, + "grad_norm": 4.698197044398401, + "learning_rate": 1.9936093834795853e-05, + "loss": 1.115, "step": 1380 }, { - "epoch": 0.28957852799329, - "grad_norm": 5.276873511139441, - "learning_rate": 1.976913323397597e-05, - "loss": 2.2, + "epoch": 0.19500141203049987, + "grad_norm": 4.587669651026083, + "learning_rate": 1.9935921649229626e-05, + "loss": 1.0151, "step": 1381 }, { - "epoch": 0.28978821555881734, - "grad_norm": 5.188193178326891, - "learning_rate": 1.9768649336292327e-05, - "loss": 2.5627, + "epoch": 0.19514261508048575, + "grad_norm": 5.122064895287609, + "learning_rate": 1.9935749232756868e-05, + "loss": 1.152, "step": 1382 }, { - "epoch": 0.2899979031243447, - "grad_norm": 5.077044232057226, - "learning_rate": 1.9768164937948463e-05, - "loss": 2.2167, + "epoch": 0.19528381813047163, + "grad_norm": 5.884767488767738, + "learning_rate": 1.993557658538159e-05, + "loss": 1.1704, "step": 1383 }, { - "epoch": 0.29020759068987206, - "grad_norm": 5.304612498918937, - "learning_rate": 1.9767680038969207e-05, - "loss": 2.3809, + "epoch": 0.1954250211804575, + "grad_norm": 4.55416969934983, + "learning_rate": 1.9935403707107804e-05, + "loss": 0.9859, "step": 1384 }, { - "epoch": 0.2904172782553995, - "grad_norm": 5.075595825045641, - "learning_rate": 1.9767194639379405e-05, - "loss": 2.2904, + "epoch": 0.19556622423044337, + "grad_norm": 4.093946759835486, + "learning_rate": 1.9935230597939525e-05, + "loss": 1.0983, "step": 1385 }, { - "epoch": 0.29062696582092684, - "grad_norm": 5.545721663532519, - "learning_rate": 1.9766708739203935e-05, - "loss": 2.4336, + "epoch": 0.19570742728042925, + "grad_norm": 4.495323307741869, + "learning_rate": 1.9935057257880777e-05, + "loss": 0.9505, "step": 1386 }, { - "epoch": 0.2908366533864542, - "grad_norm": 5.264407927054516, - "learning_rate": 1.9766222338467705e-05, - "loss": 2.3763, + "epoch": 0.19584863033041514, + "grad_norm": 5.834897892507581, + "learning_rate": 1.993488368693559e-05, + "loss": 1.3045, "step": 1387 }, { - "epoch": 0.29104634095198156, - "grad_norm": 5.080513593407458, - "learning_rate": 1.9765735437195638e-05, - "loss": 2.2608, + "epoch": 0.19598983338040102, + "grad_norm": 4.338397488445801, + "learning_rate": 1.9934709885108e-05, + "loss": 1.1545, "step": 1388 }, { - "epoch": 0.2912560285175089, - "grad_norm": 5.1577538582083635, - "learning_rate": 1.9765248035412696e-05, - "loss": 2.3994, + "epoch": 0.1961310364303869, + "grad_norm": 4.810445045051072, + "learning_rate": 1.9934535852402038e-05, + "loss": 1.1318, "step": 1389 }, { - "epoch": 0.2914657160830363, - "grad_norm": 5.187958189933125, - "learning_rate": 1.9764760133143857e-05, - "loss": 2.2776, + "epoch": 0.19627223948037278, + "grad_norm": 4.378292848894843, + "learning_rate": 1.9934361588821757e-05, + "loss": 1.0782, "step": 1390 }, { - "epoch": 0.29167540364856365, - "grad_norm": 7.465843409221242, - "learning_rate": 1.9764271730414122e-05, - "loss": 2.8008, + "epoch": 0.19641344253035867, + "grad_norm": 5.746582632715619, + "learning_rate": 1.9934187094371197e-05, + "loss": 1.3311, "step": 1391 }, { - "epoch": 0.291885091214091, - "grad_norm": 5.463369237727748, - "learning_rate": 1.9763782827248527e-05, - "loss": 2.4756, + "epoch": 0.19655464558034452, + "grad_norm": 5.540869649696159, + "learning_rate": 1.9934012369054422e-05, + "loss": 1.2732, "step": 1392 }, { - "epoch": 0.29209477877961837, - "grad_norm": 5.702296479020473, - "learning_rate": 1.9763293423672123e-05, - "loss": 2.3126, + "epoch": 0.1966958486303304, + "grad_norm": 5.174596563827367, + "learning_rate": 1.9933837412875493e-05, + "loss": 1.3111, "step": 1393 }, { - "epoch": 0.29230446634514573, - "grad_norm": 5.464472570622636, - "learning_rate": 1.9762803519710002e-05, - "loss": 2.4072, + "epoch": 0.1968370516803163, + "grad_norm": 5.298528945248415, + "learning_rate": 1.993366222583847e-05, + "loss": 1.1612, "step": 1394 }, { - "epoch": 0.2925141539106731, - "grad_norm": 5.334102500625808, - "learning_rate": 1.9762313115387268e-05, - "loss": 2.53, + "epoch": 0.19697825473030217, + "grad_norm": 5.076095831294379, + "learning_rate": 1.9933486807947425e-05, + "loss": 1.1071, "step": 1395 }, { - "epoch": 0.29272384147620045, - "grad_norm": 5.471228197868595, - "learning_rate": 1.9761822210729052e-05, - "loss": 2.1569, + "epoch": 0.19711945778028805, + "grad_norm": 4.6502752061493275, + "learning_rate": 1.993331115920644e-05, + "loss": 1.1118, "step": 1396 }, { - "epoch": 0.2929335290417278, - "grad_norm": 4.937720654859672, - "learning_rate": 1.976133080576052e-05, - "loss": 2.1694, + "epoch": 0.19726066083027394, + "grad_norm": 4.25801742623009, + "learning_rate": 1.9933135279619592e-05, + "loss": 0.9641, "step": 1397 }, { - "epoch": 0.2931432166072552, - "grad_norm": 4.893923206418719, - "learning_rate": 1.976083890050685e-05, - "loss": 2.3735, + "epoch": 0.19740186388025982, + "grad_norm": 3.800037628500738, + "learning_rate": 1.993295916919097e-05, + "loss": 0.9734, "step": 1398 }, { - "epoch": 0.29335290417278254, - "grad_norm": 6.8811178992586175, - "learning_rate": 1.976034649499326e-05, - "loss": 2.5713, + "epoch": 0.1975430669302457, + "grad_norm": 4.459562974299471, + "learning_rate": 1.9932782827924667e-05, + "loss": 0.99, "step": 1399 }, { - "epoch": 0.2935625917383099, - "grad_norm": 5.492288795082419, - "learning_rate": 1.9759853589244984e-05, - "loss": 2.3401, + "epoch": 0.1976842699802316, + "grad_norm": 4.9326287738430175, + "learning_rate": 1.993260625582478e-05, + "loss": 1.1598, "step": 1400 }, { - "epoch": 0.29377227930383726, - "grad_norm": 5.821463970397368, - "learning_rate": 1.9759360183287284e-05, - "loss": 2.2034, + "epoch": 0.19782547303021744, + "grad_norm": 5.796397860227611, + "learning_rate": 1.9932429452895413e-05, + "loss": 1.2993, "step": 1401 }, { - "epoch": 0.2939819668693646, - "grad_norm": 5.366478947306584, - "learning_rate": 1.9758866277145445e-05, - "loss": 2.0132, + "epoch": 0.19796667608020332, + "grad_norm": 4.433274851023001, + "learning_rate": 1.9932252419140675e-05, + "loss": 1.0181, "step": 1402 }, { - "epoch": 0.29419165443489204, - "grad_norm": 5.246015318044939, - "learning_rate": 1.9758371870844786e-05, - "loss": 2.435, + "epoch": 0.1981078791301892, + "grad_norm": 4.844598420948536, + "learning_rate": 1.9932075154564682e-05, + "loss": 1.0453, "step": 1403 }, { - "epoch": 0.2944013420004194, - "grad_norm": 5.864002220307482, - "learning_rate": 1.9757876964410643e-05, - "loss": 2.3031, + "epoch": 0.1982490821801751, + "grad_norm": 4.465201641543277, + "learning_rate": 1.993189765917155e-05, + "loss": 1.054, "step": 1404 }, { - "epoch": 0.29461102956594676, - "grad_norm": 6.511298799388658, - "learning_rate": 1.9757381557868384e-05, - "loss": 2.4059, + "epoch": 0.19839028523016097, + "grad_norm": 4.4696411377702105, + "learning_rate": 1.9931719932965405e-05, + "loss": 1.3337, "step": 1405 }, { - "epoch": 0.2948207171314741, - "grad_norm": 5.624179605917954, - "learning_rate": 1.9756885651243395e-05, - "loss": 2.4428, + "epoch": 0.19853148828014686, + "grad_norm": 4.601386120181231, + "learning_rate": 1.993154197595038e-05, + "loss": 1.0814, "step": 1406 }, { - "epoch": 0.2950304046970015, - "grad_norm": 5.547381298034921, - "learning_rate": 1.975638924456109e-05, - "loss": 2.3485, + "epoch": 0.19867269133013274, + "grad_norm": 4.206286431512165, + "learning_rate": 1.9931363788130607e-05, + "loss": 0.9872, "step": 1407 }, { - "epoch": 0.29524009226252884, - "grad_norm": 6.130195742780277, - "learning_rate": 1.975589233784692e-05, - "loss": 2.5537, + "epoch": 0.19881389438011862, + "grad_norm": 3.859504779739252, + "learning_rate": 1.9931185369510228e-05, + "loss": 0.8377, "step": 1408 }, { - "epoch": 0.2954497798280562, - "grad_norm": 5.924402322305737, - "learning_rate": 1.9755394931126345e-05, - "loss": 1.9947, + "epoch": 0.19895509743010448, + "grad_norm": 4.755391681682118, + "learning_rate": 1.993100672009339e-05, + "loss": 1.1062, "step": 1409 }, { - "epoch": 0.29565946739358356, - "grad_norm": 5.112370655468548, - "learning_rate": 1.9754897024424857e-05, - "loss": 2.2103, + "epoch": 0.19909630048009036, + "grad_norm": 4.798617143925379, + "learning_rate": 1.993082783988425e-05, + "loss": 1.0188, "step": 1410 }, { - "epoch": 0.2958691549591109, - "grad_norm": 5.9626419276039, - "learning_rate": 1.9754398617767982e-05, - "loss": 2.5006, + "epoch": 0.19923750353007624, + "grad_norm": 4.611536969524065, + "learning_rate": 1.9930648728886952e-05, + "loss": 1.0135, "step": 1411 }, { - "epoch": 0.2960788425246383, - "grad_norm": 5.970263228310468, - "learning_rate": 1.9753899711181258e-05, - "loss": 2.3722, + "epoch": 0.19937870658006213, + "grad_norm": 5.150334462883534, + "learning_rate": 1.993046938710567e-05, + "loss": 1.0598, "step": 1412 }, { - "epoch": 0.29628853009016565, - "grad_norm": 5.7557982346698635, - "learning_rate": 1.9753400304690256e-05, - "loss": 2.4389, + "epoch": 0.199519909630048, + "grad_norm": 4.996195006898983, + "learning_rate": 1.993028981454457e-05, + "loss": 1.2022, "step": 1413 }, { - "epoch": 0.296498217655693, - "grad_norm": 5.3759009942366, - "learning_rate": 1.9752900398320573e-05, - "loss": 2.5187, + "epoch": 0.1996611126800339, + "grad_norm": 4.488016184218211, + "learning_rate": 1.993011001120782e-05, + "loss": 1.1398, "step": 1414 }, { - "epoch": 0.29670790522122037, - "grad_norm": 6.178018382512329, - "learning_rate": 1.9752399992097827e-05, - "loss": 2.5569, + "epoch": 0.19980231573001977, + "grad_norm": 4.165576760566369, + "learning_rate": 1.9929929977099603e-05, + "loss": 1.1742, "step": 1415 }, { - "epoch": 0.29691759278674773, - "grad_norm": 5.11597217931808, - "learning_rate": 1.975189908604767e-05, - "loss": 2.3324, + "epoch": 0.19994351878000566, + "grad_norm": 6.499337496573019, + "learning_rate": 1.9929749712224104e-05, + "loss": 1.2797, "step": 1416 }, { - "epoch": 0.2971272803522751, - "grad_norm": 5.759278113157731, - "learning_rate": 1.975139768019577e-05, - "loss": 2.3471, + "epoch": 0.20008472182999154, + "grad_norm": 4.45437976331422, + "learning_rate": 1.9929569216585506e-05, + "loss": 1.0323, "step": 1417 }, { - "epoch": 0.29733696791780245, - "grad_norm": 5.333804587512334, - "learning_rate": 1.9750895774567822e-05, - "loss": 2.3706, + "epoch": 0.2002259248799774, + "grad_norm": 4.607436291322566, + "learning_rate": 1.992938849018801e-05, + "loss": 1.0396, "step": 1418 }, { - "epoch": 0.2975466554833298, - "grad_norm": 5.450258079150982, - "learning_rate": 1.9750393369189556e-05, - "loss": 2.3107, + "epoch": 0.20036712792996328, + "grad_norm": 4.8726097691886325, + "learning_rate": 1.9929207533035812e-05, + "loss": 0.995, "step": 1419 }, { - "epoch": 0.29775634304885723, - "grad_norm": 5.847153736487412, - "learning_rate": 1.974989046408672e-05, - "loss": 2.134, + "epoch": 0.20050833097994916, + "grad_norm": 3.730693804058299, + "learning_rate": 1.992902634513312e-05, + "loss": 0.9677, "step": 1420 }, { - "epoch": 0.2979660306143846, - "grad_norm": 5.29119410342301, - "learning_rate": 1.9749387059285085e-05, - "loss": 2.2594, + "epoch": 0.20064953402993504, + "grad_norm": 4.24225325135271, + "learning_rate": 1.9928844926484145e-05, + "loss": 1.0331, "step": 1421 }, { - "epoch": 0.29817571817991195, - "grad_norm": 5.724515865347008, - "learning_rate": 1.974888315481046e-05, - "loss": 2.2891, + "epoch": 0.20079073707992093, + "grad_norm": 4.519568897635765, + "learning_rate": 1.99286632770931e-05, + "loss": 1.1464, "step": 1422 }, { - "epoch": 0.2983854057454393, - "grad_norm": 6.034746892094913, - "learning_rate": 1.9748378750688657e-05, - "loss": 2.3084, + "epoch": 0.2009319401299068, + "grad_norm": 4.969497536701674, + "learning_rate": 1.992848139696421e-05, + "loss": 1.2325, "step": 1423 }, { - "epoch": 0.2985950933109667, - "grad_norm": 5.377297012841492, - "learning_rate": 1.9747873846945537e-05, - "loss": 1.9538, + "epoch": 0.2010731431798927, + "grad_norm": 5.022407721128527, + "learning_rate": 1.9928299286101696e-05, + "loss": 1.0214, "step": 1424 }, { - "epoch": 0.29880478087649404, - "grad_norm": 5.458174296259753, - "learning_rate": 1.974736844360698e-05, - "loss": 2.3644, + "epoch": 0.20121434622987858, + "grad_norm": 5.242440192214647, + "learning_rate": 1.99281169445098e-05, + "loss": 1.2644, "step": 1425 }, { - "epoch": 0.2990144684420214, - "grad_norm": 6.1247899748716925, - "learning_rate": 1.9746862540698882e-05, - "loss": 2.1765, + "epoch": 0.20135554927986443, + "grad_norm": 4.893282100137156, + "learning_rate": 1.992793437219275e-05, + "loss": 0.9517, "step": 1426 }, { - "epoch": 0.29922415600754876, - "grad_norm": 5.7119420216059185, - "learning_rate": 1.9746356138247172e-05, - "loss": 2.5048, + "epoch": 0.20149675232985031, + "grad_norm": 4.950594188754899, + "learning_rate": 1.9927751569154795e-05, + "loss": 1.2253, "step": 1427 }, { - "epoch": 0.2994338435730761, - "grad_norm": 6.398661353954476, - "learning_rate": 1.9745849236277805e-05, - "loss": 2.2233, + "epoch": 0.2016379553798362, + "grad_norm": 4.173985614297457, + "learning_rate": 1.992756853540018e-05, + "loss": 1.015, "step": 1428 }, { - "epoch": 0.2996435311386035, - "grad_norm": 5.430975759887312, - "learning_rate": 1.974534183481676e-05, - "loss": 2.3021, + "epoch": 0.20177915842982208, + "grad_norm": 4.090141921959075, + "learning_rate": 1.992738527093316e-05, + "loss": 1.0505, "step": 1429 }, { - "epoch": 0.29985321870413084, - "grad_norm": 6.358817732784684, - "learning_rate": 1.9744833933890048e-05, - "loss": 2.2365, + "epoch": 0.20192036147980796, + "grad_norm": 5.561530417477826, + "learning_rate": 1.9927201775757995e-05, + "loss": 1.2441, "step": 1430 }, { - "epoch": 0.3000629062696582, - "grad_norm": 5.2373038692391924, - "learning_rate": 1.9744325533523697e-05, - "loss": 2.2837, + "epoch": 0.20206156452979385, + "grad_norm": 4.655025890670126, + "learning_rate": 1.9927018049878945e-05, + "loss": 1.1776, "step": 1431 }, { - "epoch": 0.30027259383518556, - "grad_norm": 5.444389155516945, - "learning_rate": 1.974381663374376e-05, - "loss": 2.311, + "epoch": 0.20220276757977973, + "grad_norm": 4.126348351315558, + "learning_rate": 1.9926834093300283e-05, + "loss": 1.0454, "step": 1432 }, { - "epoch": 0.3004822814007129, - "grad_norm": 5.230672184753493, - "learning_rate": 1.9743307234576315e-05, - "loss": 2.2068, + "epoch": 0.2023439706297656, + "grad_norm": 5.280453087456826, + "learning_rate": 1.9926649906026285e-05, + "loss": 1.3948, "step": 1433 }, { - "epoch": 0.3006919689662403, - "grad_norm": 4.93466298469086, - "learning_rate": 1.9742797336047483e-05, - "loss": 2.3903, + "epoch": 0.2024851736797515, + "grad_norm": 4.799321396686698, + "learning_rate": 1.9926465488061232e-05, + "loss": 1.2189, "step": 1434 }, { - "epoch": 0.30090165653176765, - "grad_norm": 5.512805175024408, - "learning_rate": 1.9742286938183383e-05, - "loss": 2.431, + "epoch": 0.20262637672973735, + "grad_norm": 4.050827720164738, + "learning_rate": 1.9926280839409405e-05, + "loss": 0.9316, "step": 1435 }, { - "epoch": 0.301111344097295, - "grad_norm": 4.83193287888066, - "learning_rate": 1.9741776041010183e-05, - "loss": 2.1003, + "epoch": 0.20276757977972323, + "grad_norm": 4.28213369161842, + "learning_rate": 1.9926095960075097e-05, + "loss": 1.1436, "step": 1436 }, { - "epoch": 0.30132103166282237, - "grad_norm": 6.1005931067695585, - "learning_rate": 1.9741264644554067e-05, - "loss": 2.3219, + "epoch": 0.20290878282970912, + "grad_norm": 4.570101208221942, + "learning_rate": 1.9925910850062607e-05, + "loss": 1.1577, "step": 1437 }, { - "epoch": 0.3015307192283498, - "grad_norm": 5.0592401441334225, - "learning_rate": 1.974075274884124e-05, - "loss": 2.3954, + "epoch": 0.203049985879695, + "grad_norm": 4.011418407683016, + "learning_rate": 1.9925725509376236e-05, + "loss": 0.8617, "step": 1438 }, { - "epoch": 0.30174040679387715, - "grad_norm": 4.994985346391976, - "learning_rate": 1.974024035389794e-05, - "loss": 2.4175, + "epoch": 0.20319118892968088, + "grad_norm": 5.096557054767951, + "learning_rate": 1.9925539938020292e-05, + "loss": 1.1157, "step": 1439 }, { - "epoch": 0.3019500943594045, - "grad_norm": 5.911793106618969, - "learning_rate": 1.973972745975043e-05, - "loss": 2.3551, + "epoch": 0.20333239197966677, + "grad_norm": 4.40117843333325, + "learning_rate": 1.9925354135999083e-05, + "loss": 1.0635, "step": 1440 }, { - "epoch": 0.30215978192493187, - "grad_norm": 6.133682346007816, - "learning_rate": 1.9739214066424992e-05, - "loss": 2.3435, + "epoch": 0.20347359502965265, + "grad_norm": 5.328924646195068, + "learning_rate": 1.9925168103316938e-05, + "loss": 1.1236, "step": 1441 }, { - "epoch": 0.30236946949045923, - "grad_norm": 6.083464090655175, - "learning_rate": 1.9738700173947944e-05, - "loss": 2.5624, + "epoch": 0.20361479807963853, + "grad_norm": 5.081907097277156, + "learning_rate": 1.9924981839978167e-05, + "loss": 1.2163, "step": 1442 }, { - "epoch": 0.3025791570559866, - "grad_norm": 5.770741610649502, - "learning_rate": 1.9738185782345623e-05, - "loss": 2.5611, + "epoch": 0.2037560011296244, + "grad_norm": 5.546581969134257, + "learning_rate": 1.9924795345987103e-05, + "loss": 1.1932, "step": 1443 }, { - "epoch": 0.30278884462151395, - "grad_norm": 5.183926297317286, - "learning_rate": 1.9737670891644387e-05, - "loss": 2.089, + "epoch": 0.20389720417961027, + "grad_norm": 5.389560198664573, + "learning_rate": 1.992460862134808e-05, + "loss": 1.149, "step": 1444 }, { - "epoch": 0.3029985321870413, - "grad_norm": 5.611065594858339, - "learning_rate": 1.973715550187063e-05, - "loss": 2.1529, + "epoch": 0.20403840722959615, + "grad_norm": 5.098083112279385, + "learning_rate": 1.992442166606544e-05, + "loss": 1.2768, "step": 1445 }, { - "epoch": 0.3032082197525687, - "grad_norm": 5.034184357449487, - "learning_rate": 1.973663961305076e-05, - "loss": 2.1034, + "epoch": 0.20417961027958204, + "grad_norm": 4.721295768714217, + "learning_rate": 1.992423448014353e-05, + "loss": 1.0414, "step": 1446 }, { - "epoch": 0.30341790731809604, - "grad_norm": 7.041873809217281, - "learning_rate": 1.9736123225211233e-05, - "loss": 2.6767, + "epoch": 0.20432081332956792, + "grad_norm": 4.0322282072817766, + "learning_rate": 1.9924047063586695e-05, + "loss": 0.9314, "step": 1447 }, { - "epoch": 0.3036275948836234, - "grad_norm": 5.409772700612872, - "learning_rate": 1.97356063383785e-05, - "loss": 2.5794, + "epoch": 0.2044620163795538, + "grad_norm": 4.5603603854322445, + "learning_rate": 1.9923859416399295e-05, + "loss": 1.078, "step": 1448 }, { - "epoch": 0.30383728244915076, - "grad_norm": 5.806629665273312, - "learning_rate": 1.9735088952579054e-05, - "loss": 2.2354, + "epoch": 0.20460321942953968, + "grad_norm": 4.1447334075637015, + "learning_rate": 1.992367153858568e-05, + "loss": 0.9364, "step": 1449 }, { - "epoch": 0.3040469700146781, - "grad_norm": 6.015174753195954, - "learning_rate": 1.9734571067839417e-05, - "loss": 2.1491, + "epoch": 0.20474442247952557, + "grad_norm": 4.692272883159346, + "learning_rate": 1.992348343015023e-05, + "loss": 1.1447, "step": 1450 }, { - "epoch": 0.3042566575802055, - "grad_norm": 5.474443396407675, - "learning_rate": 1.9734052684186123e-05, - "loss": 2.0347, + "epoch": 0.20488562552951145, + "grad_norm": 5.074080390468344, + "learning_rate": 1.9923295091097312e-05, + "loss": 0.9989, "step": 1451 }, { - "epoch": 0.30446634514573284, - "grad_norm": 6.699735488091636, - "learning_rate": 1.9733533801645754e-05, - "loss": 2.2523, + "epoch": 0.2050268285794973, + "grad_norm": 4.49410017850021, + "learning_rate": 1.9923106521431296e-05, + "loss": 1.0541, "step": 1452 }, { - "epoch": 0.3046760327112602, - "grad_norm": 5.7365581935845205, - "learning_rate": 1.9733014420244894e-05, - "loss": 2.7728, + "epoch": 0.2051680316294832, + "grad_norm": 5.267474052231831, + "learning_rate": 1.9922917721156575e-05, + "loss": 1.3713, "step": 1453 }, { - "epoch": 0.30488572027678756, - "grad_norm": 5.34055720734635, - "learning_rate": 1.973249454001016e-05, - "loss": 2.2638, + "epoch": 0.20530923467946907, + "grad_norm": 4.605919090905005, + "learning_rate": 1.9922728690277528e-05, + "loss": 1.2021, "step": 1454 }, { - "epoch": 0.3050954078423149, - "grad_norm": 5.275955633358409, - "learning_rate": 1.9731974160968206e-05, - "loss": 2.3764, + "epoch": 0.20545043772945495, + "grad_norm": 3.7376788048629033, + "learning_rate": 1.9922539428798555e-05, + "loss": 0.9375, "step": 1455 }, { - "epoch": 0.30530509540784234, - "grad_norm": 5.338190496900929, - "learning_rate": 1.9731453283145693e-05, - "loss": 2.4801, + "epoch": 0.20559164077944084, + "grad_norm": 4.6501607230742, + "learning_rate": 1.9922349936724048e-05, + "loss": 1.0705, "step": 1456 }, { - "epoch": 0.3055147829733697, - "grad_norm": 5.327515316400625, - "learning_rate": 1.973093190656932e-05, - "loss": 2.2876, + "epoch": 0.20573284382942672, + "grad_norm": 5.373320219894962, + "learning_rate": 1.9922160214058417e-05, + "loss": 1.1918, "step": 1457 }, { - "epoch": 0.30572447053889706, - "grad_norm": 6.607126768932123, - "learning_rate": 1.9730410031265813e-05, - "loss": 2.1421, + "epoch": 0.2058740468794126, + "grad_norm": 5.976873633998706, + "learning_rate": 1.9921970260806064e-05, + "loss": 1.371, "step": 1458 }, { - "epoch": 0.3059341581044244, - "grad_norm": 5.124583826743587, - "learning_rate": 1.9729887657261914e-05, - "loss": 1.9938, + "epoch": 0.2060152499293985, + "grad_norm": 5.356476979599017, + "learning_rate": 1.992178007697141e-05, + "loss": 1.0875, "step": 1459 }, { - "epoch": 0.3061438456699518, - "grad_norm": 4.966580818159501, - "learning_rate": 1.9729364784584396e-05, - "loss": 2.5906, + "epoch": 0.20615645297938434, + "grad_norm": 4.440129025654568, + "learning_rate": 1.992158966255887e-05, + "loss": 1.1862, "step": 1460 }, { - "epoch": 0.30635353323547915, - "grad_norm": 5.366111213305886, - "learning_rate": 1.972884141326006e-05, - "loss": 2.0271, + "epoch": 0.20629765602937022, + "grad_norm": 6.499771637881478, + "learning_rate": 1.992139901757287e-05, + "loss": 1.3755, "step": 1461 }, { - "epoch": 0.3065632208010065, - "grad_norm": 5.5833115204001285, - "learning_rate": 1.9728317543315727e-05, - "loss": 2.4378, + "epoch": 0.2064388590793561, + "grad_norm": 4.635436112066935, + "learning_rate": 1.9921208142017844e-05, + "loss": 1.0043, "step": 1462 }, { - "epoch": 0.30677290836653387, - "grad_norm": 5.396520349249426, - "learning_rate": 1.9727793174778247e-05, - "loss": 2.3029, + "epoch": 0.206580062129342, + "grad_norm": 5.696350947923137, + "learning_rate": 1.9921017035898226e-05, + "loss": 1.038, "step": 1463 }, { - "epoch": 0.30698259593206123, - "grad_norm": 5.473257724899315, - "learning_rate": 1.9727268307674495e-05, - "loss": 2.405, + "epoch": 0.20672126517932787, + "grad_norm": 6.3100624273487265, + "learning_rate": 1.9920825699218453e-05, + "loss": 1.268, "step": 1464 }, { - "epoch": 0.3071922834975886, - "grad_norm": 5.979601176544394, - "learning_rate": 1.972674294203137e-05, - "loss": 2.1358, + "epoch": 0.20686246822931376, + "grad_norm": 4.30454260141745, + "learning_rate": 1.9920634131982976e-05, + "loss": 1.0673, "step": 1465 }, { - "epoch": 0.30740197106311595, - "grad_norm": 5.264328114008282, - "learning_rate": 1.97262170778758e-05, - "loss": 2.4749, + "epoch": 0.20700367127929964, + "grad_norm": 4.0361986779457375, + "learning_rate": 1.9920442334196248e-05, + "loss": 1.1551, "step": 1466 }, { - "epoch": 0.3076116586286433, - "grad_norm": 5.9035118195019125, - "learning_rate": 1.9725690715234735e-05, - "loss": 2.4332, + "epoch": 0.20714487432928552, + "grad_norm": 4.44560883748071, + "learning_rate": 1.9920250305862723e-05, + "loss": 1.0469, "step": 1467 }, { - "epoch": 0.3078213461941707, - "grad_norm": 5.489031807224302, - "learning_rate": 1.9725163854135153e-05, - "loss": 2.2876, + "epoch": 0.2072860773792714, + "grad_norm": 4.5047295442522595, + "learning_rate": 1.9920058046986867e-05, + "loss": 1.1378, "step": 1468 }, { - "epoch": 0.30803103375969804, - "grad_norm": 5.536941271034893, - "learning_rate": 1.9724636494604055e-05, - "loss": 2.5461, + "epoch": 0.20742728042925726, + "grad_norm": 4.603228962027549, + "learning_rate": 1.9919865557573142e-05, + "loss": 1.0567, "step": 1469 }, { - "epoch": 0.3082407213252254, - "grad_norm": 5.455906373338647, - "learning_rate": 1.972410863666847e-05, - "loss": 2.3427, + "epoch": 0.20756848347924314, + "grad_norm": 3.984572622435439, + "learning_rate": 1.991967283762603e-05, + "loss": 0.9173, "step": 1470 }, { - "epoch": 0.30845040889075276, - "grad_norm": 5.977633632870943, - "learning_rate": 1.9723580280355455e-05, - "loss": 2.3815, + "epoch": 0.20770968652922903, + "grad_norm": 5.380944958497025, + "learning_rate": 1.9919479887150007e-05, + "loss": 0.9284, "step": 1471 }, { - "epoch": 0.3086600964562801, - "grad_norm": 5.5493686692025275, - "learning_rate": 1.972305142569208e-05, - "loss": 2.3138, + "epoch": 0.2078508895792149, + "grad_norm": 4.132279338072665, + "learning_rate": 1.991928670614955e-05, + "loss": 1.0698, "step": 1472 }, { - "epoch": 0.3088697840218075, - "grad_norm": 6.107626901014838, - "learning_rate": 1.972252207270546e-05, - "loss": 2.4634, + "epoch": 0.2079920926292008, + "grad_norm": 5.629660767190314, + "learning_rate": 1.9919093294629157e-05, + "loss": 1.1172, "step": 1473 }, { - "epoch": 0.3090794715873349, - "grad_norm": 6.255474493494085, - "learning_rate": 1.972199222142272e-05, - "loss": 2.0997, + "epoch": 0.20813329567918668, + "grad_norm": 4.204390157487826, + "learning_rate": 1.9918899652593316e-05, + "loss": 0.9352, "step": 1474 }, { - "epoch": 0.30928915915286226, - "grad_norm": 6.8162714043097905, - "learning_rate": 1.9721461871871017e-05, - "loss": 2.312, + "epoch": 0.20827449872917256, + "grad_norm": 4.4772005200888625, + "learning_rate": 1.9918705780046536e-05, + "loss": 0.8675, "step": 1475 }, { - "epoch": 0.3094988467183896, - "grad_norm": 5.288022117194409, - "learning_rate": 1.9720931024077533e-05, - "loss": 2.3113, + "epoch": 0.20841570177915844, + "grad_norm": 5.626415393579422, + "learning_rate": 1.9918511676993315e-05, + "loss": 1.2055, "step": 1476 }, { - "epoch": 0.309708534283917, - "grad_norm": 5.663020110072521, - "learning_rate": 1.9720399678069474e-05, - "loss": 2.4339, + "epoch": 0.2085569048291443, + "grad_norm": 6.125437345751269, + "learning_rate": 1.9918317343438164e-05, + "loss": 1.469, "step": 1477 }, { - "epoch": 0.30991822184944434, - "grad_norm": 5.121772898756945, - "learning_rate": 1.971986783387407e-05, - "loss": 2.2952, + "epoch": 0.20869810787913018, + "grad_norm": 4.200591435048954, + "learning_rate": 1.99181227793856e-05, + "loss": 0.9065, "step": 1478 }, { - "epoch": 0.3101279094149717, - "grad_norm": 5.347115126345129, - "learning_rate": 1.971933549151858e-05, - "loss": 2.2576, + "epoch": 0.20883931092911606, + "grad_norm": 4.439866975875026, + "learning_rate": 1.9917927984840154e-05, + "loss": 1.1414, "step": 1479 }, { - "epoch": 0.31033759698049906, - "grad_norm": 5.413061789510903, - "learning_rate": 1.971880265103029e-05, - "loss": 2.4407, + "epoch": 0.20898051397910195, + "grad_norm": 4.4176848686859955, + "learning_rate": 1.9917732959806336e-05, + "loss": 0.906, "step": 1480 }, { - "epoch": 0.3105472845460264, - "grad_norm": 5.172916408616316, - "learning_rate": 1.971826931243651e-05, - "loss": 2.3881, + "epoch": 0.20912171702908783, + "grad_norm": 6.079682722341598, + "learning_rate": 1.9917537704288693e-05, + "loss": 1.1725, "step": 1481 }, { - "epoch": 0.3107569721115538, - "grad_norm": 5.384918675099962, - "learning_rate": 1.971773547576457e-05, - "loss": 2.1328, + "epoch": 0.2092629200790737, + "grad_norm": 4.274384577599541, + "learning_rate": 1.9917342218291752e-05, + "loss": 1.0346, "step": 1482 }, { - "epoch": 0.31096665967708115, - "grad_norm": 4.589607753773831, - "learning_rate": 1.9717201141041835e-05, - "loss": 2.3187, + "epoch": 0.2094041231290596, + "grad_norm": 4.914864329118279, + "learning_rate": 1.9917146501820067e-05, + "loss": 0.9349, "step": 1483 }, { - "epoch": 0.3111763472426085, - "grad_norm": 4.952098214472724, - "learning_rate": 1.9716666308295687e-05, - "loss": 2.5112, + "epoch": 0.20954532617904548, + "grad_norm": 4.9990494170716415, + "learning_rate": 1.9916950554878178e-05, + "loss": 1.0778, "step": 1484 }, { - "epoch": 0.31138603480813587, - "grad_norm": 6.319063293801881, - "learning_rate": 1.9716130977553536e-05, - "loss": 2.3032, + "epoch": 0.20968652922903136, + "grad_norm": 5.079526362438078, + "learning_rate": 1.9916754377470637e-05, + "loss": 1.1046, "step": 1485 }, { - "epoch": 0.31159572237366323, - "grad_norm": 6.08533099718583, - "learning_rate": 1.971559514884282e-05, - "loss": 2.5791, + "epoch": 0.20982773227901722, + "grad_norm": 4.511611139455432, + "learning_rate": 1.9916557969602007e-05, + "loss": 1.0812, "step": 1486 }, { - "epoch": 0.3118054099391906, - "grad_norm": 6.2827158548655655, - "learning_rate": 1.9715058822191008e-05, - "loss": 2.5598, + "epoch": 0.2099689353290031, + "grad_norm": 4.536891383435789, + "learning_rate": 1.9916361331276857e-05, + "loss": 1.17, "step": 1487 }, { - "epoch": 0.31201509750471795, - "grad_norm": 5.504699670486899, - "learning_rate": 1.9714521997625576e-05, - "loss": 2.4408, + "epoch": 0.21011013837898898, + "grad_norm": 4.353493985291046, + "learning_rate": 1.991616446249975e-05, + "loss": 0.9709, "step": 1488 }, { - "epoch": 0.3122247850702453, - "grad_norm": 5.86624997625924, - "learning_rate": 1.9713984675174042e-05, - "loss": 2.3816, + "epoch": 0.21025134142897486, + "grad_norm": 4.616006887337012, + "learning_rate": 1.9915967363275264e-05, + "loss": 1.0103, "step": 1489 }, { - "epoch": 0.3124344726357727, - "grad_norm": 6.224865164676491, - "learning_rate": 1.9713446854863946e-05, - "loss": 2.3346, + "epoch": 0.21039254447896075, + "grad_norm": 5.104775374154522, + "learning_rate": 1.9915770033607978e-05, + "loss": 1.262, "step": 1490 }, { - "epoch": 0.31264416020130004, - "grad_norm": 5.091888438800018, - "learning_rate": 1.971290853672285e-05, - "loss": 2.4144, + "epoch": 0.21053374752894663, + "grad_norm": 4.124448992722673, + "learning_rate": 1.991557247350248e-05, + "loss": 0.9985, "step": 1491 }, { - "epoch": 0.31285384776682745, - "grad_norm": 6.150932119184296, - "learning_rate": 1.971236972077835e-05, - "loss": 2.3949, + "epoch": 0.2106749505789325, + "grad_norm": 5.593889407889321, + "learning_rate": 1.9915374682963358e-05, + "loss": 1.1781, "step": 1492 }, { - "epoch": 0.3130635353323548, - "grad_norm": 4.846635162830004, - "learning_rate": 1.9711830407058056e-05, - "loss": 2.2806, + "epoch": 0.2108161536289184, + "grad_norm": 4.188496778636398, + "learning_rate": 1.991517666199521e-05, + "loss": 1.1469, "step": 1493 }, { - "epoch": 0.3132732228978822, - "grad_norm": 5.25417386387375, - "learning_rate": 1.9711290595589606e-05, - "loss": 2.5214, + "epoch": 0.21095735667890425, + "grad_norm": 3.68563527640135, + "learning_rate": 1.991497841060264e-05, + "loss": 0.8996, "step": 1494 }, { - "epoch": 0.31348291046340954, - "grad_norm": 5.794325600518844, - "learning_rate": 1.971075028640067e-05, - "loss": 2.3367, + "epoch": 0.21109855972889013, + "grad_norm": 4.255939108084237, + "learning_rate": 1.9914779928790255e-05, + "loss": 1.0845, "step": 1495 }, { - "epoch": 0.3136925980289369, - "grad_norm": 4.948303636813416, - "learning_rate": 1.9710209479518942e-05, - "loss": 2.1773, + "epoch": 0.21123976277887602, + "grad_norm": 4.797282241781136, + "learning_rate": 1.9914581216562664e-05, + "loss": 1.1462, "step": 1496 }, { - "epoch": 0.31390228559446426, - "grad_norm": 5.345897867183844, - "learning_rate": 1.970966817497214e-05, - "loss": 2.1757, + "epoch": 0.2113809658288619, + "grad_norm": 4.041068469546502, + "learning_rate": 1.991438227392449e-05, + "loss": 1.0085, "step": 1497 }, { - "epoch": 0.3141119731599916, - "grad_norm": 4.736279500731373, - "learning_rate": 1.9709126372787994e-05, - "loss": 2.3428, + "epoch": 0.21152216887884778, + "grad_norm": 4.355245823655319, + "learning_rate": 1.991418310088035e-05, + "loss": 1.136, "step": 1498 }, { - "epoch": 0.314321660725519, - "grad_norm": 4.9165498979709215, - "learning_rate": 1.9708584072994287e-05, - "loss": 2.2653, + "epoch": 0.21166337192883367, + "grad_norm": 4.697790786814241, + "learning_rate": 1.991398369743488e-05, + "loss": 1.2309, "step": 1499 }, { - "epoch": 0.31453134829104634, - "grad_norm": 5.390229038885812, - "learning_rate": 1.970804127561881e-05, - "loss": 2.2994, + "epoch": 0.21180457497881955, + "grad_norm": 4.608504291540901, + "learning_rate": 1.9913784063592708e-05, + "loss": 1.1577, "step": 1500 }, { - "epoch": 0.3147410358565737, - "grad_norm": 5.9022811350477085, - "learning_rate": 1.9707497980689375e-05, - "loss": 2.4414, + "epoch": 0.21194577802880543, + "grad_norm": 4.396493283712529, + "learning_rate": 1.9913584199358476e-05, + "loss": 1.2312, "step": 1501 }, { - "epoch": 0.31495072342210106, - "grad_norm": 5.720020625309213, - "learning_rate": 1.9706954188233832e-05, - "loss": 2.1059, + "epoch": 0.21208698107879131, + "grad_norm": 4.337194424116833, + "learning_rate": 1.991338410473683e-05, + "loss": 0.9917, "step": 1502 }, { - "epoch": 0.3151604109876284, - "grad_norm": 5.540632883318829, - "learning_rate": 1.9706409898280053e-05, - "loss": 2.3746, + "epoch": 0.21222818412877717, + "grad_norm": 4.4367918720550925, + "learning_rate": 1.9913183779732417e-05, + "loss": 1.1141, "step": 1503 }, { - "epoch": 0.3153700985531558, - "grad_norm": 4.99106647903385, - "learning_rate": 1.9705865110855936e-05, - "loss": 2.3022, + "epoch": 0.21236938717876305, + "grad_norm": 4.732995747219146, + "learning_rate": 1.9912983224349894e-05, + "loss": 1.2732, "step": 1504 }, { - "epoch": 0.31557978611868315, - "grad_norm": 5.728424190867146, - "learning_rate": 1.970531982598939e-05, - "loss": 2.1741, + "epoch": 0.21251059022874894, + "grad_norm": 4.106189987215339, + "learning_rate": 1.9912782438593922e-05, + "loss": 1.0514, "step": 1505 }, { - "epoch": 0.3157894736842105, - "grad_norm": 5.641338266472313, - "learning_rate": 1.9704774043708378e-05, - "loss": 2.2423, + "epoch": 0.21265179327873482, + "grad_norm": 5.956079726298888, + "learning_rate": 1.991258142246917e-05, + "loss": 1.4877, "step": 1506 }, { - "epoch": 0.31599916124973787, - "grad_norm": 6.360506019842943, - "learning_rate": 1.9704227764040856e-05, - "loss": 2.3389, + "epoch": 0.2127929963287207, + "grad_norm": 4.427212311994908, + "learning_rate": 1.9912380175980305e-05, + "loss": 1.0993, "step": 1507 }, { - "epoch": 0.31620884881526523, - "grad_norm": 5.77971291857449, - "learning_rate": 1.9703680987014836e-05, - "loss": 2.3239, + "epoch": 0.21293419937870658, + "grad_norm": 4.686264403318897, + "learning_rate": 1.9912178699132005e-05, + "loss": 1.1904, "step": 1508 }, { - "epoch": 0.3164185363807926, - "grad_norm": 5.688068680117667, - "learning_rate": 1.9703133712658334e-05, - "loss": 1.9481, + "epoch": 0.21307540242869247, + "grad_norm": 4.422309263030953, + "learning_rate": 1.991197699192895e-05, + "loss": 1.2044, "step": 1509 }, { - "epoch": 0.31662822394632, - "grad_norm": 5.312862118132038, - "learning_rate": 1.9702585940999397e-05, - "loss": 2.2139, + "epoch": 0.21321660547867835, + "grad_norm": 5.2389404849321215, + "learning_rate": 1.9911775054375835e-05, + "loss": 1.2236, "step": 1510 }, { - "epoch": 0.31683791151184737, - "grad_norm": 7.674997326080807, - "learning_rate": 1.9702037672066105e-05, - "loss": 2.4508, + "epoch": 0.2133578085286642, + "grad_norm": 4.759276753928988, + "learning_rate": 1.9911572886477345e-05, + "loss": 1.1365, "step": 1511 }, { - "epoch": 0.31704759907737473, - "grad_norm": 5.615429093840941, - "learning_rate": 1.9701488905886553e-05, - "loss": 2.3787, + "epoch": 0.2134990115786501, + "grad_norm": 4.933440300020826, + "learning_rate": 1.9911370488238185e-05, + "loss": 1.3686, "step": 1512 }, { - "epoch": 0.3172572866429021, - "grad_norm": 5.197316859027535, - "learning_rate": 1.970093964248887e-05, - "loss": 2.0848, + "epoch": 0.21364021462863597, + "grad_norm": 4.695924520308634, + "learning_rate": 1.9911167859663055e-05, + "loss": 0.9598, "step": 1513 }, { - "epoch": 0.31746697420842945, - "grad_norm": 5.689690188829284, - "learning_rate": 1.9700389881901203e-05, - "loss": 2.1779, + "epoch": 0.21378141767862185, + "grad_norm": 3.7344504328232353, + "learning_rate": 1.9910965000756662e-05, + "loss": 0.8862, "step": 1514 }, { - "epoch": 0.3176766617739568, - "grad_norm": 5.930786295968636, - "learning_rate": 1.9699839624151732e-05, - "loss": 2.6955, + "epoch": 0.21392262072860774, + "grad_norm": 4.432426426627992, + "learning_rate": 1.991076191152372e-05, + "loss": 1.0417, "step": 1515 }, { - "epoch": 0.3178863493394842, - "grad_norm": 5.751919466836633, - "learning_rate": 1.9699288869268657e-05, - "loss": 2.3673, + "epoch": 0.21406382377859362, + "grad_norm": 4.597702040015849, + "learning_rate": 1.9910558591968956e-05, + "loss": 1.1174, "step": 1516 }, { - "epoch": 0.31809603690501154, - "grad_norm": 5.741769380008591, - "learning_rate": 1.96987376172802e-05, - "loss": 2.5176, + "epoch": 0.2142050268285795, + "grad_norm": 4.6744777669417745, + "learning_rate": 1.991035504209709e-05, + "loss": 1.1904, "step": 1517 }, { - "epoch": 0.3183057244705389, - "grad_norm": 5.053275356963386, - "learning_rate": 1.9698185868214625e-05, - "loss": 2.4545, + "epoch": 0.2143462298785654, + "grad_norm": 5.458366886242176, + "learning_rate": 1.991015126191285e-05, + "loss": 1.4444, "step": 1518 }, { - "epoch": 0.31851541203606626, - "grad_norm": 6.164444263708838, - "learning_rate": 1.9697633622100203e-05, - "loss": 2.6943, + "epoch": 0.21448743292855127, + "grad_norm": 3.962004808059215, + "learning_rate": 1.9909947251420977e-05, + "loss": 0.9747, "step": 1519 }, { - "epoch": 0.3187250996015936, - "grad_norm": 5.535265046041835, - "learning_rate": 1.9697080878965232e-05, - "loss": 2.3771, + "epoch": 0.21462863597853712, + "grad_norm": 4.625214726607694, + "learning_rate": 1.990974301062621e-05, + "loss": 1.001, "step": 1520 }, { - "epoch": 0.318934787167121, - "grad_norm": 5.243331805782426, - "learning_rate": 1.9696527638838053e-05, - "loss": 2.4186, + "epoch": 0.214769839028523, + "grad_norm": 4.172237718183528, + "learning_rate": 1.9909538539533293e-05, + "loss": 1.1562, "step": 1521 }, { - "epoch": 0.31914447473264834, - "grad_norm": 5.608816302323981, - "learning_rate": 1.9695973901747015e-05, - "loss": 2.3495, + "epoch": 0.2149110420785089, + "grad_norm": 4.3872925671729375, + "learning_rate": 1.990933383814698e-05, + "loss": 1.0332, "step": 1522 }, { - "epoch": 0.3193541622981757, - "grad_norm": 5.441417244285983, - "learning_rate": 1.9695419667720497e-05, - "loss": 2.2835, + "epoch": 0.21505224512849477, + "grad_norm": 4.859260209205926, + "learning_rate": 1.990912890647203e-05, + "loss": 1.1799, "step": 1523 }, { - "epoch": 0.31956384986370306, - "grad_norm": 4.645072259927482, - "learning_rate": 1.9694864936786905e-05, - "loss": 2.4053, + "epoch": 0.21519344817848066, + "grad_norm": 4.6710726461081, + "learning_rate": 1.99089237445132e-05, + "loss": 1.2096, "step": 1524 }, { - "epoch": 0.3197735374292304, - "grad_norm": 5.538116879192616, - "learning_rate": 1.9694309708974664e-05, - "loss": 2.24, + "epoch": 0.21533465122846654, + "grad_norm": 4.548794129724055, + "learning_rate": 1.9908718352275267e-05, + "loss": 1.0294, "step": 1525 }, { - "epoch": 0.3199832249947578, - "grad_norm": 6.133448749236639, - "learning_rate": 1.9693753984312244e-05, - "loss": 2.2234, + "epoch": 0.21547585427845242, + "grad_norm": 4.500362508875807, + "learning_rate": 1.9908512729762995e-05, + "loss": 1.0094, "step": 1526 }, { - "epoch": 0.32019291256028515, - "grad_norm": 5.121488847265792, - "learning_rate": 1.9693197762828116e-05, - "loss": 2.3061, + "epoch": 0.2156170573284383, + "grad_norm": 5.042199568145323, + "learning_rate": 1.9908306876981166e-05, + "loss": 1.1713, "step": 1527 }, { - "epoch": 0.32040260012581256, - "grad_norm": 5.661960536524321, - "learning_rate": 1.9692641044550793e-05, - "loss": 2.3722, + "epoch": 0.21575826037842416, + "grad_norm": 4.825496254065435, + "learning_rate": 1.9908100793934566e-05, + "loss": 1.2199, "step": 1528 }, { - "epoch": 0.3206122876913399, - "grad_norm": 5.332912685738281, - "learning_rate": 1.9692083829508805e-05, - "loss": 2.3572, + "epoch": 0.21589946342841004, + "grad_norm": 4.3534431572420065, + "learning_rate": 1.990789448062798e-05, + "loss": 0.9683, "step": 1529 }, { - "epoch": 0.3208219752568673, - "grad_norm": 5.564093842628362, - "learning_rate": 1.969152611773071e-05, - "loss": 2.4022, + "epoch": 0.21604066647839593, + "grad_norm": 3.847853603742493, + "learning_rate": 1.990768793706621e-05, + "loss": 0.9884, "step": 1530 }, { - "epoch": 0.32103166282239465, - "grad_norm": 5.753427394116873, - "learning_rate": 1.9690967909245093e-05, - "loss": 2.2846, + "epoch": 0.2161818695283818, + "grad_norm": 4.1650735143090785, + "learning_rate": 1.9907481163254047e-05, + "loss": 0.995, "step": 1531 }, { - "epoch": 0.321241350387922, - "grad_norm": 5.971292538542521, - "learning_rate": 1.969040920408056e-05, - "loss": 2.1369, + "epoch": 0.2163230725783677, + "grad_norm": 3.8020847374106697, + "learning_rate": 1.9907274159196302e-05, + "loss": 0.9169, "step": 1532 }, { - "epoch": 0.32145103795344937, - "grad_norm": 6.050386304353231, - "learning_rate": 1.9689850002265748e-05, - "loss": 2.3976, + "epoch": 0.21646427562835358, + "grad_norm": 5.8000634516265945, + "learning_rate": 1.9907066924897784e-05, + "loss": 1.0506, "step": 1533 }, { - "epoch": 0.32166072551897673, - "grad_norm": 5.7453821039470565, - "learning_rate": 1.968929030382932e-05, - "loss": 2.1402, + "epoch": 0.21660547867833946, + "grad_norm": 4.456617879701141, + "learning_rate": 1.9906859460363307e-05, + "loss": 1.2079, "step": 1534 }, { - "epoch": 0.3218704130845041, - "grad_norm": 5.3989866592591955, - "learning_rate": 1.9688730108799958e-05, - "loss": 2.4425, + "epoch": 0.21674668172832534, + "grad_norm": 4.714095787046404, + "learning_rate": 1.99066517655977e-05, + "loss": 1.0313, "step": 1535 }, { - "epoch": 0.32208010065003145, - "grad_norm": 6.120850902242426, - "learning_rate": 1.9688169417206374e-05, - "loss": 2.2639, + "epoch": 0.21688788477831122, + "grad_norm": 5.158629324384314, + "learning_rate": 1.990644384060578e-05, + "loss": 1.3862, "step": 1536 }, { - "epoch": 0.3222897882155588, - "grad_norm": 6.249207718670505, - "learning_rate": 1.9687608229077302e-05, - "loss": 2.3343, + "epoch": 0.21702908782829708, + "grad_norm": 4.676539847816805, + "learning_rate": 1.9906235685392384e-05, + "loss": 1.1049, "step": 1537 }, { - "epoch": 0.3224994757810862, - "grad_norm": 6.136818169893906, - "learning_rate": 1.9687046544441508e-05, - "loss": 2.3297, + "epoch": 0.21717029087828296, + "grad_norm": 4.411957573916679, + "learning_rate": 1.990602729996235e-05, + "loss": 1.0721, "step": 1538 }, { - "epoch": 0.32270916334661354, - "grad_norm": 5.8256544480599075, - "learning_rate": 1.9686484363327774e-05, - "loss": 2.0781, + "epoch": 0.21731149392826885, + "grad_norm": 4.611149247724363, + "learning_rate": 1.9905818684320518e-05, + "loss": 1.2322, "step": 1539 }, { - "epoch": 0.3229188509121409, - "grad_norm": 6.106603338544645, - "learning_rate": 1.9685921685764923e-05, - "loss": 2.2792, + "epoch": 0.21745269697825473, + "grad_norm": 5.2704085433679975, + "learning_rate": 1.990560983847174e-05, + "loss": 1.1648, "step": 1540 }, { - "epoch": 0.32312853847766826, - "grad_norm": 6.391780487074041, - "learning_rate": 1.968535851178178e-05, - "loss": 2.5572, + "epoch": 0.2175939000282406, + "grad_norm": 4.836683215434626, + "learning_rate": 1.9905400762420862e-05, + "loss": 1.3735, "step": 1541 }, { - "epoch": 0.3233382260431956, - "grad_norm": 5.143244807163231, - "learning_rate": 1.9684794841407217e-05, - "loss": 2.2325, + "epoch": 0.2177351030782265, + "grad_norm": 5.308078643337155, + "learning_rate": 1.9905191456172757e-05, + "loss": 1.3188, "step": 1542 }, { - "epoch": 0.323547913608723, - "grad_norm": 5.134603730519258, - "learning_rate": 1.968423067467012e-05, - "loss": 2.0837, + "epoch": 0.21787630612821238, + "grad_norm": 4.2899630934464446, + "learning_rate": 1.9904981919732272e-05, + "loss": 1.1812, "step": 1543 }, { - "epoch": 0.32375760117425034, - "grad_norm": 5.049928770051305, - "learning_rate": 1.9683666011599408e-05, - "loss": 2.2789, + "epoch": 0.21801750917819826, + "grad_norm": 4.444702670080381, + "learning_rate": 1.990477215310429e-05, + "loss": 1.1512, "step": 1544 }, { - "epoch": 0.3239672887397777, - "grad_norm": 5.681812380984174, - "learning_rate": 1.9683100852224014e-05, - "loss": 2.3705, + "epoch": 0.21815871222818412, + "grad_norm": 4.354822961166766, + "learning_rate": 1.9904562156293678e-05, + "loss": 1.1362, "step": 1545 }, { - "epoch": 0.3241769763053051, - "grad_norm": 5.977128581745251, - "learning_rate": 1.9682535196572907e-05, - "loss": 2.4287, + "epoch": 0.21829991527817, + "grad_norm": 4.602600327130179, + "learning_rate": 1.990435192930532e-05, + "loss": 1.103, "step": 1546 }, { - "epoch": 0.3243866638708325, - "grad_norm": 5.6009328101179525, - "learning_rate": 1.9681969044675076e-05, - "loss": 2.655, + "epoch": 0.21844111832815588, + "grad_norm": 4.241725068341514, + "learning_rate": 1.99041414721441e-05, + "loss": 0.9984, "step": 1547 }, { - "epoch": 0.32459635143635984, - "grad_norm": 4.910291568982428, - "learning_rate": 1.9681402396559545e-05, - "loss": 2.4707, + "epoch": 0.21858232137814176, + "grad_norm": 4.664697190232162, + "learning_rate": 1.9903930784814908e-05, + "loss": 1.0443, "step": 1548 }, { - "epoch": 0.3248060390018872, - "grad_norm": 5.5895750952885175, - "learning_rate": 1.9680835252255345e-05, - "loss": 2.1304, + "epoch": 0.21872352442812765, + "grad_norm": 5.476567973922895, + "learning_rate": 1.9903719867322645e-05, + "loss": 1.4682, "step": 1549 }, { - "epoch": 0.32501572656741456, - "grad_norm": 5.418398241038866, - "learning_rate": 1.9680267611791547e-05, - "loss": 2.3803, + "epoch": 0.21886472747811353, + "grad_norm": 3.9668953778314795, + "learning_rate": 1.9903508719672208e-05, + "loss": 1.0169, "step": 1550 }, { - "epoch": 0.3252254141329419, - "grad_norm": 5.418530758358457, - "learning_rate": 1.9679699475197248e-05, - "loss": 2.4121, + "epoch": 0.2190059305280994, + "grad_norm": 4.497128929965146, + "learning_rate": 1.9903297341868506e-05, + "loss": 1.1391, "step": 1551 }, { - "epoch": 0.3254351016984693, - "grad_norm": 6.257273767798533, - "learning_rate": 1.9679130842501556e-05, - "loss": 2.313, + "epoch": 0.2191471335780853, + "grad_norm": 4.232371536835727, + "learning_rate": 1.990308573391645e-05, + "loss": 1.0663, "step": 1552 }, { - "epoch": 0.32564478926399665, - "grad_norm": 5.732328243238281, - "learning_rate": 1.9678561713733626e-05, - "loss": 2.7302, + "epoch": 0.21928833662807118, + "grad_norm": 4.53944282642431, + "learning_rate": 1.990287389582096e-05, + "loss": 1.1959, "step": 1553 }, { - "epoch": 0.325854476829524, - "grad_norm": 5.492290864673131, - "learning_rate": 1.9677992088922622e-05, - "loss": 2.2384, + "epoch": 0.21942953967805703, + "grad_norm": 5.121249690152903, + "learning_rate": 1.9902661827586953e-05, + "loss": 1.0896, "step": 1554 }, { - "epoch": 0.32606416439505137, - "grad_norm": 5.764705594021949, - "learning_rate": 1.9677421968097737e-05, - "loss": 2.3671, + "epoch": 0.21957074272804292, + "grad_norm": 4.762971631669555, + "learning_rate": 1.9902449529219367e-05, + "loss": 0.9744, "step": 1555 }, { - "epoch": 0.32627385196057873, - "grad_norm": 5.508376691207428, - "learning_rate": 1.967685135128819e-05, - "loss": 2.2202, + "epoch": 0.2197119457780288, + "grad_norm": 4.60961996644637, + "learning_rate": 1.9902237000723127e-05, + "loss": 1.0719, "step": 1556 }, { - "epoch": 0.3264835395261061, - "grad_norm": 5.762762534915171, - "learning_rate": 1.967628023852323e-05, - "loss": 2.1665, + "epoch": 0.21985314882801468, + "grad_norm": 4.3142455290782245, + "learning_rate": 1.9902024242103174e-05, + "loss": 1.116, "step": 1557 }, { - "epoch": 0.32669322709163345, - "grad_norm": 5.162693486956407, - "learning_rate": 1.967570862983212e-05, - "loss": 2.4545, + "epoch": 0.21999435187800057, + "grad_norm": 5.094831059601257, + "learning_rate": 1.9901811253364458e-05, + "loss": 1.2812, "step": 1558 }, { - "epoch": 0.3269029146571608, - "grad_norm": 5.9500425410853595, - "learning_rate": 1.9675136525244168e-05, - "loss": 2.3419, + "epoch": 0.22013555492798645, + "grad_norm": 3.9721541071523916, + "learning_rate": 1.990159803451192e-05, + "loss": 0.9889, "step": 1559 }, { - "epoch": 0.3271126022226882, - "grad_norm": 5.466850158694769, - "learning_rate": 1.9674563924788683e-05, - "loss": 1.8989, + "epoch": 0.22027675797797233, + "grad_norm": 3.998532612690012, + "learning_rate": 1.9901384585550527e-05, + "loss": 0.9128, "step": 1560 }, { - "epoch": 0.32732228978821554, - "grad_norm": 5.294882977362833, - "learning_rate": 1.967399082849502e-05, - "loss": 2.2718, + "epoch": 0.22041796102795821, + "grad_norm": 3.438385046725496, + "learning_rate": 1.9901170906485227e-05, + "loss": 0.878, "step": 1561 }, { - "epoch": 0.3275319773537429, - "grad_norm": 5.0070359353971945, - "learning_rate": 1.9673417236392547e-05, - "loss": 2.2493, + "epoch": 0.22055916407794407, + "grad_norm": 5.087753206009238, + "learning_rate": 1.990095699732099e-05, + "loss": 1.0865, "step": 1562 }, { - "epoch": 0.3277416649192703, - "grad_norm": 5.991696998198487, - "learning_rate": 1.9672843148510663e-05, - "loss": 2.5096, + "epoch": 0.22070036712792995, + "grad_norm": 5.168488996112628, + "learning_rate": 1.9900742858062792e-05, + "loss": 1.1686, "step": 1563 }, { - "epoch": 0.3279513524847977, - "grad_norm": 5.539697722314044, - "learning_rate": 1.9672268564878792e-05, - "loss": 2.2199, + "epoch": 0.22084157017791584, + "grad_norm": 6.4865301478730295, + "learning_rate": 1.990052848871561e-05, + "loss": 1.0443, "step": 1564 }, { - "epoch": 0.32816104005032504, - "grad_norm": 6.1905639514724395, - "learning_rate": 1.967169348552638e-05, - "loss": 2.5273, + "epoch": 0.22098277322790172, + "grad_norm": 3.904903801146498, + "learning_rate": 1.9900313889284413e-05, + "loss": 0.9511, "step": 1565 }, { - "epoch": 0.3283707276158524, - "grad_norm": 6.724766239438472, - "learning_rate": 1.9671117910482903e-05, - "loss": 2.4645, + "epoch": 0.2211239762778876, + "grad_norm": 4.605190462916444, + "learning_rate": 1.9900099059774197e-05, + "loss": 1.0432, "step": 1566 }, { - "epoch": 0.32858041518137976, - "grad_norm": 6.011917438780637, - "learning_rate": 1.967054183977786e-05, - "loss": 2.2566, + "epoch": 0.22126517932787348, + "grad_norm": 4.0027389562042055, + "learning_rate": 1.9899884000189958e-05, + "loss": 0.9627, "step": 1567 }, { - "epoch": 0.3287901027469071, - "grad_norm": 7.572410217569467, - "learning_rate": 1.9669965273440776e-05, - "loss": 2.5161, + "epoch": 0.22140638237785937, + "grad_norm": 5.773245037479774, + "learning_rate": 1.989966871053669e-05, + "loss": 1.42, "step": 1568 }, { - "epoch": 0.3289997903124345, - "grad_norm": 5.421217346534776, - "learning_rate": 1.9669388211501195e-05, - "loss": 2.737, + "epoch": 0.22154758542784525, + "grad_norm": 4.065406299580875, + "learning_rate": 1.98994531908194e-05, + "loss": 1.0858, "step": 1569 }, { - "epoch": 0.32920947787796184, - "grad_norm": 5.72999759942945, - "learning_rate": 1.9668810653988702e-05, - "loss": 2.5211, + "epoch": 0.22168878847783113, + "grad_norm": 4.962583575556403, + "learning_rate": 1.989923744104309e-05, + "loss": 1.2689, "step": 1570 }, { - "epoch": 0.3294191654434892, - "grad_norm": 6.419669988627167, - "learning_rate": 1.966823260093289e-05, - "loss": 2.3514, + "epoch": 0.221829991527817, + "grad_norm": 4.093737009840619, + "learning_rate": 1.989902146121277e-05, + "loss": 0.9932, "step": 1571 }, { - "epoch": 0.32962885300901656, - "grad_norm": 5.5758203723037925, - "learning_rate": 1.9667654052363393e-05, - "loss": 2.393, + "epoch": 0.22197119457780287, + "grad_norm": 3.9980750774423957, + "learning_rate": 1.9898805251333477e-05, + "loss": 0.909, "step": 1572 }, { - "epoch": 0.3298385405745439, - "grad_norm": 5.66092324490033, - "learning_rate": 1.9667075008309852e-05, - "loss": 2.2449, + "epoch": 0.22211239762778875, + "grad_norm": 4.683772560445976, + "learning_rate": 1.9898588811410218e-05, + "loss": 1.2598, "step": 1573 }, { - "epoch": 0.3300482281400713, - "grad_norm": 5.591044279551592, - "learning_rate": 1.9666495468801957e-05, - "loss": 2.307, + "epoch": 0.22225360067777464, + "grad_norm": 5.011504456496572, + "learning_rate": 1.9898372141448033e-05, + "loss": 1.0185, "step": 1574 }, { - "epoch": 0.33025791570559865, - "grad_norm": 6.430045426141139, - "learning_rate": 1.9665915433869397e-05, - "loss": 2.2199, + "epoch": 0.22239480372776052, + "grad_norm": 4.29085771167402, + "learning_rate": 1.989815524145195e-05, + "loss": 1.0056, "step": 1575 }, { - "epoch": 0.330467603271126, - "grad_norm": 5.299603723054972, - "learning_rate": 1.9665334903541913e-05, - "loss": 2.4392, + "epoch": 0.2225360067777464, + "grad_norm": 4.788502181666632, + "learning_rate": 1.989793811142702e-05, + "loss": 1.1437, "step": 1576 }, { - "epoch": 0.33067729083665337, - "grad_norm": 5.9714112690123615, - "learning_rate": 1.9664753877849246e-05, - "loss": 2.3323, + "epoch": 0.2226772098277323, + "grad_norm": 4.89009601638008, + "learning_rate": 1.989772075137828e-05, + "loss": 1.1972, "step": 1577 }, { - "epoch": 0.33088697840218073, - "grad_norm": 6.231667801598009, - "learning_rate": 1.966417235682118e-05, - "loss": 2.2128, + "epoch": 0.22281841287771817, + "grad_norm": 4.49598540136094, + "learning_rate": 1.9897503161310786e-05, + "loss": 1.0385, "step": 1578 }, { - "epoch": 0.3310966659677081, - "grad_norm": 5.975659435429296, - "learning_rate": 1.9663590340487528e-05, - "loss": 2.4869, + "epoch": 0.22295961592770402, + "grad_norm": 4.122466128148814, + "learning_rate": 1.989728534122959e-05, + "loss": 0.9304, "step": 1579 }, { - "epoch": 0.33130635353323545, - "grad_norm": 5.926377874482364, - "learning_rate": 1.96630078288781e-05, - "loss": 2.5833, + "epoch": 0.2231008189776899, + "grad_norm": 4.769051863148613, + "learning_rate": 1.989706729113976e-05, + "loss": 1.238, "step": 1580 }, { - "epoch": 0.33151604109876287, - "grad_norm": 5.412963581577702, - "learning_rate": 1.9662424822022767e-05, - "loss": 2.3962, + "epoch": 0.2232420220276758, + "grad_norm": 3.9472027050939538, + "learning_rate": 1.9896849011046356e-05, + "loss": 1.0991, "step": 1581 }, { - "epoch": 0.33172572866429023, - "grad_norm": 5.351755265084626, - "learning_rate": 1.96618413199514e-05, - "loss": 2.2915, + "epoch": 0.22338322507766167, + "grad_norm": 4.293727963057834, + "learning_rate": 1.989663050095446e-05, + "loss": 1.0645, "step": 1582 }, { - "epoch": 0.3319354162298176, - "grad_norm": 5.840228175903158, - "learning_rate": 1.9661257322693908e-05, - "loss": 2.4686, + "epoch": 0.22352442812764756, + "grad_norm": 4.0812027797809565, + "learning_rate": 1.9896411760869142e-05, + "loss": 0.9768, "step": 1583 }, { - "epoch": 0.33214510379534495, - "grad_norm": 5.426385117783255, - "learning_rate": 1.9660672830280224e-05, - "loss": 2.2827, + "epoch": 0.22366563117763344, + "grad_norm": 4.286286461908829, + "learning_rate": 1.989619279079549e-05, + "loss": 0.9328, "step": 1584 }, { - "epoch": 0.3323547913608723, - "grad_norm": 5.506850952057615, - "learning_rate": 1.96600878427403e-05, - "loss": 2.4203, + "epoch": 0.22380683422761932, + "grad_norm": 5.974064484809082, + "learning_rate": 1.9895973590738592e-05, + "loss": 1.5244, "step": 1585 }, { - "epoch": 0.3325644789263997, - "grad_norm": 5.543616564479627, - "learning_rate": 1.965950236010412e-05, - "loss": 2.2861, + "epoch": 0.2239480372776052, + "grad_norm": 4.773150924543597, + "learning_rate": 1.989575416070354e-05, + "loss": 1.1058, "step": 1586 }, { - "epoch": 0.33277416649192704, - "grad_norm": 6.055319824117364, - "learning_rate": 1.965891638240169e-05, - "loss": 2.6901, + "epoch": 0.2240892403275911, + "grad_norm": 5.452686610528497, + "learning_rate": 1.9895534500695435e-05, + "loss": 1.1389, "step": 1587 }, { - "epoch": 0.3329838540574544, - "grad_norm": 5.658463143578527, - "learning_rate": 1.965832990966304e-05, - "loss": 2.3455, + "epoch": 0.22423044337757694, + "grad_norm": 4.381928584108818, + "learning_rate": 1.9895314610719382e-05, + "loss": 1.0225, "step": 1588 }, { - "epoch": 0.33319354162298176, - "grad_norm": 5.066372843347675, - "learning_rate": 1.9657742941918237e-05, - "loss": 2.2029, + "epoch": 0.22437164642756283, + "grad_norm": 4.13852941032075, + "learning_rate": 1.989509449078049e-05, + "loss": 1.0382, "step": 1589 }, { - "epoch": 0.3334032291885091, - "grad_norm": 6.328899485383506, - "learning_rate": 1.9657155479197356e-05, - "loss": 2.1475, + "epoch": 0.2245128494775487, + "grad_norm": 4.092587861445075, + "learning_rate": 1.9894874140883877e-05, + "loss": 1.0752, "step": 1590 }, { - "epoch": 0.3336129167540365, - "grad_norm": 5.843336215643689, - "learning_rate": 1.9656567521530502e-05, - "loss": 2.3222, + "epoch": 0.2246540525275346, + "grad_norm": 4.983711400258463, + "learning_rate": 1.9894653561034664e-05, + "loss": 1.1382, "step": 1591 }, { - "epoch": 0.33382260431956384, - "grad_norm": 5.828937114150826, - "learning_rate": 1.965597906894782e-05, - "loss": 2.5918, + "epoch": 0.22479525557752048, + "grad_norm": 4.87150613365761, + "learning_rate": 1.9894432751237974e-05, + "loss": 0.9787, "step": 1592 }, { - "epoch": 0.3340322918850912, - "grad_norm": 5.479724679731844, - "learning_rate": 1.965539012147946e-05, - "loss": 2.2339, + "epoch": 0.22493645862750636, + "grad_norm": 4.841579794591345, + "learning_rate": 1.9894211711498938e-05, + "loss": 1.3469, "step": 1593 }, { - "epoch": 0.33424197945061856, - "grad_norm": 5.479146255059593, - "learning_rate": 1.965480067915561e-05, - "loss": 2.2398, + "epoch": 0.22507766167749224, + "grad_norm": 4.070352136164378, + "learning_rate": 1.9893990441822698e-05, + "loss": 1.2348, "step": 1594 }, { - "epoch": 0.3344516670161459, - "grad_norm": 5.904441620670538, - "learning_rate": 1.9654210742006482e-05, - "loss": 2.295, + "epoch": 0.22521886472747812, + "grad_norm": 4.590809813785775, + "learning_rate": 1.989376894221439e-05, + "loss": 1.1557, "step": 1595 }, { - "epoch": 0.3346613545816733, - "grad_norm": 5.552254721346579, - "learning_rate": 1.9653620310062308e-05, - "loss": 2.1975, + "epoch": 0.225360067777464, + "grad_norm": 4.589861481282709, + "learning_rate": 1.9893547212679162e-05, + "loss": 1.238, "step": 1596 }, { - "epoch": 0.33487104214720065, - "grad_norm": 6.427397347049776, - "learning_rate": 1.965302938335335e-05, - "loss": 2.3856, + "epoch": 0.22550127082744986, + "grad_norm": 4.1176045795267004, + "learning_rate": 1.9893325253222177e-05, + "loss": 1.0737, "step": 1597 }, { - "epoch": 0.335080729712728, - "grad_norm": 6.5043489418639995, - "learning_rate": 1.9652437961909893e-05, - "loss": 2.6487, + "epoch": 0.22564247387743575, + "grad_norm": 4.7917550472321695, + "learning_rate": 1.989310306384858e-05, + "loss": 1.0873, "step": 1598 }, { - "epoch": 0.3352904172782554, - "grad_norm": 5.786232951460627, - "learning_rate": 1.965184604576225e-05, - "loss": 2.1514, + "epoch": 0.22578367692742163, + "grad_norm": 4.0297417752338225, + "learning_rate": 1.9892880644563544e-05, + "loss": 0.8758, "step": 1599 }, { - "epoch": 0.3355001048437828, - "grad_norm": 6.404225458094209, - "learning_rate": 1.9651253634940753e-05, - "loss": 2.3743, + "epoch": 0.2259248799774075, + "grad_norm": 4.9923761745667266, + "learning_rate": 1.9892657995372227e-05, + "loss": 1.2517, "step": 1600 }, { - "epoch": 0.33570979240931015, - "grad_norm": 4.906540186692006, - "learning_rate": 1.965066072947577e-05, - "loss": 2.0767, + "epoch": 0.2260660830273934, + "grad_norm": 4.0530308591510105, + "learning_rate": 1.989243511627982e-05, + "loss": 0.9146, "step": 1601 }, { - "epoch": 0.3359194799748375, - "grad_norm": 5.777920523296027, - "learning_rate": 1.965006732939769e-05, - "loss": 2.258, + "epoch": 0.22620728607737928, + "grad_norm": 4.706955425336845, + "learning_rate": 1.989221200729149e-05, + "loss": 1.1599, "step": 1602 }, { - "epoch": 0.33612916754036487, - "grad_norm": 4.858194864119895, - "learning_rate": 1.9649473434736917e-05, - "loss": 2.2744, + "epoch": 0.22634848912736516, + "grad_norm": 3.9181733669419794, + "learning_rate": 1.989198866841242e-05, + "loss": 1.0262, "step": 1603 }, { - "epoch": 0.33633885510589223, - "grad_norm": 5.016821726110959, - "learning_rate": 1.96488790455239e-05, - "loss": 2.2743, + "epoch": 0.22648969217735104, + "grad_norm": 4.251451376510779, + "learning_rate": 1.989176509964781e-05, + "loss": 0.9518, "step": 1604 }, { - "epoch": 0.3365485426714196, - "grad_norm": 5.27403008151518, - "learning_rate": 1.9648284161789096e-05, - "loss": 2.4679, + "epoch": 0.2266308952273369, + "grad_norm": 3.8329407864119522, + "learning_rate": 1.989154130100285e-05, + "loss": 0.8747, "step": 1605 }, { - "epoch": 0.33675823023694695, - "grad_norm": 5.670777104096143, - "learning_rate": 1.9647688783562992e-05, - "loss": 2.6915, + "epoch": 0.22677209827732278, + "grad_norm": 3.608231562736672, + "learning_rate": 1.9891317272482744e-05, + "loss": 0.9787, "step": 1606 }, { - "epoch": 0.3369679178024743, - "grad_norm": 5.513491182680519, - "learning_rate": 1.9647092910876102e-05, - "loss": 2.3359, + "epoch": 0.22691330132730866, + "grad_norm": 5.041225098011339, + "learning_rate": 1.9891093014092695e-05, + "loss": 1.1582, "step": 1607 }, { - "epoch": 0.3371776053680017, - "grad_norm": 6.182513180992043, - "learning_rate": 1.9646496543758972e-05, - "loss": 2.445, + "epoch": 0.22705450437729455, + "grad_norm": 3.7433228897161754, + "learning_rate": 1.9890868525837917e-05, + "loss": 0.8656, "step": 1608 }, { - "epoch": 0.33738729293352904, - "grad_norm": 5.876352155212737, - "learning_rate": 1.9645899682242164e-05, - "loss": 2.3224, + "epoch": 0.22719570742728043, + "grad_norm": 3.8263966088984698, + "learning_rate": 1.9890643807723622e-05, + "loss": 0.9359, "step": 1609 }, { - "epoch": 0.3375969804990564, - "grad_norm": 5.632639409916287, - "learning_rate": 1.9645302326356267e-05, - "loss": 2.0864, + "epoch": 0.2273369104772663, + "grad_norm": 4.840298786277609, + "learning_rate": 1.989041885975504e-05, + "loss": 1.1281, "step": 1610 }, { - "epoch": 0.33780666806458376, - "grad_norm": 5.104252825325276, - "learning_rate": 1.9644704476131896e-05, - "loss": 2.0801, + "epoch": 0.2274781135272522, + "grad_norm": 4.025610919817758, + "learning_rate": 1.9890193681937395e-05, + "loss": 1.0443, "step": 1611 }, { - "epoch": 0.3380163556301111, - "grad_norm": 5.1521356080030465, - "learning_rate": 1.964410613159969e-05, - "loss": 2.4843, + "epoch": 0.22761931657723808, + "grad_norm": 4.584359521097056, + "learning_rate": 1.9889968274275916e-05, + "loss": 1.1226, "step": 1612 }, { - "epoch": 0.3382260431956385, - "grad_norm": 5.960694466519882, - "learning_rate": 1.964350729279032e-05, - "loss": 2.0485, + "epoch": 0.22776051962722396, + "grad_norm": 4.203958313483456, + "learning_rate": 1.988974263677585e-05, + "loss": 1.0554, "step": 1613 }, { - "epoch": 0.33843573076116584, - "grad_norm": 6.05848934820254, - "learning_rate": 1.964290795973448e-05, - "loss": 1.9623, + "epoch": 0.22790172267720982, + "grad_norm": 5.48371195877179, + "learning_rate": 1.9889516769442436e-05, + "loss": 1.1697, "step": 1614 }, { - "epoch": 0.3386454183266932, - "grad_norm": 5.658683941597805, - "learning_rate": 1.9642308132462877e-05, - "loss": 2.3733, + "epoch": 0.2280429257271957, + "grad_norm": 4.438725634337092, + "learning_rate": 1.988929067228092e-05, + "loss": 1.008, "step": 1615 }, { - "epoch": 0.33885510589222056, - "grad_norm": 6.276085311129812, - "learning_rate": 1.964170781100626e-05, - "loss": 2.2151, + "epoch": 0.22818412877718158, + "grad_norm": 6.411832488371095, + "learning_rate": 1.9889064345296563e-05, + "loss": 1.3004, "step": 1616 }, { - "epoch": 0.339064793457748, - "grad_norm": 5.894701877814183, - "learning_rate": 1.9641106995395393e-05, - "loss": 2.1953, + "epoch": 0.22832533182716747, + "grad_norm": 4.642232756329534, + "learning_rate": 1.988883778849462e-05, + "loss": 1.1189, "step": 1617 }, { - "epoch": 0.33927448102327534, - "grad_norm": 5.2367358637664845, - "learning_rate": 1.964050568566107e-05, - "loss": 2.2236, + "epoch": 0.22846653487715335, + "grad_norm": 4.025352841483412, + "learning_rate": 1.9888611001880357e-05, + "loss": 0.9657, "step": 1618 }, { - "epoch": 0.3394841685888027, - "grad_norm": 5.361926510254171, - "learning_rate": 1.9639903881834115e-05, - "loss": 2.4229, + "epoch": 0.22860773792713923, + "grad_norm": 5.279854436286271, + "learning_rate": 1.9888383985459047e-05, + "loss": 1.1961, "step": 1619 }, { - "epoch": 0.33969385615433006, - "grad_norm": 6.009733127460258, - "learning_rate": 1.9639301583945364e-05, - "loss": 2.2764, + "epoch": 0.22874894097712511, + "grad_norm": 5.1586505585892555, + "learning_rate": 1.988815673923596e-05, + "loss": 1.2634, "step": 1620 }, { - "epoch": 0.3399035437198574, - "grad_norm": 6.501378521577173, - "learning_rate": 1.963869879202569e-05, - "loss": 2.5485, + "epoch": 0.228890144027111, + "grad_norm": 4.755881687895121, + "learning_rate": 1.9887929263216382e-05, + "loss": 1.1455, "step": 1621 }, { - "epoch": 0.3401132312853848, - "grad_norm": 4.934838676195025, - "learning_rate": 1.963809550610598e-05, - "loss": 2.4331, + "epoch": 0.22903134707709685, + "grad_norm": 4.239708742975153, + "learning_rate": 1.9887701557405598e-05, + "loss": 1.1488, "step": 1622 }, { - "epoch": 0.34032291885091215, - "grad_norm": 5.5369639586136445, - "learning_rate": 1.9637491726217167e-05, - "loss": 2.3229, + "epoch": 0.22917255012708274, + "grad_norm": 5.4964979493626, + "learning_rate": 1.9887473621808904e-05, + "loss": 1.4709, "step": 1623 }, { - "epoch": 0.3405326064164395, - "grad_norm": 5.38792022680214, - "learning_rate": 1.963688745239018e-05, - "loss": 2.2018, + "epoch": 0.22931375317706862, + "grad_norm": 4.752632069110938, + "learning_rate": 1.988724545643159e-05, + "loss": 1.2249, "step": 1624 }, { - "epoch": 0.34074229398196687, - "grad_norm": 6.27070047343671, - "learning_rate": 1.9636282684656e-05, - "loss": 1.7591, + "epoch": 0.2294549562270545, + "grad_norm": 5.763181318267491, + "learning_rate": 1.9887017061278962e-05, + "loss": 1.3164, "step": 1625 }, { - "epoch": 0.34095198154749423, - "grad_norm": 6.2044132393959055, - "learning_rate": 1.963567742304562e-05, - "loss": 2.1486, + "epoch": 0.22959615927704038, + "grad_norm": 3.8139962099498668, + "learning_rate": 1.9886788436356325e-05, + "loss": 0.7099, "step": 1626 }, { - "epoch": 0.3411616691130216, - "grad_norm": 6.279797936852069, - "learning_rate": 1.963507166759006e-05, - "loss": 2.2836, + "epoch": 0.22973736232702627, + "grad_norm": 4.27628324361042, + "learning_rate": 1.9886559581669e-05, + "loss": 1.0002, "step": 1627 }, { - "epoch": 0.34137135667854895, - "grad_norm": 6.451175365308464, - "learning_rate": 1.9634465418320364e-05, - "loss": 2.3515, + "epoch": 0.22987856537701215, + "grad_norm": 4.233865796128205, + "learning_rate": 1.9886330497222294e-05, + "loss": 1.0313, "step": 1628 }, { - "epoch": 0.3415810442440763, - "grad_norm": 5.752042339164163, - "learning_rate": 1.9633858675267605e-05, - "loss": 2.2526, + "epoch": 0.23001976842699803, + "grad_norm": 4.172323111604617, + "learning_rate": 1.988610118302154e-05, + "loss": 1.2652, "step": 1629 }, { - "epoch": 0.3417907318096037, - "grad_norm": 5.267816016992654, - "learning_rate": 1.963325143846288e-05, - "loss": 2.2267, + "epoch": 0.23016097147698392, + "grad_norm": 4.405271108499988, + "learning_rate": 1.988587163907206e-05, + "loss": 1.0866, "step": 1630 }, { - "epoch": 0.34200041937513104, - "grad_norm": 6.796672919437261, - "learning_rate": 1.9632643707937313e-05, - "loss": 2.2328, + "epoch": 0.23030217452696977, + "grad_norm": 4.4386995630694965, + "learning_rate": 1.9885641865379197e-05, + "loss": 1.0777, "step": 1631 }, { - "epoch": 0.3422101069406584, - "grad_norm": 5.569296422166354, - "learning_rate": 1.9632035483722048e-05, - "loss": 2.2812, + "epoch": 0.23044337757695565, + "grad_norm": 4.498919369634765, + "learning_rate": 1.9885411861948287e-05, + "loss": 1.1183, "step": 1632 }, { - "epoch": 0.34241979450618576, - "grad_norm": 5.833129645716108, - "learning_rate": 1.9631426765848255e-05, - "loss": 2.1111, + "epoch": 0.23058458062694154, + "grad_norm": 4.641683352066934, + "learning_rate": 1.988518162878467e-05, + "loss": 1.157, "step": 1633 }, { - "epoch": 0.3426294820717131, - "grad_norm": 6.869886053626056, - "learning_rate": 1.963081755434714e-05, - "loss": 2.336, + "epoch": 0.23072578367692742, + "grad_norm": 4.615818095742444, + "learning_rate": 1.9884951165893706e-05, + "loss": 1.0426, "step": 1634 }, { - "epoch": 0.34283916963724054, - "grad_norm": 5.2288693017899535, - "learning_rate": 1.963020784924992e-05, - "loss": 2.4505, + "epoch": 0.2308669867269133, + "grad_norm": 4.600505659773712, + "learning_rate": 1.9884720473280744e-05, + "loss": 0.9647, "step": 1635 }, { - "epoch": 0.3430488572027679, - "grad_norm": 5.5027633364292985, - "learning_rate": 1.9629597650587847e-05, - "loss": 2.0743, + "epoch": 0.2310081897768992, + "grad_norm": 4.539403666804867, + "learning_rate": 1.9884489550951146e-05, + "loss": 0.9634, "step": 1636 }, { - "epoch": 0.34325854476829526, - "grad_norm": 6.086821112740867, - "learning_rate": 1.962898695839219e-05, - "loss": 2.3118, + "epoch": 0.23114939282688507, + "grad_norm": 4.765487592744753, + "learning_rate": 1.988425839891028e-05, + "loss": 1.0634, "step": 1637 }, { - "epoch": 0.3434682323338226, - "grad_norm": 6.1338275972895, - "learning_rate": 1.9628375772694254e-05, - "loss": 2.1605, + "epoch": 0.23129059587687095, + "grad_norm": 4.808267275996351, + "learning_rate": 1.9884027017163515e-05, + "loss": 1.2106, "step": 1638 }, { - "epoch": 0.34367791989935, - "grad_norm": 5.734923905222298, - "learning_rate": 1.9627764093525353e-05, - "loss": 1.8931, + "epoch": 0.2314317989268568, + "grad_norm": 4.32328230949736, + "learning_rate": 1.9883795405716236e-05, + "loss": 0.9145, "step": 1639 }, { - "epoch": 0.34388760746487734, - "grad_norm": 5.403554791675531, - "learning_rate": 1.9627151920916853e-05, - "loss": 2.1866, + "epoch": 0.2315730019768427, + "grad_norm": 4.55498929764465, + "learning_rate": 1.9883563564573815e-05, + "loss": 1.1138, "step": 1640 }, { - "epoch": 0.3440972950304047, - "grad_norm": 8.125251051343719, - "learning_rate": 1.962653925490011e-05, - "loss": 2.146, + "epoch": 0.23171420502682857, + "grad_norm": 4.246380775676594, + "learning_rate": 1.9883331493741652e-05, + "loss": 0.9501, "step": 1641 }, { - "epoch": 0.34430698259593207, - "grad_norm": 7.3874474581574185, - "learning_rate": 1.962592609550654e-05, - "loss": 2.5429, + "epoch": 0.23185540807681446, + "grad_norm": 4.400183714381763, + "learning_rate": 1.9883099193225125e-05, + "loss": 1.0882, "step": 1642 }, { - "epoch": 0.3445166701614594, - "grad_norm": 7.161177938723063, - "learning_rate": 1.962531244276756e-05, - "loss": 2.3126, + "epoch": 0.23199661112680034, + "grad_norm": 4.30313156476879, + "learning_rate": 1.9882866663029645e-05, + "loss": 1.0138, "step": 1643 }, { - "epoch": 0.3447263577269868, - "grad_norm": 6.215777060934349, - "learning_rate": 1.9624698296714625e-05, - "loss": 2.3888, + "epoch": 0.23213781417678622, + "grad_norm": 4.42083778849144, + "learning_rate": 1.9882633903160612e-05, + "loss": 0.9531, "step": 1644 }, { - "epoch": 0.34493604529251415, - "grad_norm": 5.959337715393357, - "learning_rate": 1.962408365737921e-05, - "loss": 2.2543, + "epoch": 0.2322790172267721, + "grad_norm": 4.350775836451018, + "learning_rate": 1.9882400913623436e-05, + "loss": 0.9756, "step": 1645 }, { - "epoch": 0.3451457328580415, - "grad_norm": 5.303832614130179, - "learning_rate": 1.9623468524792814e-05, - "loss": 2.1992, + "epoch": 0.232420220276758, + "grad_norm": 4.484867879510931, + "learning_rate": 1.988216769442353e-05, + "loss": 0.9172, "step": 1646 }, { - "epoch": 0.34535542042356887, - "grad_norm": 6.033451549111891, - "learning_rate": 1.962285289898696e-05, - "loss": 2.3358, + "epoch": 0.23256142332674387, + "grad_norm": 4.517316634333912, + "learning_rate": 1.9881934245566313e-05, + "loss": 1.299, "step": 1647 }, { - "epoch": 0.34556510798909623, - "grad_norm": 5.392432382614776, - "learning_rate": 1.9622236779993212e-05, - "loss": 2.3739, + "epoch": 0.23270262637672973, + "grad_norm": 4.36812790086071, + "learning_rate": 1.9881700567057214e-05, + "loss": 0.9679, "step": 1648 }, { - "epoch": 0.3457747955546236, - "grad_norm": 5.608161651093695, - "learning_rate": 1.962162016784314e-05, - "loss": 2.199, + "epoch": 0.2328438294267156, + "grad_norm": 4.313217494495988, + "learning_rate": 1.9881466658901664e-05, + "loss": 1.1748, "step": 1649 }, { - "epoch": 0.34598448312015095, - "grad_norm": 6.80955398444917, - "learning_rate": 1.962100306256834e-05, - "loss": 2.6845, + "epoch": 0.2329850324767015, + "grad_norm": 4.920702194718632, + "learning_rate": 1.988123252110509e-05, + "loss": 1.1448, "step": 1650 }, { - "epoch": 0.3461941706856783, - "grad_norm": 5.650547318967569, - "learning_rate": 1.962038546420045e-05, - "loss": 2.2906, + "epoch": 0.23312623552668738, + "grad_norm": 4.162698486147186, + "learning_rate": 1.9880998153672945e-05, + "loss": 1.0029, "step": 1651 }, { - "epoch": 0.3464038582512057, - "grad_norm": 5.99594850177852, - "learning_rate": 1.961976737277112e-05, - "loss": 2.5299, + "epoch": 0.23326743857667326, + "grad_norm": 4.008862954302087, + "learning_rate": 1.9880763556610666e-05, + "loss": 1.0292, "step": 1652 }, { - "epoch": 0.3466135458167331, - "grad_norm": 6.101160751009999, - "learning_rate": 1.9619148788312027e-05, - "loss": 2.5141, + "epoch": 0.23340864162665914, + "grad_norm": 5.696606464318699, + "learning_rate": 1.988052872992371e-05, + "loss": 1.3239, "step": 1653 }, { - "epoch": 0.34682323338226045, - "grad_norm": 5.152095935492857, - "learning_rate": 1.9618529710854875e-05, - "loss": 2.2265, + "epoch": 0.23354984467664502, + "grad_norm": 5.6417345089900595, + "learning_rate": 1.988029367361753e-05, + "loss": 1.3931, "step": 1654 }, { - "epoch": 0.3470329209477878, - "grad_norm": 5.859880899841965, - "learning_rate": 1.9617910140431396e-05, - "loss": 2.4224, + "epoch": 0.2336910477266309, + "grad_norm": 4.903798270941525, + "learning_rate": 1.98800583876976e-05, + "loss": 1.3083, "step": 1655 }, { - "epoch": 0.3472426085133152, - "grad_norm": 6.79054628801436, - "learning_rate": 1.9617290077073336e-05, - "loss": 2.285, + "epoch": 0.23383225077661676, + "grad_norm": 4.8103839701358515, + "learning_rate": 1.9879822872169378e-05, + "loss": 1.0626, "step": 1656 }, { - "epoch": 0.34745229607884254, - "grad_norm": 5.308834508709281, - "learning_rate": 1.9616669520812482e-05, - "loss": 2.2054, + "epoch": 0.23397345382660265, + "grad_norm": 4.547335470661128, + "learning_rate": 1.9879587127038333e-05, + "loss": 1.1994, "step": 1657 }, { - "epoch": 0.3476619836443699, - "grad_norm": 5.640969058226901, - "learning_rate": 1.9616048471680632e-05, - "loss": 2.2392, + "epoch": 0.23411465687658853, + "grad_norm": 4.348026670633358, + "learning_rate": 1.9879351152309955e-05, + "loss": 1.0391, "step": 1658 }, { - "epoch": 0.34787167120989726, - "grad_norm": 6.039022430432571, - "learning_rate": 1.9615426929709624e-05, - "loss": 1.9644, + "epoch": 0.2342558599265744, + "grad_norm": 5.117209783414749, + "learning_rate": 1.9879114947989723e-05, + "loss": 1.1435, "step": 1659 }, { - "epoch": 0.3480813587754246, - "grad_norm": 5.330338928975169, - "learning_rate": 1.9614804894931307e-05, - "loss": 2.441, + "epoch": 0.2343970629765603, + "grad_norm": 4.165254928531248, + "learning_rate": 1.9878878514083124e-05, + "loss": 0.9047, "step": 1660 }, { - "epoch": 0.348291046340952, - "grad_norm": 6.3073108706206895, - "learning_rate": 1.9614182367377563e-05, - "loss": 2.2988, + "epoch": 0.23453826602654618, + "grad_norm": 4.505105868756046, + "learning_rate": 1.9878641850595658e-05, + "loss": 1.0744, "step": 1661 }, { - "epoch": 0.34850073390647934, - "grad_norm": 5.895407758624858, - "learning_rate": 1.96135593470803e-05, - "loss": 2.2281, + "epoch": 0.23467946907653206, + "grad_norm": 4.705833125224752, + "learning_rate": 1.9878404957532817e-05, + "loss": 1.0001, "step": 1662 }, { - "epoch": 0.3487104214720067, - "grad_norm": 5.956443281079859, - "learning_rate": 1.9612935834071445e-05, - "loss": 2.389, + "epoch": 0.23482067212651794, + "grad_norm": 5.032335874207705, + "learning_rate": 1.9878167834900114e-05, + "loss": 1.288, "step": 1663 }, { - "epoch": 0.34892010903753407, - "grad_norm": 5.774753742817088, - "learning_rate": 1.9612311828382954e-05, - "loss": 2.3765, + "epoch": 0.23496187517650383, + "grad_norm": 4.705290949900482, + "learning_rate": 1.9877930482703057e-05, + "loss": 1.1444, "step": 1664 }, { - "epoch": 0.3491297966030614, - "grad_norm": 5.124911428731447, - "learning_rate": 1.961168733004681e-05, - "loss": 1.8536, + "epoch": 0.23510307822648968, + "grad_norm": 4.939845541324191, + "learning_rate": 1.9877692900947156e-05, + "loss": 1.073, "step": 1665 }, { - "epoch": 0.3493394841685888, - "grad_norm": 5.29897050230378, - "learning_rate": 1.9611062339095023e-05, - "loss": 1.8147, + "epoch": 0.23524428127647556, + "grad_norm": 4.85716042244654, + "learning_rate": 1.9877455089637944e-05, + "loss": 1.2671, "step": 1666 }, { - "epoch": 0.34954917173411615, - "grad_norm": 5.205653865348943, - "learning_rate": 1.961043685555962e-05, - "loss": 2.2019, + "epoch": 0.23538548432646145, + "grad_norm": 4.500251023508907, + "learning_rate": 1.987721704878094e-05, + "loss": 1.2238, "step": 1667 }, { - "epoch": 0.3497588592996435, - "grad_norm": 5.213782365705236, - "learning_rate": 1.960981087947266e-05, - "loss": 2.1829, + "epoch": 0.23552668737644733, + "grad_norm": 4.242874695954041, + "learning_rate": 1.9876978778381675e-05, + "loss": 1.0523, "step": 1668 }, { - "epoch": 0.34996854686517087, - "grad_norm": 6.395047417916513, - "learning_rate": 1.9609184410866224e-05, - "loss": 2.1343, + "epoch": 0.2356678904264332, + "grad_norm": 3.8910482723160786, + "learning_rate": 1.987674027844569e-05, + "loss": 0.9617, "step": 1669 }, { - "epoch": 0.35017823443069823, - "grad_norm": 5.759269846466217, - "learning_rate": 1.960855744977242e-05, - "loss": 2.1201, + "epoch": 0.2358090934764191, + "grad_norm": 4.731797056617106, + "learning_rate": 1.9876501548978527e-05, + "loss": 1.0026, "step": 1670 }, { - "epoch": 0.35038792199622565, - "grad_norm": 5.866259648829047, - "learning_rate": 1.9607929996223383e-05, - "loss": 1.8925, + "epoch": 0.23595029652640498, + "grad_norm": 4.595267519435098, + "learning_rate": 1.9876262589985737e-05, + "loss": 1.0876, "step": 1671 }, { - "epoch": 0.350597609561753, - "grad_norm": 6.654200167466861, - "learning_rate": 1.960730205025127e-05, - "loss": 2.4153, + "epoch": 0.23609149957639086, + "grad_norm": 4.873601545974836, + "learning_rate": 1.9876023401472865e-05, + "loss": 1.037, "step": 1672 }, { - "epoch": 0.35080729712728037, - "grad_norm": 6.163470414170069, - "learning_rate": 1.9606673611888262e-05, - "loss": 2.1775, + "epoch": 0.23623270262637672, + "grad_norm": 4.307670544084782, + "learning_rate": 1.9875783983445473e-05, + "loss": 1.0749, "step": 1673 }, { - "epoch": 0.35101698469280773, - "grad_norm": 7.09991290533726, - "learning_rate": 1.960604468116657e-05, - "loss": 2.4434, + "epoch": 0.2363739056763626, + "grad_norm": 6.235705195087301, + "learning_rate": 1.987554433590913e-05, + "loss": 1.2804, "step": 1674 }, { - "epoch": 0.3512266722583351, - "grad_norm": 5.746313634499447, - "learning_rate": 1.960541525811843e-05, - "loss": 2.1227, + "epoch": 0.23651510872634848, + "grad_norm": 4.71858407063096, + "learning_rate": 1.98753044588694e-05, + "loss": 1.0622, "step": 1675 }, { - "epoch": 0.35143635982386245, - "grad_norm": 6.107570967975584, - "learning_rate": 1.9604785342776095e-05, - "loss": 2.1543, + "epoch": 0.23665631177633437, + "grad_norm": 3.793384657262079, + "learning_rate": 1.987506435233186e-05, + "loss": 0.9055, "step": 1676 }, { - "epoch": 0.3516460473893898, - "grad_norm": 5.410935218107208, - "learning_rate": 1.9604154935171854e-05, - "loss": 2.094, + "epoch": 0.23679751482632025, + "grad_norm": 4.338239967516377, + "learning_rate": 1.9874824016302088e-05, + "loss": 1.186, "step": 1677 }, { - "epoch": 0.3518557349549172, - "grad_norm": 5.167148370893116, - "learning_rate": 1.9603524035338017e-05, - "loss": 2.2303, + "epoch": 0.23693871787630613, + "grad_norm": 4.006051005718899, + "learning_rate": 1.987458345078567e-05, + "loss": 1.0295, "step": 1678 }, { - "epoch": 0.35206542252044454, - "grad_norm": 4.927149623795553, - "learning_rate": 1.9602892643306914e-05, - "loss": 2.2601, + "epoch": 0.23707992092629201, + "grad_norm": 4.066927362349002, + "learning_rate": 1.98743426557882e-05, + "loss": 0.9697, "step": 1679 }, { - "epoch": 0.3522751100859719, - "grad_norm": 5.499208224841035, - "learning_rate": 1.9602260759110905e-05, - "loss": 2.3447, + "epoch": 0.2372211239762779, + "grad_norm": 5.456678326874907, + "learning_rate": 1.9874101631315268e-05, + "loss": 1.2831, "step": 1680 }, { - "epoch": 0.35248479765149926, - "grad_norm": 7.292751486524627, - "learning_rate": 1.960162838278238e-05, - "loss": 2.3028, + "epoch": 0.23736232702626378, + "grad_norm": 4.1762344746772495, + "learning_rate": 1.987386037737248e-05, + "loss": 1.0886, "step": 1681 }, { - "epoch": 0.3526944852170266, - "grad_norm": 6.077661482205115, - "learning_rate": 1.9600995514353748e-05, - "loss": 2.1586, + "epoch": 0.23750353007624964, + "grad_norm": 4.117573342576986, + "learning_rate": 1.9873618893965442e-05, + "loss": 0.9458, "step": 1682 }, { - "epoch": 0.352904172782554, - "grad_norm": 5.872212024921919, - "learning_rate": 1.9600362153857445e-05, - "loss": 2.422, + "epoch": 0.23764473312623552, + "grad_norm": 5.3565714724989, + "learning_rate": 1.9873377181099763e-05, + "loss": 1.3172, "step": 1683 }, { - "epoch": 0.35311386034808134, - "grad_norm": 5.408163191936192, - "learning_rate": 1.959972830132593e-05, - "loss": 2.41, + "epoch": 0.2377859361762214, + "grad_norm": 3.849748866888073, + "learning_rate": 1.987313523878106e-05, + "loss": 0.8927, "step": 1684 }, { - "epoch": 0.3533235479136087, - "grad_norm": 6.5853438613257795, - "learning_rate": 1.9599093956791688e-05, - "loss": 1.9205, + "epoch": 0.23792713922620728, + "grad_norm": 4.451472552340125, + "learning_rate": 1.987289306701496e-05, + "loss": 1.2681, "step": 1685 }, { - "epoch": 0.35353323547913607, - "grad_norm": 5.25300984712405, - "learning_rate": 1.9598459120287234e-05, - "loss": 2.4163, + "epoch": 0.23806834227619317, + "grad_norm": 4.203497142817664, + "learning_rate": 1.987265066580709e-05, + "loss": 0.9937, "step": 1686 }, { - "epoch": 0.3537429230446634, - "grad_norm": 5.303118914330665, - "learning_rate": 1.9597823791845102e-05, - "loss": 2.2151, + "epoch": 0.23820954532617905, + "grad_norm": 4.9282752744568725, + "learning_rate": 1.9872408035163084e-05, + "loss": 1.1195, "step": 1687 }, { - "epoch": 0.35395261061019084, - "grad_norm": 6.041393593496545, - "learning_rate": 1.9597187971497853e-05, - "loss": 2.1429, + "epoch": 0.23835074837616493, + "grad_norm": 3.992380731046059, + "learning_rate": 1.9872165175088578e-05, + "loss": 1.0189, "step": 1688 }, { - "epoch": 0.3541622981757182, - "grad_norm": 5.241675286667038, - "learning_rate": 1.9596551659278076e-05, - "loss": 2.1497, + "epoch": 0.23849195142615082, + "grad_norm": 4.098839998121916, + "learning_rate": 1.9871922085589215e-05, + "loss": 1.0931, "step": 1689 }, { - "epoch": 0.35437198574124557, - "grad_norm": 5.804696079211872, - "learning_rate": 1.959591485521838e-05, - "loss": 2.3128, + "epoch": 0.23863315447613667, + "grad_norm": 5.208312723738809, + "learning_rate": 1.9871678766670647e-05, + "loss": 1.4505, "step": 1690 }, { - "epoch": 0.3545816733067729, - "grad_norm": 5.80782204025035, - "learning_rate": 1.959527755935141e-05, - "loss": 2.2326, + "epoch": 0.23877435752612255, + "grad_norm": 4.745912837766724, + "learning_rate": 1.9871435218338527e-05, + "loss": 1.2483, "step": 1691 }, { - "epoch": 0.3547913608723003, - "grad_norm": 7.3015953273941365, - "learning_rate": 1.9594639771709814e-05, - "loss": 2.3307, + "epoch": 0.23891556057610844, + "grad_norm": 4.033888158834617, + "learning_rate": 1.9871191440598515e-05, + "loss": 1.1336, "step": 1692 }, { - "epoch": 0.35500104843782765, - "grad_norm": 5.9833597325486, - "learning_rate": 1.9594001492326295e-05, - "loss": 2.1632, + "epoch": 0.23905676362609432, + "grad_norm": 3.891479233555438, + "learning_rate": 1.9870947433456278e-05, + "loss": 1.0295, "step": 1693 }, { - "epoch": 0.355210736003355, - "grad_norm": 6.581294966020599, - "learning_rate": 1.959336272123356e-05, - "loss": 2.3893, + "epoch": 0.2391979666760802, + "grad_norm": 5.141893524541058, + "learning_rate": 1.9870703196917485e-05, + "loss": 1.2484, "step": 1694 }, { - "epoch": 0.35542042356888237, - "grad_norm": 5.420440184064418, - "learning_rate": 1.9592723458464344e-05, - "loss": 2.1637, + "epoch": 0.2393391697260661, + "grad_norm": 4.203809566505391, + "learning_rate": 1.9870458730987815e-05, + "loss": 1.0601, "step": 1695 }, { - "epoch": 0.35563011113440973, - "grad_norm": 5.160742435019318, - "learning_rate": 1.9592083704051416e-05, - "loss": 2.2996, + "epoch": 0.23948037277605197, + "grad_norm": 5.2456508956128, + "learning_rate": 1.9870214035672945e-05, + "loss": 1.1457, "step": 1696 }, { - "epoch": 0.3558397986999371, - "grad_norm": 6.076157237108552, - "learning_rate": 1.9591443458027558e-05, - "loss": 1.9313, + "epoch": 0.23962157582603785, + "grad_norm": 4.552540297619828, + "learning_rate": 1.986996911097856e-05, + "loss": 0.9454, "step": 1697 }, { - "epoch": 0.35604948626546445, - "grad_norm": 5.434482143608857, - "learning_rate": 1.959080272042559e-05, - "loss": 2.4027, + "epoch": 0.23976277887602374, + "grad_norm": 4.224538218510822, + "learning_rate": 1.986972395691036e-05, + "loss": 1.1234, "step": 1698 }, { - "epoch": 0.3562591738309918, - "grad_norm": 5.3882051056958336, - "learning_rate": 1.9590161491278345e-05, - "loss": 2.2472, + "epoch": 0.2399039819260096, + "grad_norm": 3.833203831361616, + "learning_rate": 1.9869478573474038e-05, + "loss": 0.877, "step": 1699 }, { - "epoch": 0.3564688613965192, - "grad_norm": 6.249506551515541, - "learning_rate": 1.958951977061869e-05, - "loss": 2.2795, + "epoch": 0.24004518497599547, + "grad_norm": 5.052699288939831, + "learning_rate": 1.9869232960675292e-05, + "loss": 1.4346, "step": 1700 }, { - "epoch": 0.35667854896204654, - "grad_norm": 6.888026663964633, - "learning_rate": 1.9588877558479517e-05, - "loss": 2.4719, + "epoch": 0.24018638802598136, + "grad_norm": 4.035177596131794, + "learning_rate": 1.986898711851984e-05, + "loss": 1.0472, "step": 1701 }, { - "epoch": 0.3568882365275739, - "grad_norm": 5.342502986329096, - "learning_rate": 1.958823485489374e-05, - "loss": 2.3874, + "epoch": 0.24032759107596724, + "grad_norm": 4.7608775188113475, + "learning_rate": 1.9868741047013382e-05, + "loss": 1.2139, "step": 1702 }, { - "epoch": 0.35709792409310126, - "grad_norm": 6.564646797762203, - "learning_rate": 1.9587591659894293e-05, - "loss": 2.1616, + "epoch": 0.24046879412595312, + "grad_norm": 3.7414722798428137, + "learning_rate": 1.9868494746161652e-05, + "loss": 0.885, "step": 1703 }, { - "epoch": 0.3573076116586286, - "grad_norm": 5.3817900141911315, - "learning_rate": 1.958694797351415e-05, - "loss": 2.4482, + "epoch": 0.240609997175939, + "grad_norm": 3.4924063060126103, + "learning_rate": 1.986824821597036e-05, + "loss": 0.8345, "step": 1704 }, { - "epoch": 0.357517299224156, - "grad_norm": 5.810909851341291, - "learning_rate": 1.958630379578629e-05, - "loss": 2.1835, + "epoch": 0.2407512002259249, + "grad_norm": 4.422179777304984, + "learning_rate": 1.986800145644524e-05, + "loss": 1.0534, "step": 1705 }, { - "epoch": 0.3577269867896834, - "grad_norm": 5.069083654327974, - "learning_rate": 1.9585659126743734e-05, - "loss": 2.205, + "epoch": 0.24089240327591077, + "grad_norm": 4.466169029559722, + "learning_rate": 1.9867754467592037e-05, + "loss": 1.0033, "step": 1706 }, { - "epoch": 0.35793667435521076, - "grad_norm": 5.678218097735782, - "learning_rate": 1.9585013966419522e-05, - "loss": 2.3414, + "epoch": 0.24103360632589663, + "grad_norm": 5.039255682778423, + "learning_rate": 1.9867507249416476e-05, + "loss": 1.4238, "step": 1707 }, { - "epoch": 0.3581463619207381, - "grad_norm": 5.932166825456759, - "learning_rate": 1.9584368314846725e-05, - "loss": 2.4403, + "epoch": 0.2411748093758825, + "grad_norm": 4.281473088255995, + "learning_rate": 1.9867259801924306e-05, + "loss": 1.2614, "step": 1708 }, { - "epoch": 0.3583560494862655, - "grad_norm": 5.200048716721955, - "learning_rate": 1.9583722172058426e-05, - "loss": 2.2058, + "epoch": 0.2413160124258684, + "grad_norm": 4.5342909187472875, + "learning_rate": 1.9867012125121282e-05, + "loss": 1.0153, "step": 1709 }, { - "epoch": 0.35856573705179284, - "grad_norm": 5.605274255382252, - "learning_rate": 1.958307553808774e-05, - "loss": 2.2684, + "epoch": 0.24145721547585428, + "grad_norm": 5.086330216663602, + "learning_rate": 1.9866764219013154e-05, + "loss": 1.2414, "step": 1710 }, { - "epoch": 0.3587754246173202, - "grad_norm": 5.050702431116143, - "learning_rate": 1.9582428412967814e-05, - "loss": 2.1875, + "epoch": 0.24159841852584016, + "grad_norm": 4.678333873855341, + "learning_rate": 1.9866516083605693e-05, + "loss": 1.0939, "step": 1711 }, { - "epoch": 0.35898511218284757, - "grad_norm": 5.922291855023995, - "learning_rate": 1.9581780796731812e-05, - "loss": 2.2152, + "epoch": 0.24173962157582604, + "grad_norm": 5.202446602126027, + "learning_rate": 1.9866267718904655e-05, + "loss": 1.2421, "step": 1712 }, { - "epoch": 0.3591947997483749, - "grad_norm": 5.280457626243012, - "learning_rate": 1.958113268941293e-05, - "loss": 2.2874, + "epoch": 0.24188082462581192, + "grad_norm": 4.714916813295156, + "learning_rate": 1.986601912491582e-05, + "loss": 1.2287, "step": 1713 }, { - "epoch": 0.3594044873139023, - "grad_norm": 5.881500523228601, - "learning_rate": 1.9580484091044373e-05, - "loss": 2.1145, + "epoch": 0.2420220276757978, + "grad_norm": 4.144168509639218, + "learning_rate": 1.9865770301644956e-05, + "loss": 1.1826, "step": 1714 }, { - "epoch": 0.35961417487942965, - "grad_norm": 6.0607582480168105, - "learning_rate": 1.9579835001659396e-05, - "loss": 2.3703, + "epoch": 0.2421632307257837, + "grad_norm": 4.5366926271985735, + "learning_rate": 1.9865521249097854e-05, + "loss": 1.0736, "step": 1715 }, { - "epoch": 0.359823862444957, - "grad_norm": 6.263590217554079, - "learning_rate": 1.9579185421291257e-05, - "loss": 2.3474, + "epoch": 0.24230443377576955, + "grad_norm": 5.142197600056926, + "learning_rate": 1.9865271967280297e-05, + "loss": 1.2572, "step": 1716 }, { - "epoch": 0.36003355001048437, - "grad_norm": 5.587184617403286, - "learning_rate": 1.957853534997325e-05, - "loss": 2.573, + "epoch": 0.24244563682575543, + "grad_norm": 4.238112032002369, + "learning_rate": 1.986502245619808e-05, + "loss": 0.9562, "step": 1717 }, { - "epoch": 0.36024323757601173, - "grad_norm": 6.004519742100833, - "learning_rate": 1.9577884787738693e-05, - "loss": 2.3938, + "epoch": 0.2425868398757413, + "grad_norm": 4.600405060786195, + "learning_rate": 1.9864772715857e-05, + "loss": 1.2361, "step": 1718 }, { - "epoch": 0.3604529251415391, - "grad_norm": 6.138337337081805, - "learning_rate": 1.9577233734620928e-05, - "loss": 2.4345, + "epoch": 0.2427280429257272, + "grad_norm": 4.5580871962843315, + "learning_rate": 1.9864522746262867e-05, + "loss": 0.9631, "step": 1719 }, { - "epoch": 0.36066261270706645, - "grad_norm": 5.827310281471527, - "learning_rate": 1.9576582190653326e-05, - "loss": 2.5692, + "epoch": 0.24286924597571308, + "grad_norm": 4.192100401700932, + "learning_rate": 1.9864272547421482e-05, + "loss": 1.1597, "step": 1720 }, { - "epoch": 0.3608723002725938, - "grad_norm": 6.211644111480424, - "learning_rate": 1.9575930155869277e-05, - "loss": 2.4837, + "epoch": 0.24301044902569896, + "grad_norm": 4.794180102186992, + "learning_rate": 1.9864022119338667e-05, + "loss": 1.1438, "step": 1721 }, { - "epoch": 0.3610819878381212, - "grad_norm": 5.599209143469495, - "learning_rate": 1.9575277630302193e-05, - "loss": 2.4039, + "epoch": 0.24315165207568484, + "grad_norm": 4.558330996321931, + "learning_rate": 1.9863771462020235e-05, + "loss": 1.0424, "step": 1722 }, { - "epoch": 0.36129167540364854, - "grad_norm": 4.959977386523943, - "learning_rate": 1.957462461398553e-05, - "loss": 2.2625, + "epoch": 0.24329285512567073, + "grad_norm": 5.314968533501325, + "learning_rate": 1.9863520575472014e-05, + "loss": 1.5042, "step": 1723 }, { - "epoch": 0.36150136296917595, - "grad_norm": 4.891211376289939, - "learning_rate": 1.9573971106952746e-05, - "loss": 1.9175, + "epoch": 0.24343405817565658, + "grad_norm": 4.028263697608648, + "learning_rate": 1.9863269459699836e-05, + "loss": 1.1039, "step": 1724 }, { - "epoch": 0.3617110505347033, - "grad_norm": 6.065117789132127, - "learning_rate": 1.9573317109237337e-05, - "loss": 2.2494, + "epoch": 0.24357526122564246, + "grad_norm": 4.240398429772271, + "learning_rate": 1.9863018114709534e-05, + "loss": 1.3008, "step": 1725 }, { - "epoch": 0.3619207381002307, - "grad_norm": 6.049693301932738, - "learning_rate": 1.9572662620872825e-05, - "loss": 2.5247, + "epoch": 0.24371646427562835, + "grad_norm": 6.089845837229606, + "learning_rate": 1.986276654050695e-05, + "loss": 1.222, "step": 1726 }, { - "epoch": 0.36213042566575804, - "grad_norm": 5.701447641767417, - "learning_rate": 1.957200764189275e-05, - "loss": 2.2293, + "epoch": 0.24385766732561423, + "grad_norm": 4.273309056232436, + "learning_rate": 1.986251473709793e-05, + "loss": 1.0969, "step": 1727 }, { - "epoch": 0.3623401132312854, - "grad_norm": 5.413194695023114, - "learning_rate": 1.957135217233068e-05, - "loss": 2.1644, + "epoch": 0.2439988703756001, + "grad_norm": 4.252715119148133, + "learning_rate": 1.986226270448833e-05, + "loss": 1.1508, "step": 1728 }, { - "epoch": 0.36254980079681276, - "grad_norm": 5.54205890665771, - "learning_rate": 1.957069621222021e-05, - "loss": 2.1572, + "epoch": 0.244140073425586, + "grad_norm": 4.1978920833157725, + "learning_rate": 1.9862010442684004e-05, + "loss": 0.9508, "step": 1729 }, { - "epoch": 0.3627594883623401, - "grad_norm": 5.369699570129372, - "learning_rate": 1.957003976159496e-05, - "loss": 2.2254, + "epoch": 0.24428127647557188, + "grad_norm": 4.870562332202742, + "learning_rate": 1.9861757951690813e-05, + "loss": 1.099, "step": 1730 }, { - "epoch": 0.3629691759278675, - "grad_norm": 5.606180579482364, - "learning_rate": 1.9569382820488575e-05, - "loss": 2.5863, + "epoch": 0.24442247952555776, + "grad_norm": 3.929110816382615, + "learning_rate": 1.9861505231514626e-05, + "loss": 0.9672, "step": 1731 }, { - "epoch": 0.36317886349339484, - "grad_norm": 5.983595416644938, - "learning_rate": 1.9568725388934723e-05, - "loss": 2.284, + "epoch": 0.24456368257554364, + "grad_norm": 4.5533792958763915, + "learning_rate": 1.9861252282161313e-05, + "loss": 1.1598, "step": 1732 }, { - "epoch": 0.3633885510589222, - "grad_norm": 5.60807875619355, - "learning_rate": 1.95680674669671e-05, - "loss": 2.2106, + "epoch": 0.2447048856255295, + "grad_norm": 5.061786705193654, + "learning_rate": 1.986099910363676e-05, + "loss": 1.21, "step": 1733 }, { - "epoch": 0.36359823862444957, - "grad_norm": 7.6127692459497265, - "learning_rate": 1.956740905461942e-05, - "loss": 2.5548, + "epoch": 0.24484608867551538, + "grad_norm": 4.199344169831758, + "learning_rate": 1.9860745695946848e-05, + "loss": 1.0084, "step": 1734 }, { - "epoch": 0.3638079261899769, - "grad_norm": 5.636538864772774, - "learning_rate": 1.9566750151925436e-05, - "loss": 2.3459, + "epoch": 0.24498729172550127, + "grad_norm": 5.0954181604947735, + "learning_rate": 1.986049205909746e-05, + "loss": 1.3114, "step": 1735 }, { - "epoch": 0.3640176137555043, - "grad_norm": 5.933719806951929, - "learning_rate": 1.9566090758918913e-05, - "loss": 2.3465, + "epoch": 0.24512849477548715, + "grad_norm": 4.367454921385469, + "learning_rate": 1.9860238193094497e-05, + "loss": 1.1882, "step": 1736 }, { - "epoch": 0.36422730132103165, - "grad_norm": 6.256097942666154, - "learning_rate": 1.9565430875633648e-05, - "loss": 2.3948, + "epoch": 0.24526969782547303, + "grad_norm": 4.005041540695415, + "learning_rate": 1.9859984097943855e-05, + "loss": 1.0128, "step": 1737 }, { - "epoch": 0.364436988886559, - "grad_norm": 5.66321593403629, - "learning_rate": 1.956477050210346e-05, - "loss": 2.1085, + "epoch": 0.24541090087545891, + "grad_norm": 5.14873948961256, + "learning_rate": 1.9859729773651446e-05, + "loss": 1.2109, "step": 1738 }, { - "epoch": 0.36464667645208637, - "grad_norm": 5.2921111983457445, - "learning_rate": 1.956410963836219e-05, - "loss": 2.2224, + "epoch": 0.2455521039254448, + "grad_norm": 4.367870147356684, + "learning_rate": 1.985947522022317e-05, + "loss": 1.0385, "step": 1739 }, { - "epoch": 0.36485636401761373, - "grad_norm": 5.980122971396161, - "learning_rate": 1.9563448284443717e-05, - "loss": 2.2052, + "epoch": 0.24569330697543068, + "grad_norm": 4.309987246038187, + "learning_rate": 1.985922043766495e-05, + "loss": 1.16, "step": 1740 }, { - "epoch": 0.3650660515831411, - "grad_norm": 5.1069635358808565, - "learning_rate": 1.9562786440381933e-05, - "loss": 2.2934, + "epoch": 0.24583451002541654, + "grad_norm": 4.943393901506551, + "learning_rate": 1.9858965425982703e-05, + "loss": 1.1889, "step": 1741 }, { - "epoch": 0.3652757391486685, - "grad_norm": 5.318715414569083, - "learning_rate": 1.9562124106210758e-05, - "loss": 2.2583, + "epoch": 0.24597571307540242, + "grad_norm": 4.232150717719606, + "learning_rate": 1.985871018518236e-05, + "loss": 1.2098, "step": 1742 }, { - "epoch": 0.36548542671419587, - "grad_norm": 5.721554311240375, - "learning_rate": 1.9561461281964135e-05, - "loss": 2.3655, + "epoch": 0.2461169161253883, + "grad_norm": 4.994145106063239, + "learning_rate": 1.985845471526985e-05, + "loss": 1.3068, "step": 1743 }, { - "epoch": 0.36569511427972323, - "grad_norm": 5.73652509627809, - "learning_rate": 1.956079796767604e-05, - "loss": 2.304, + "epoch": 0.24625811917537418, + "grad_norm": 5.020111169772371, + "learning_rate": 1.9858199016251106e-05, + "loss": 1.1494, "step": 1744 }, { - "epoch": 0.3659048018452506, - "grad_norm": 5.368147113544979, - "learning_rate": 1.9560134163380467e-05, - "loss": 2.4203, + "epoch": 0.24639932222536007, + "grad_norm": 4.528137671511692, + "learning_rate": 1.985794308813208e-05, + "loss": 1.0523, "step": 1745 }, { - "epoch": 0.36611448941077795, - "grad_norm": 5.272285917075877, - "learning_rate": 1.9559469869111437e-05, - "loss": 2.0666, + "epoch": 0.24654052527534595, + "grad_norm": 4.141333717613249, + "learning_rate": 1.985768693091871e-05, + "loss": 0.9969, "step": 1746 }, { - "epoch": 0.3663241769763053, - "grad_norm": 4.8188749260014365, - "learning_rate": 1.9558805084902995e-05, - "loss": 2.2457, + "epoch": 0.24668172832533183, + "grad_norm": 3.5676800497331267, + "learning_rate": 1.9857430544616953e-05, + "loss": 0.9079, "step": 1747 }, { - "epoch": 0.3665338645418327, - "grad_norm": 6.257958190807909, - "learning_rate": 1.9558139810789216e-05, - "loss": 2.4562, + "epoch": 0.24682293137531772, + "grad_norm": 4.316703458443119, + "learning_rate": 1.9857173929232768e-05, + "loss": 1.0516, "step": 1748 }, { - "epoch": 0.36674355210736004, - "grad_norm": 5.104900912262871, - "learning_rate": 1.9557474046804194e-05, - "loss": 2.1576, + "epoch": 0.2469641344253036, + "grad_norm": 4.258493156549214, + "learning_rate": 1.9856917084772117e-05, + "loss": 1.1117, "step": 1749 }, { - "epoch": 0.3669532396728874, - "grad_norm": 6.226248081374414, - "learning_rate": 1.9556807792982045e-05, - "loss": 2.3186, + "epoch": 0.24710533747528945, + "grad_norm": 4.140218259882781, + "learning_rate": 1.985666001124097e-05, + "loss": 1.1266, "step": 1750 }, { - "epoch": 0.36716292723841476, - "grad_norm": 5.040256406768721, - "learning_rate": 1.9556141049356924e-05, - "loss": 2.0172, + "epoch": 0.24724654052527534, + "grad_norm": 4.21720054668738, + "learning_rate": 1.9856402708645305e-05, + "loss": 0.9311, "step": 1751 }, { - "epoch": 0.3673726148039421, - "grad_norm": 5.1157351849864865, - "learning_rate": 1.9555473815963002e-05, - "loss": 2.325, + "epoch": 0.24738774357526122, + "grad_norm": 5.593212202313464, + "learning_rate": 1.9856145176991093e-05, + "loss": 1.4965, "step": 1752 }, { - "epoch": 0.3675823023694695, - "grad_norm": 5.085508325886163, - "learning_rate": 1.9554806092834473e-05, - "loss": 2.3395, + "epoch": 0.2475289466252471, + "grad_norm": 4.057438177436997, + "learning_rate": 1.9855887416284325e-05, + "loss": 1.1052, "step": 1753 }, { - "epoch": 0.36779198993499684, - "grad_norm": 5.8794078042652185, - "learning_rate": 1.955413788000556e-05, - "loss": 2.4353, + "epoch": 0.247670149675233, + "grad_norm": 4.356891711290078, + "learning_rate": 1.9855629426530992e-05, + "loss": 1.0993, "step": 1754 }, { - "epoch": 0.3680016775005242, - "grad_norm": 6.237286463140929, - "learning_rate": 1.955346917751051e-05, - "loss": 2.335, + "epoch": 0.24781135272521887, + "grad_norm": 4.342036746806214, + "learning_rate": 1.9855371207737084e-05, + "loss": 1.0341, "step": 1755 }, { - "epoch": 0.36821136506605157, - "grad_norm": 5.882972795049092, - "learning_rate": 1.9552799985383593e-05, - "loss": 2.4365, + "epoch": 0.24795255577520475, + "grad_norm": 4.641989901468239, + "learning_rate": 1.9855112759908607e-05, + "loss": 1.0026, "step": 1756 }, { - "epoch": 0.3684210526315789, - "grad_norm": 5.5896404710564465, - "learning_rate": 1.955213030365911e-05, - "loss": 2.1801, + "epoch": 0.24809375882519064, + "grad_norm": 4.5300387958261386, + "learning_rate": 1.9854854083051563e-05, + "loss": 1.1155, "step": 1757 }, { - "epoch": 0.3686307401971063, - "grad_norm": 5.111875555213847, - "learning_rate": 1.955146013237138e-05, - "loss": 2.3078, + "epoch": 0.2482349618751765, + "grad_norm": 3.277970357469273, + "learning_rate": 1.9854595177171968e-05, + "loss": 0.923, "step": 1758 }, { - "epoch": 0.36884042776263365, - "grad_norm": 5.421058354005871, - "learning_rate": 1.9550789471554754e-05, - "loss": 2.3057, + "epoch": 0.24837616492516237, + "grad_norm": 3.7674849049875077, + "learning_rate": 1.985433604227584e-05, + "loss": 0.9485, "step": 1759 }, { - "epoch": 0.36905011532816107, - "grad_norm": 5.821862040099351, - "learning_rate": 1.95501183212436e-05, - "loss": 2.4892, + "epoch": 0.24851736797514826, + "grad_norm": 5.019852036579426, + "learning_rate": 1.9854076678369197e-05, + "loss": 1.0545, "step": 1760 }, { - "epoch": 0.3692598028936884, - "grad_norm": 6.018041114057587, - "learning_rate": 1.9549446681472323e-05, - "loss": 2.0957, + "epoch": 0.24865857102513414, + "grad_norm": 4.313766028592151, + "learning_rate": 1.9853817085458065e-05, + "loss": 1.0266, "step": 1761 }, { - "epoch": 0.3694694904592158, - "grad_norm": 5.52649957626701, - "learning_rate": 1.9548774552275335e-05, - "loss": 2.3012, + "epoch": 0.24879977407512002, + "grad_norm": 4.254151271200883, + "learning_rate": 1.985355726354848e-05, + "loss": 1.1362, "step": 1762 }, { - "epoch": 0.36967917802474315, - "grad_norm": 5.614782683905725, - "learning_rate": 1.9548101933687094e-05, - "loss": 2.3079, + "epoch": 0.2489409771251059, + "grad_norm": 4.542462728662068, + "learning_rate": 1.985329721264648e-05, + "loss": 1.0829, "step": 1763 }, { - "epoch": 0.3698888655902705, - "grad_norm": 5.84946680341017, - "learning_rate": 1.9547428825742064e-05, - "loss": 2.3786, + "epoch": 0.2490821801750918, + "grad_norm": 4.43666385220854, + "learning_rate": 1.985303693275811e-05, + "loss": 1.1686, "step": 1764 }, { - "epoch": 0.37009855315579787, - "grad_norm": 5.44972348490551, - "learning_rate": 1.954675522847475e-05, - "loss": 2.031, + "epoch": 0.24922338322507767, + "grad_norm": 4.442954744062333, + "learning_rate": 1.9852776423889414e-05, + "loss": 1.2025, "step": 1765 }, { - "epoch": 0.37030824072132523, - "grad_norm": 5.388306391552122, - "learning_rate": 1.9546081141919676e-05, - "loss": 2.1767, + "epoch": 0.24936458627506355, + "grad_norm": 4.306206916782786, + "learning_rate": 1.9852515686046453e-05, + "loss": 1.0653, "step": 1766 }, { - "epoch": 0.3705179282868526, - "grad_norm": 5.859141628704206, - "learning_rate": 1.954540656611138e-05, - "loss": 2.2229, + "epoch": 0.2495057893250494, + "grad_norm": 4.4121707619056405, + "learning_rate": 1.9852254719235276e-05, + "loss": 1.0743, "step": 1767 }, { - "epoch": 0.37072761585237995, - "grad_norm": 5.864720920219384, - "learning_rate": 1.9544731501084445e-05, - "loss": 2.1253, + "epoch": 0.2496469923750353, + "grad_norm": 8.826121169885152, + "learning_rate": 1.985199352346196e-05, + "loss": 0.8463, "step": 1768 }, { - "epoch": 0.3709373034179073, - "grad_norm": 5.604754448946263, - "learning_rate": 1.954405594687347e-05, - "loss": 2.5683, + "epoch": 0.24978819542502118, + "grad_norm": 5.423001436827784, + "learning_rate": 1.9851732098732565e-05, + "loss": 1.1477, "step": 1769 }, { - "epoch": 0.3711469909834347, - "grad_norm": 6.398507553923447, - "learning_rate": 1.954337990351307e-05, - "loss": 2.4562, + "epoch": 0.24992939847500706, + "grad_norm": 4.349952410355591, + "learning_rate": 1.9851470445053173e-05, + "loss": 1.0024, "step": 1770 }, { - "epoch": 0.37135667854896204, - "grad_norm": 5.855825785013247, - "learning_rate": 1.95427033710379e-05, - "loss": 2.3962, + "epoch": 0.25007060152499294, + "grad_norm": 3.952842332133127, + "learning_rate": 1.9851208562429863e-05, + "loss": 1.1046, "step": 1771 }, { - "epoch": 0.3715663661144894, - "grad_norm": 5.4992735136383235, - "learning_rate": 1.954202634948263e-05, - "loss": 2.5828, + "epoch": 0.2502118045749788, + "grad_norm": 4.483581229400321, + "learning_rate": 1.985094645086872e-05, + "loss": 1.0459, "step": 1772 }, { - "epoch": 0.37177605368001676, - "grad_norm": 5.266256005586283, - "learning_rate": 1.954134883888196e-05, - "loss": 2.4694, + "epoch": 0.2503530076249647, + "grad_norm": 4.336622748908567, + "learning_rate": 1.9850684110375836e-05, + "loss": 0.9991, "step": 1773 }, { - "epoch": 0.3719857412455441, - "grad_norm": 5.803277768654528, - "learning_rate": 1.9540670839270614e-05, - "loss": 2.2878, + "epoch": 0.2504942106749506, + "grad_norm": 4.805756904086518, + "learning_rate": 1.9850421540957307e-05, + "loss": 1.0262, "step": 1774 }, { - "epoch": 0.3721954288110715, - "grad_norm": 6.420229155901658, - "learning_rate": 1.9539992350683337e-05, - "loss": 2.359, + "epoch": 0.2506354137249365, + "grad_norm": 4.158873471280884, + "learning_rate": 1.9850158742619233e-05, + "loss": 1.1686, "step": 1775 }, { - "epoch": 0.37240511637659884, - "grad_norm": 5.946736918596325, - "learning_rate": 1.9539313373154907e-05, - "loss": 2.2908, + "epoch": 0.25077661677492236, + "grad_norm": 3.8921747621044975, + "learning_rate": 1.9849895715367728e-05, + "loss": 1.007, "step": 1776 }, { - "epoch": 0.3726148039421262, - "grad_norm": 5.897266705001163, - "learning_rate": 1.9538633906720123e-05, - "loss": 2.186, + "epoch": 0.25091781982490824, + "grad_norm": 5.009127888899049, + "learning_rate": 1.9849632459208895e-05, + "loss": 1.2482, "step": 1777 }, { - "epoch": 0.3728244915076536, - "grad_norm": 6.227492316729951, - "learning_rate": 1.9537953951413806e-05, - "loss": 2.3604, + "epoch": 0.2510590228748941, + "grad_norm": 4.480161958489549, + "learning_rate": 1.9849368974148865e-05, + "loss": 1.0608, "step": 1778 }, { - "epoch": 0.373034179073181, - "grad_norm": 5.260347230947926, - "learning_rate": 1.953727350727081e-05, - "loss": 2.1722, + "epoch": 0.25120022592488, + "grad_norm": 5.315526488222281, + "learning_rate": 1.984910526019375e-05, + "loss": 1.1975, "step": 1779 }, { - "epoch": 0.37324386663870834, - "grad_norm": 5.298609573526748, - "learning_rate": 1.9536592574326e-05, - "loss": 2.2996, + "epoch": 0.25134142897486583, + "grad_norm": 3.9086107591683708, + "learning_rate": 1.984884131734968e-05, + "loss": 0.8438, "step": 1780 }, { - "epoch": 0.3734535542042357, - "grad_norm": 5.5121981120352075, - "learning_rate": 1.9535911152614284e-05, - "loss": 2.2951, + "epoch": 0.2514826320248517, + "grad_norm": 6.768880213102031, + "learning_rate": 1.984857714562279e-05, + "loss": 1.4772, "step": 1781 }, { - "epoch": 0.37366324176976307, - "grad_norm": 5.623201849578347, - "learning_rate": 1.953522924217058e-05, - "loss": 2.2044, + "epoch": 0.2516238350748376, + "grad_norm": 4.911678150941068, + "learning_rate": 1.9848312745019224e-05, + "loss": 1.1367, "step": 1782 }, { - "epoch": 0.3738729293352904, - "grad_norm": 5.965575327911224, - "learning_rate": 1.953454684302984e-05, - "loss": 2.3269, + "epoch": 0.2517650381248235, + "grad_norm": 3.7779031035231063, + "learning_rate": 1.9848048115545125e-05, + "loss": 0.9677, "step": 1783 }, { - "epoch": 0.3740826169008178, - "grad_norm": 6.06988319820644, - "learning_rate": 1.9533863955227035e-05, - "loss": 2.4885, + "epoch": 0.25190624117480936, + "grad_norm": 4.058710684059094, + "learning_rate": 1.984778325720664e-05, + "loss": 1.1123, "step": 1784 }, { - "epoch": 0.37429230446634515, - "grad_norm": 5.9272562588300435, - "learning_rate": 1.953318057879717e-05, - "loss": 1.9623, + "epoch": 0.25204744422479525, + "grad_norm": 4.453232888731743, + "learning_rate": 1.984751817000992e-05, + "loss": 1.1083, "step": 1785 }, { - "epoch": 0.3745019920318725, - "grad_norm": 5.209736945066261, - "learning_rate": 1.9532496713775266e-05, - "loss": 2.24, + "epoch": 0.25218864727478113, + "grad_norm": 4.370839913831289, + "learning_rate": 1.9847252853961136e-05, + "loss": 1.0981, "step": 1786 }, { - "epoch": 0.37471167959739987, - "grad_norm": 5.344081708985399, - "learning_rate": 1.953181236019637e-05, - "loss": 2.066, + "epoch": 0.252329850324767, + "grad_norm": 3.9839838840807453, + "learning_rate": 1.9846987309066445e-05, + "loss": 0.9425, "step": 1787 }, { - "epoch": 0.37492136716292723, - "grad_norm": 5.605855067993545, - "learning_rate": 1.953112751809556e-05, - "loss": 2.2658, + "epoch": 0.2524710533747529, + "grad_norm": 4.054469771188677, + "learning_rate": 1.984672153533202e-05, + "loss": 0.9768, "step": 1788 }, { - "epoch": 0.3751310547284546, - "grad_norm": 5.011748979544954, - "learning_rate": 1.9530442187507934e-05, - "loss": 2.4437, + "epoch": 0.2526122564247388, + "grad_norm": 4.272242433499775, + "learning_rate": 1.9846455532764043e-05, + "loss": 1.0561, "step": 1789 }, { - "epoch": 0.37534074229398195, - "grad_norm": 5.775688418011658, - "learning_rate": 1.9529756368468616e-05, - "loss": 2.2131, + "epoch": 0.25275345947472466, + "grad_norm": 3.875389121875868, + "learning_rate": 1.984618930136869e-05, + "loss": 0.9315, "step": 1790 }, { - "epoch": 0.3755504298595093, - "grad_norm": 6.104004423939276, - "learning_rate": 1.9529070061012753e-05, - "loss": 2.5495, + "epoch": 0.25289466252471055, + "grad_norm": 4.951368455212235, + "learning_rate": 1.9845922841152153e-05, + "loss": 1.0357, "step": 1791 }, { - "epoch": 0.3757601174250367, - "grad_norm": 5.25476828898554, - "learning_rate": 1.9528383265175527e-05, - "loss": 2.0111, + "epoch": 0.25303586557469643, + "grad_norm": 3.995807978284194, + "learning_rate": 1.9845656152120617e-05, + "loss": 1.07, "step": 1792 }, { - "epoch": 0.37596980499056404, - "grad_norm": 6.026721085477506, - "learning_rate": 1.952769598099213e-05, - "loss": 2.3937, + "epoch": 0.2531770686246823, + "grad_norm": 5.65951238732216, + "learning_rate": 1.9845389234280285e-05, + "loss": 1.0214, "step": 1793 }, { - "epoch": 0.3761794925560914, - "grad_norm": 6.065106070500315, - "learning_rate": 1.9527008208497787e-05, - "loss": 2.3891, + "epoch": 0.2533182716746682, + "grad_norm": 4.777351695852719, + "learning_rate": 1.984512208763736e-05, + "loss": 1.2566, "step": 1794 }, { - "epoch": 0.37638918012161876, - "grad_norm": 5.493475065879344, - "learning_rate": 1.952631994772775e-05, - "loss": 2.0691, + "epoch": 0.2534594747246541, + "grad_norm": 4.764090788148082, + "learning_rate": 1.984485471219805e-05, + "loss": 1.3729, "step": 1795 }, { - "epoch": 0.3765988676871462, - "grad_norm": 5.413536039386208, - "learning_rate": 1.9525631198717297e-05, - "loss": 2.1441, + "epoch": 0.25360067777463996, + "grad_norm": 4.737575841062879, + "learning_rate": 1.9844587107968567e-05, + "loss": 1.0511, "step": 1796 }, { - "epoch": 0.37680855525267354, - "grad_norm": 5.343933474412332, - "learning_rate": 1.952494196150172e-05, - "loss": 2.0614, + "epoch": 0.2537418808246258, + "grad_norm": 3.626165689130723, + "learning_rate": 1.9844319274955132e-05, + "loss": 0.8752, "step": 1797 }, { - "epoch": 0.3770182428182009, - "grad_norm": 4.921739814336073, - "learning_rate": 1.952425223611635e-05, - "loss": 2.2284, + "epoch": 0.25388308387461167, + "grad_norm": 4.306529757723179, + "learning_rate": 1.9844051213163967e-05, + "loss": 1.096, "step": 1798 }, { - "epoch": 0.37722793038372826, - "grad_norm": 5.585877886367357, - "learning_rate": 1.952356202259653e-05, - "loss": 2.6193, + "epoch": 0.25402428692459755, + "grad_norm": 4.327195513342646, + "learning_rate": 1.9843782922601305e-05, + "loss": 1.2814, "step": 1799 }, { - "epoch": 0.3774376179492556, - "grad_norm": 5.1345446354232225, - "learning_rate": 1.952287132097764e-05, - "loss": 2.1897, + "epoch": 0.25416548997458344, + "grad_norm": 4.125935957404839, + "learning_rate": 1.9843514403273378e-05, + "loss": 1.1041, "step": 1800 }, { - "epoch": 0.377647305514783, - "grad_norm": 5.083000134912092, - "learning_rate": 1.952218013129508e-05, - "loss": 2.3993, + "epoch": 0.2543066930245693, + "grad_norm": 4.984178006536586, + "learning_rate": 1.984324565518643e-05, + "loss": 0.9635, "step": 1801 }, { - "epoch": 0.37785699308031034, - "grad_norm": 5.563620657023212, - "learning_rate": 1.9521488453584273e-05, - "loss": 1.9981, + "epoch": 0.2544478960745552, + "grad_norm": 4.652130141229484, + "learning_rate": 1.98429766783467e-05, + "loss": 1.0501, "step": 1802 }, { - "epoch": 0.3780666806458377, - "grad_norm": 5.418593657875649, - "learning_rate": 1.9520796287880665e-05, - "loss": 2.2082, + "epoch": 0.2545890991245411, + "grad_norm": 4.021118114615804, + "learning_rate": 1.9842707472760443e-05, + "loss": 0.9566, "step": 1803 }, { - "epoch": 0.37827636821136507, - "grad_norm": 4.89514384485245, - "learning_rate": 1.952010363421974e-05, - "loss": 2.3457, + "epoch": 0.25473030217452697, + "grad_norm": 4.0336123105712725, + "learning_rate": 1.984243803843392e-05, + "loss": 1.1905, "step": 1804 }, { - "epoch": 0.3784860557768924, - "grad_norm": 5.911870982962662, - "learning_rate": 1.9519410492636987e-05, - "loss": 2.3468, + "epoch": 0.25487150522451285, + "grad_norm": 4.947408705066826, + "learning_rate": 1.984216837537338e-05, + "loss": 1.1713, "step": 1805 }, { - "epoch": 0.3786957433424198, - "grad_norm": 5.190451477521803, - "learning_rate": 1.951871686316794e-05, - "loss": 2.4207, + "epoch": 0.25501270827449873, + "grad_norm": 4.153105601150481, + "learning_rate": 1.98418984835851e-05, + "loss": 1.1338, "step": 1806 }, { - "epoch": 0.37890543090794715, - "grad_norm": 6.236609353571022, - "learning_rate": 1.9518022745848138e-05, - "loss": 2.2166, + "epoch": 0.2551539113244846, + "grad_norm": 3.889235495695041, + "learning_rate": 1.9841628363075353e-05, + "loss": 0.9695, "step": 1807 }, { - "epoch": 0.3791151184734745, - "grad_norm": 5.566966338171751, - "learning_rate": 1.951732814071317e-05, - "loss": 2.1937, + "epoch": 0.2552951143744705, + "grad_norm": 4.446497531794838, + "learning_rate": 1.9841358013850413e-05, + "loss": 1.1453, "step": 1808 }, { - "epoch": 0.37932480603900187, - "grad_norm": 5.3646977857229015, - "learning_rate": 1.951663304779862e-05, - "loss": 2.3121, + "epoch": 0.2554363174244564, + "grad_norm": 4.399695425177541, + "learning_rate": 1.9841087435916558e-05, + "loss": 0.9461, "step": 1809 }, { - "epoch": 0.37953449360452923, - "grad_norm": 6.041580362498255, - "learning_rate": 1.9515937467140126e-05, - "loss": 2.3487, + "epoch": 0.25557752047444227, + "grad_norm": 4.538241626368104, + "learning_rate": 1.9840816629280087e-05, + "loss": 1.3242, "step": 1810 }, { - "epoch": 0.3797441811700566, - "grad_norm": 5.937631434629704, - "learning_rate": 1.951524139877333e-05, - "loss": 2.0437, + "epoch": 0.25571872352442815, + "grad_norm": 5.591825633558243, + "learning_rate": 1.9840545593947286e-05, + "loss": 1.2403, "step": 1811 }, { - "epoch": 0.37995386873558396, - "grad_norm": 4.702474627762571, - "learning_rate": 1.9514544842733907e-05, - "loss": 2.0293, + "epoch": 0.25585992657441403, + "grad_norm": 4.486984518292431, + "learning_rate": 1.9840274329924452e-05, + "loss": 1.0376, "step": 1812 }, { - "epoch": 0.38016355630111137, - "grad_norm": 6.2191410024144576, - "learning_rate": 1.9513847799057565e-05, - "loss": 2.0789, + "epoch": 0.2560011296243999, + "grad_norm": 4.329310339441637, + "learning_rate": 1.9840002837217894e-05, + "loss": 1.0225, "step": 1813 }, { - "epoch": 0.38037324386663873, - "grad_norm": 6.09845026031638, - "learning_rate": 1.9513150267780018e-05, - "loss": 2.2375, + "epoch": 0.25614233267438574, + "grad_norm": 4.431670697571927, + "learning_rate": 1.983973111583392e-05, + "loss": 0.9871, "step": 1814 }, { - "epoch": 0.3805829314321661, - "grad_norm": 6.151681530295985, - "learning_rate": 1.9512452248937022e-05, - "loss": 2.4172, + "epoch": 0.2562835357243716, + "grad_norm": 4.562471030669177, + "learning_rate": 1.9839459165778842e-05, + "loss": 1.2063, "step": 1815 }, { - "epoch": 0.38079261899769346, - "grad_norm": 6.2437661159773254, - "learning_rate": 1.9511753742564345e-05, - "loss": 2.2121, + "epoch": 0.2564247387743575, + "grad_norm": 4.63735361125598, + "learning_rate": 1.9839186987058986e-05, + "loss": 1.1223, "step": 1816 }, { - "epoch": 0.3810023065632208, - "grad_norm": 5.972548574427595, - "learning_rate": 1.9511054748697797e-05, - "loss": 2.3818, + "epoch": 0.2565659418243434, + "grad_norm": 5.525235587274402, + "learning_rate": 1.983891457968067e-05, + "loss": 1.2139, "step": 1817 }, { - "epoch": 0.3812119941287482, - "grad_norm": 5.6958767391655245, - "learning_rate": 1.9510355267373193e-05, - "loss": 2.0622, + "epoch": 0.2567071448743293, + "grad_norm": 4.2274964265540165, + "learning_rate": 1.9838641943650234e-05, + "loss": 0.9316, "step": 1818 }, { - "epoch": 0.38142168169427554, - "grad_norm": 5.925729691011436, - "learning_rate": 1.950965529862639e-05, - "loss": 2.1346, + "epoch": 0.25684834792431516, + "grad_norm": 4.201389861322536, + "learning_rate": 1.9838369078974003e-05, + "loss": 0.9655, "step": 1819 }, { - "epoch": 0.3816313692598029, - "grad_norm": 6.437704271425044, - "learning_rate": 1.9508954842493257e-05, - "loss": 2.4005, + "epoch": 0.25698955097430104, + "grad_norm": 3.7078024571924892, + "learning_rate": 1.9838095985658324e-05, + "loss": 0.9423, "step": 1820 }, { - "epoch": 0.38184105682533026, - "grad_norm": 5.728483053173975, - "learning_rate": 1.9508253899009697e-05, - "loss": 2.2269, + "epoch": 0.2571307540242869, + "grad_norm": 3.8335519991973657, + "learning_rate": 1.9837822663709544e-05, + "loss": 0.9154, "step": 1821 }, { - "epoch": 0.3820507443908576, - "grad_norm": 5.667349795046705, - "learning_rate": 1.9507552468211632e-05, - "loss": 2.2204, + "epoch": 0.2572719570742728, + "grad_norm": 4.203192669592402, + "learning_rate": 1.9837549113134015e-05, + "loss": 0.9266, "step": 1822 }, { - "epoch": 0.382260431956385, - "grad_norm": 5.244236341061125, - "learning_rate": 1.9506850550135015e-05, - "loss": 1.8756, + "epoch": 0.2574131601242587, + "grad_norm": 4.936588423601883, + "learning_rate": 1.9837275333938093e-05, + "loss": 1.1949, "step": 1823 }, { - "epoch": 0.38247011952191234, - "grad_norm": 5.58873787539878, - "learning_rate": 1.9506148144815818e-05, - "loss": 2.1292, + "epoch": 0.25755436317424457, + "grad_norm": 4.021932857666165, + "learning_rate": 1.983700132612814e-05, + "loss": 1.0484, "step": 1824 }, { - "epoch": 0.3826798070874397, - "grad_norm": 6.786100292525884, - "learning_rate": 1.950544525229004e-05, - "loss": 2.241, + "epoch": 0.25769556622423045, + "grad_norm": 4.924022429296793, + "learning_rate": 1.983672708971052e-05, + "loss": 1.3763, "step": 1825 }, { - "epoch": 0.38288949465296707, - "grad_norm": 6.481006854056999, - "learning_rate": 1.9504741872593707e-05, - "loss": 2.4264, + "epoch": 0.25783676927421634, + "grad_norm": 5.015444273726999, + "learning_rate": 1.9836452624691617e-05, + "loss": 1.1707, "step": 1826 }, { - "epoch": 0.3830991822184944, - "grad_norm": 6.375598964649223, - "learning_rate": 1.950403800576287e-05, - "loss": 2.306, + "epoch": 0.2579779723242022, + "grad_norm": 4.532623168637367, + "learning_rate": 1.98361779310778e-05, + "loss": 1.0912, "step": 1827 }, { - "epoch": 0.3833088697840218, - "grad_norm": 5.141686094663298, - "learning_rate": 1.95033336518336e-05, - "loss": 2.0233, + "epoch": 0.2581191753741881, + "grad_norm": 5.193585695370866, + "learning_rate": 1.9835903008875458e-05, + "loss": 1.2462, "step": 1828 }, { - "epoch": 0.38351855734954915, - "grad_norm": 5.501768906196965, - "learning_rate": 1.9502628810841996e-05, - "loss": 2.2887, + "epoch": 0.258260378424174, + "grad_norm": 3.9134066716890397, + "learning_rate": 1.9835627858090977e-05, + "loss": 1.0365, "step": 1829 }, { - "epoch": 0.3837282449150765, - "grad_norm": 6.9227133018860085, - "learning_rate": 1.9501923482824182e-05, - "loss": 2.4476, + "epoch": 0.25840158147415987, + "grad_norm": 4.120363927549136, + "learning_rate": 1.983535247873075e-05, + "loss": 1.2367, "step": 1830 }, { - "epoch": 0.3839379324806039, - "grad_norm": 7.5199385007583635, - "learning_rate": 1.9501217667816312e-05, - "loss": 2.2419, + "epoch": 0.2585427845241457, + "grad_norm": 5.263593883539839, + "learning_rate": 1.9835076870801183e-05, + "loss": 1.2767, "step": 1831 }, { - "epoch": 0.3841476200461313, - "grad_norm": 5.160971370597283, - "learning_rate": 1.9500511365854558e-05, - "loss": 1.8077, + "epoch": 0.2586839875741316, + "grad_norm": 5.3404783896382915, + "learning_rate": 1.9834801034308674e-05, + "loss": 1.0237, "step": 1832 }, { - "epoch": 0.38435730761165865, - "grad_norm": 6.827245150658511, - "learning_rate": 1.9499804576975117e-05, - "loss": 2.2194, + "epoch": 0.25882519062411746, + "grad_norm": 4.470164343138368, + "learning_rate": 1.9834524969259636e-05, + "loss": 1.0849, "step": 1833 }, { - "epoch": 0.384566995177186, - "grad_norm": 6.14911191175889, - "learning_rate": 1.9499097301214216e-05, - "loss": 2.3399, + "epoch": 0.25896639367410335, + "grad_norm": 4.139023629204524, + "learning_rate": 1.9834248675660484e-05, + "loss": 1.2606, "step": 1834 }, { - "epoch": 0.38477668274271337, - "grad_norm": 6.141183349714347, - "learning_rate": 1.94983895386081e-05, - "loss": 2.3681, + "epoch": 0.25910759672408923, + "grad_norm": 4.0174191362774945, + "learning_rate": 1.983397215351764e-05, + "loss": 0.9803, "step": 1835 }, { - "epoch": 0.38498637030824073, - "grad_norm": 5.960549119418551, - "learning_rate": 1.9497681289193053e-05, - "loss": 2.217, + "epoch": 0.2592487997740751, + "grad_norm": 4.3630589931847314, + "learning_rate": 1.9833695402837536e-05, + "loss": 1.0285, "step": 1836 }, { - "epoch": 0.3851960578737681, - "grad_norm": 6.813486625485713, - "learning_rate": 1.9496972553005365e-05, - "loss": 2.0061, + "epoch": 0.259390002824061, + "grad_norm": 3.8069062518913546, + "learning_rate": 1.9833418423626593e-05, + "loss": 1.0439, "step": 1837 }, { - "epoch": 0.38540574543929546, - "grad_norm": 6.265557676414994, - "learning_rate": 1.9496263330081357e-05, - "loss": 2.1097, + "epoch": 0.2595312058740469, + "grad_norm": 4.464269887314775, + "learning_rate": 1.9833141215891253e-05, + "loss": 1.1284, "step": 1838 }, { - "epoch": 0.3856154330048228, - "grad_norm": 5.133315055230063, - "learning_rate": 1.9495553620457385e-05, - "loss": 2.0452, + "epoch": 0.25967240892403276, + "grad_norm": 4.614950166656232, + "learning_rate": 1.9832863779637958e-05, + "loss": 1.2166, "step": 1839 }, { - "epoch": 0.3858251205703502, - "grad_norm": 6.173405561313836, - "learning_rate": 1.9494843424169823e-05, - "loss": 2.3421, + "epoch": 0.25981361197401864, + "grad_norm": 4.548326789240738, + "learning_rate": 1.9832586114873154e-05, + "loss": 0.955, "step": 1840 }, { - "epoch": 0.38603480813587754, - "grad_norm": 5.4848310846079755, - "learning_rate": 1.9494132741255067e-05, - "loss": 2.2359, + "epoch": 0.2599548150240045, + "grad_norm": 3.997095965911288, + "learning_rate": 1.9832308221603296e-05, + "loss": 1.1058, "step": 1841 }, { - "epoch": 0.3862444957014049, - "grad_norm": 6.331471632008918, - "learning_rate": 1.9493421571749543e-05, - "loss": 2.5103, + "epoch": 0.2600960180739904, + "grad_norm": 4.660185467602098, + "learning_rate": 1.983203009983484e-05, + "loss": 1.1507, "step": 1842 }, { - "epoch": 0.38645418326693226, - "grad_norm": 5.174664578086117, - "learning_rate": 1.9492709915689693e-05, - "loss": 2.023, + "epoch": 0.2602372211239763, + "grad_norm": 4.079378078114112, + "learning_rate": 1.9831751749574256e-05, + "loss": 0.9784, "step": 1843 }, { - "epoch": 0.3866638708324596, - "grad_norm": 5.5631940789043295, - "learning_rate": 1.9491997773111997e-05, - "loss": 2.2882, + "epoch": 0.2603784241739622, + "grad_norm": 4.564282973872989, + "learning_rate": 1.9831473170828003e-05, + "loss": 1.1426, "step": 1844 }, { - "epoch": 0.386873558397987, - "grad_norm": 5.804478665474135, - "learning_rate": 1.9491285144052955e-05, - "loss": 2.2521, + "epoch": 0.26051962722394806, + "grad_norm": 4.942052127544571, + "learning_rate": 1.9831194363602558e-05, + "loss": 1.114, "step": 1845 }, { - "epoch": 0.38708324596351434, - "grad_norm": 5.513779357743572, - "learning_rate": 1.9490572028549086e-05, - "loss": 2.2151, + "epoch": 0.26066083027393394, + "grad_norm": 4.055289426871556, + "learning_rate": 1.9830915327904402e-05, + "loss": 1.0268, "step": 1846 }, { - "epoch": 0.3872929335290417, - "grad_norm": 5.770079966300676, - "learning_rate": 1.948985842663694e-05, - "loss": 2.1534, + "epoch": 0.2608020333239198, + "grad_norm": 4.429536312295024, + "learning_rate": 1.9830636063740023e-05, + "loss": 1.207, "step": 1847 }, { - "epoch": 0.38750262109456907, - "grad_norm": 5.732785030799801, - "learning_rate": 1.948914433835309e-05, - "loss": 2.1023, + "epoch": 0.26094323637390565, + "grad_norm": 4.200766234184153, + "learning_rate": 1.9830356571115904e-05, + "loss": 1.2327, "step": 1848 }, { - "epoch": 0.3877123086600965, - "grad_norm": 6.392519378479588, - "learning_rate": 1.9488429763734132e-05, - "loss": 2.0394, + "epoch": 0.26108443942389153, + "grad_norm": 3.9245031102356123, + "learning_rate": 1.9830076850038545e-05, + "loss": 0.9127, "step": 1849 }, { - "epoch": 0.38792199622562384, - "grad_norm": 4.798667761130312, - "learning_rate": 1.9487714702816696e-05, - "loss": 2.3865, + "epoch": 0.2612256424738774, + "grad_norm": 3.867561791187962, + "learning_rate": 1.9829796900514445e-05, + "loss": 1.2064, "step": 1850 }, { - "epoch": 0.3881316837911512, - "grad_norm": 5.429639162390453, - "learning_rate": 1.9486999155637424e-05, - "loss": 2.0597, + "epoch": 0.2613668455238633, + "grad_norm": 3.668051280232123, + "learning_rate": 1.9829516722550113e-05, + "loss": 0.8755, "step": 1851 }, { - "epoch": 0.38834137135667857, - "grad_norm": 5.392244512945511, - "learning_rate": 1.948628312223299e-05, - "loss": 2.2082, + "epoch": 0.2615080485738492, + "grad_norm": 5.0322788153662295, + "learning_rate": 1.982923631615205e-05, + "loss": 1.2376, "step": 1852 }, { - "epoch": 0.3885510589222059, - "grad_norm": 6.755034231590265, - "learning_rate": 1.9485566602640095e-05, - "loss": 2.4778, + "epoch": 0.26164925162383507, + "grad_norm": 3.8740407222660154, + "learning_rate": 1.982895568132679e-05, + "loss": 0.9204, "step": 1853 }, { - "epoch": 0.3887607464877333, - "grad_norm": 4.953659009463895, - "learning_rate": 1.948484959689546e-05, - "loss": 2.0787, + "epoch": 0.26179045467382095, + "grad_norm": 4.558662679720923, + "learning_rate": 1.9828674818080837e-05, + "loss": 1.1142, "step": 1854 }, { - "epoch": 0.38897043405326065, - "grad_norm": 5.807587794949061, - "learning_rate": 1.9484132105035832e-05, - "loss": 2.0223, + "epoch": 0.26193165772380683, + "grad_norm": 4.532216931048915, + "learning_rate": 1.982839372642073e-05, + "loss": 1.0541, "step": 1855 }, { - "epoch": 0.389180121618788, - "grad_norm": 5.350079350058516, - "learning_rate": 1.948341412709798e-05, - "loss": 2.5058, + "epoch": 0.2620728607737927, + "grad_norm": 3.4928341904820592, + "learning_rate": 1.9828112406352994e-05, + "loss": 0.9201, "step": 1856 }, { - "epoch": 0.38938980918431537, - "grad_norm": 5.23715133920839, - "learning_rate": 1.948269566311871e-05, - "loss": 1.9987, + "epoch": 0.2622140638237786, + "grad_norm": 4.182121407039527, + "learning_rate": 1.9827830857884173e-05, + "loss": 1.2461, "step": 1857 }, { - "epoch": 0.38959949674984273, - "grad_norm": 5.593821027473954, - "learning_rate": 1.948197671313484e-05, - "loss": 2.3818, + "epoch": 0.2623552668737645, + "grad_norm": 3.830371471883557, + "learning_rate": 1.9827549081020806e-05, + "loss": 0.9488, "step": 1858 }, { - "epoch": 0.3898091843153701, - "grad_norm": 5.119605514207335, - "learning_rate": 1.948125727718322e-05, - "loss": 2.3033, + "epoch": 0.26249646992375036, + "grad_norm": 3.6586193188431904, + "learning_rate": 1.9827267075769444e-05, + "loss": 0.8693, "step": 1859 }, { - "epoch": 0.39001887188089746, - "grad_norm": 5.620300100417322, - "learning_rate": 1.9480537355300715e-05, - "loss": 2.3298, + "epoch": 0.26263767297373625, + "grad_norm": 4.098906291559029, + "learning_rate": 1.9826984842136637e-05, + "loss": 1.0552, "step": 1860 }, { - "epoch": 0.3902285594464248, - "grad_norm": 7.210124954699667, - "learning_rate": 1.947981694752423e-05, - "loss": 2.4181, + "epoch": 0.26277887602372213, + "grad_norm": 4.1658586058030656, + "learning_rate": 1.9826702380128946e-05, + "loss": 1.0166, "step": 1861 }, { - "epoch": 0.3904382470119522, - "grad_norm": 6.222535625493142, - "learning_rate": 1.9479096053890686e-05, - "loss": 2.4226, + "epoch": 0.262920079073708, + "grad_norm": 4.4573469226027935, + "learning_rate": 1.9826419689752935e-05, + "loss": 0.9906, "step": 1862 }, { - "epoch": 0.39064793457747954, - "grad_norm": 5.411448404953495, - "learning_rate": 1.9478374674437027e-05, - "loss": 2.3347, + "epoch": 0.2630612821236939, + "grad_norm": 4.0536958678435004, + "learning_rate": 1.9826136771015177e-05, + "loss": 0.9548, "step": 1863 }, { - "epoch": 0.3908576221430069, - "grad_norm": 5.912691002972943, - "learning_rate": 1.9477652809200224e-05, - "loss": 2.2209, + "epoch": 0.2632024851736798, + "grad_norm": 4.328741615896123, + "learning_rate": 1.9825853623922244e-05, + "loss": 1.3807, "step": 1864 }, { - "epoch": 0.39106730970853426, - "grad_norm": 5.801364757590116, - "learning_rate": 1.947693045821728e-05, - "loss": 2.1197, + "epoch": 0.2633436882236656, + "grad_norm": 4.965706208648264, + "learning_rate": 1.9825570248480713e-05, + "loss": 1.2048, "step": 1865 }, { - "epoch": 0.3912769972740616, - "grad_norm": 6.185036871439721, - "learning_rate": 1.9476207621525215e-05, - "loss": 2.4563, + "epoch": 0.2634848912736515, + "grad_norm": 4.404130566579816, + "learning_rate": 1.9825286644697176e-05, + "loss": 0.958, "step": 1866 }, { - "epoch": 0.39148668483958904, - "grad_norm": 5.63604263565064, - "learning_rate": 1.9475484299161066e-05, - "loss": 2.2743, + "epoch": 0.26362609432363737, + "grad_norm": 3.9337423491594024, + "learning_rate": 1.982500281257822e-05, + "loss": 0.903, "step": 1867 }, { - "epoch": 0.3916963724051164, - "grad_norm": 5.459923500836046, - "learning_rate": 1.947476049116192e-05, - "loss": 2.0538, + "epoch": 0.26376729737362326, + "grad_norm": 5.5165073415084, + "learning_rate": 1.982471875213044e-05, + "loss": 1.1091, "step": 1868 }, { - "epoch": 0.39190605997064376, - "grad_norm": 5.662428623856443, - "learning_rate": 1.9474036197564864e-05, - "loss": 2.0575, + "epoch": 0.26390850042360914, + "grad_norm": 5.458028776760574, + "learning_rate": 1.9824434463360442e-05, + "loss": 1.2246, "step": 1869 }, { - "epoch": 0.3921157475361711, - "grad_norm": 5.28329185545302, - "learning_rate": 1.9473311418407017e-05, - "loss": 1.9175, + "epoch": 0.264049703473595, + "grad_norm": 3.644002562072783, + "learning_rate": 1.9824149946274827e-05, + "loss": 0.9188, "step": 1870 }, { - "epoch": 0.3923254351016985, - "grad_norm": 5.269757960336864, - "learning_rate": 1.9472586153725532e-05, - "loss": 2.4371, + "epoch": 0.2641909065235809, + "grad_norm": 5.070646160407135, + "learning_rate": 1.9823865200880212e-05, + "loss": 1.2341, "step": 1871 }, { - "epoch": 0.39253512266722584, - "grad_norm": 5.325441176060469, - "learning_rate": 1.947186040355758e-05, - "loss": 2.099, + "epoch": 0.2643321095735668, + "grad_norm": 4.998665095140983, + "learning_rate": 1.982358022718321e-05, + "loss": 1.2827, "step": 1872 }, { - "epoch": 0.3927448102327532, - "grad_norm": 6.168658986315181, - "learning_rate": 1.947113416794035e-05, - "loss": 2.0369, + "epoch": 0.26447331262355267, + "grad_norm": 5.41471968244836, + "learning_rate": 1.9823295025190448e-05, + "loss": 1.1906, "step": 1873 }, { - "epoch": 0.39295449779828057, - "grad_norm": 5.144962237885244, - "learning_rate": 1.9470407446911068e-05, - "loss": 2.1908, + "epoch": 0.26461451567353855, + "grad_norm": 4.767947283912256, + "learning_rate": 1.9823009594908553e-05, + "loss": 1.0733, "step": 1874 }, { - "epoch": 0.3931641853638079, - "grad_norm": 6.138871633142044, - "learning_rate": 1.9469680240506982e-05, - "loss": 2.5676, + "epoch": 0.26475571872352444, + "grad_norm": 4.690024521267456, + "learning_rate": 1.9822723936344154e-05, + "loss": 1.0014, "step": 1875 }, { - "epoch": 0.3933738729293353, - "grad_norm": 5.658040325645907, - "learning_rate": 1.9468952548765353e-05, - "loss": 2.002, + "epoch": 0.2648969217735103, + "grad_norm": 4.456701659906732, + "learning_rate": 1.9822438049503894e-05, + "loss": 1.2019, "step": 1876 }, { - "epoch": 0.39358356049486265, - "grad_norm": 5.370314318369707, - "learning_rate": 1.946822437172349e-05, - "loss": 2.0515, + "epoch": 0.2650381248234962, + "grad_norm": 4.495809605495094, + "learning_rate": 1.9822151934394415e-05, + "loss": 1.1006, "step": 1877 }, { - "epoch": 0.39379324806039, - "grad_norm": 5.945691469761393, - "learning_rate": 1.94674957094187e-05, - "loss": 2.252, + "epoch": 0.2651793278734821, + "grad_norm": 4.340686436182212, + "learning_rate": 1.982186559102237e-05, + "loss": 1.0079, "step": 1878 }, { - "epoch": 0.39400293562591737, - "grad_norm": 5.470246503296191, - "learning_rate": 1.9466766561888337e-05, - "loss": 1.7592, + "epoch": 0.26532053092346797, + "grad_norm": 4.027422854136589, + "learning_rate": 1.982157901939441e-05, + "loss": 0.8951, "step": 1879 }, { - "epoch": 0.39421262319144473, - "grad_norm": 5.840271399222884, - "learning_rate": 1.946603692916977e-05, - "loss": 2.4053, + "epoch": 0.26546173397345385, + "grad_norm": 4.113046938285744, + "learning_rate": 1.982129221951719e-05, + "loss": 1.1811, "step": 1880 }, { - "epoch": 0.3944223107569721, - "grad_norm": 5.852093743622925, - "learning_rate": 1.946530681130039e-05, - "loss": 2.0543, + "epoch": 0.26560293702343973, + "grad_norm": 4.074988999428498, + "learning_rate": 1.9821005191397387e-05, + "loss": 1.007, "step": 1881 }, { - "epoch": 0.39463199832249946, - "grad_norm": 6.135305852856054, - "learning_rate": 1.9464576208317617e-05, - "loss": 2.2118, + "epoch": 0.26574414007342556, + "grad_norm": 4.806825603790409, + "learning_rate": 1.982071793504166e-05, + "loss": 1.2616, "step": 1882 }, { - "epoch": 0.3948416858880268, - "grad_norm": 6.140807251464246, - "learning_rate": 1.9463845120258903e-05, - "loss": 2.3049, + "epoch": 0.26588534312341144, + "grad_norm": 4.828424122877705, + "learning_rate": 1.982043045045669e-05, + "loss": 1.1724, "step": 1883 }, { - "epoch": 0.3950513734535542, - "grad_norm": 5.422938242095375, - "learning_rate": 1.946311354716171e-05, - "loss": 1.88, + "epoch": 0.2660265461733973, + "grad_norm": 4.317464637359753, + "learning_rate": 1.982014273764916e-05, + "loss": 0.9919, "step": 1884 }, { - "epoch": 0.3952610610190816, - "grad_norm": 5.251847127764769, - "learning_rate": 1.9462381489063532e-05, - "loss": 2.0867, + "epoch": 0.2661677492233832, + "grad_norm": 3.8615390443583806, + "learning_rate": 1.9819854796625756e-05, + "loss": 1.0182, "step": 1885 }, { - "epoch": 0.39547074858460896, - "grad_norm": 6.303060814259278, - "learning_rate": 1.9461648946001898e-05, - "loss": 2.3058, + "epoch": 0.2663089522733691, + "grad_norm": 4.7700047863813175, + "learning_rate": 1.981956662739316e-05, + "loss": 1.2221, "step": 1886 }, { - "epoch": 0.3956804361501363, - "grad_norm": 6.013341286899624, - "learning_rate": 1.946091591801434e-05, - "loss": 2.3494, + "epoch": 0.266450155323355, + "grad_norm": 4.166544717120669, + "learning_rate": 1.981927822995808e-05, + "loss": 1.4011, "step": 1887 }, { - "epoch": 0.3958901237156637, - "grad_norm": 6.4192967776392615, - "learning_rate": 1.946018240513843e-05, - "loss": 2.3006, + "epoch": 0.26659135837334086, + "grad_norm": 4.532693743281657, + "learning_rate": 1.9818989604327218e-05, + "loss": 1.1463, "step": 1888 }, { - "epoch": 0.39609981128119104, - "grad_norm": 6.281004130476324, - "learning_rate": 1.945944840741177e-05, - "loss": 2.0074, + "epoch": 0.26673256142332674, + "grad_norm": 5.886451921880403, + "learning_rate": 1.9818700750507275e-05, + "loss": 1.2252, "step": 1889 }, { - "epoch": 0.3963094988467184, - "grad_norm": 5.909475354444969, - "learning_rate": 1.945871392487197e-05, - "loss": 2.3995, + "epoch": 0.2668737644733126, + "grad_norm": 4.411944526600107, + "learning_rate": 1.9818411668504965e-05, + "loss": 1.1067, "step": 1890 }, { - "epoch": 0.39651918641224576, - "grad_norm": 5.748361824524642, - "learning_rate": 1.9457978957556675e-05, - "loss": 1.9936, + "epoch": 0.2670149675232985, + "grad_norm": 3.5556559459789727, + "learning_rate": 1.981812235832701e-05, + "loss": 0.841, "step": 1891 }, { - "epoch": 0.3967288739777731, - "grad_norm": 5.589512218717744, - "learning_rate": 1.9457243505503552e-05, - "loss": 2.1116, + "epoch": 0.2671561705732844, + "grad_norm": 3.98631227097006, + "learning_rate": 1.981783281998013e-05, + "loss": 1.0951, "step": 1892 }, { - "epoch": 0.3969385615433005, - "grad_norm": 4.813067779704906, - "learning_rate": 1.9456507568750303e-05, - "loss": 2.0623, + "epoch": 0.2672973736232703, + "grad_norm": 4.297593422012009, + "learning_rate": 1.9817543053471058e-05, + "loss": 1.2269, "step": 1893 }, { - "epoch": 0.39714824910882784, - "grad_norm": 5.8312918681549775, - "learning_rate": 1.9455771147334634e-05, - "loss": 2.3951, + "epoch": 0.26743857667325616, + "grad_norm": 4.330913247914617, + "learning_rate": 1.9817253058806525e-05, + "loss": 0.9833, "step": 1894 }, { - "epoch": 0.3973579366743552, - "grad_norm": 5.630188593868802, - "learning_rate": 1.9455034241294298e-05, - "loss": 1.9422, + "epoch": 0.26757977972324204, + "grad_norm": 4.353124030317538, + "learning_rate": 1.9816962835993268e-05, + "loss": 1.0562, "step": 1895 }, { - "epoch": 0.39756762423988257, - "grad_norm": 5.726957346586343, - "learning_rate": 1.9454296850667058e-05, - "loss": 2.1655, + "epoch": 0.2677209827732279, + "grad_norm": 5.140174186532284, + "learning_rate": 1.9816672385038033e-05, + "loss": 1.1521, "step": 1896 }, { - "epoch": 0.3977773118054099, - "grad_norm": 5.710283570253907, - "learning_rate": 1.94535589754907e-05, - "loss": 2.0759, + "epoch": 0.2678621858232138, + "grad_norm": 4.84021068206007, + "learning_rate": 1.9816381705947575e-05, + "loss": 1.066, "step": 1897 }, { - "epoch": 0.3979869993709373, - "grad_norm": 6.018976066968295, - "learning_rate": 1.9452820615803055e-05, - "loss": 2.5303, + "epoch": 0.2680033888731997, + "grad_norm": 4.655159459646432, + "learning_rate": 1.9816090798728648e-05, + "loss": 1.0596, "step": 1898 }, { - "epoch": 0.39819668693646465, - "grad_norm": 6.404827914917179, - "learning_rate": 1.9452081771641954e-05, - "loss": 2.1774, + "epoch": 0.2681445919231855, + "grad_norm": 3.855920025729369, + "learning_rate": 1.9815799663388003e-05, + "loss": 0.961, "step": 1899 }, { - "epoch": 0.398406374501992, - "grad_norm": 5.989486646424691, - "learning_rate": 1.9451342443045275e-05, - "loss": 2.1306, + "epoch": 0.2682857949731714, + "grad_norm": 4.596026299347673, + "learning_rate": 1.9815508299932417e-05, + "loss": 1.038, "step": 1900 }, { - "epoch": 0.39861606206751937, - "grad_norm": 6.162279013491712, - "learning_rate": 1.9450602630050898e-05, - "loss": 2.0919, + "epoch": 0.2684269980231573, + "grad_norm": 4.310839975237934, + "learning_rate": 1.981521670836866e-05, + "loss": 1.0237, "step": 1901 }, { - "epoch": 0.39882574963304673, - "grad_norm": 5.882485437451917, - "learning_rate": 1.9449862332696745e-05, - "loss": 2.4434, + "epoch": 0.26856820107314316, + "grad_norm": 5.173806071000135, + "learning_rate": 1.98149248887035e-05, + "loss": 1.1202, "step": 1902 }, { - "epoch": 0.39903543719857415, - "grad_norm": 5.618885858075298, - "learning_rate": 1.9449121551020757e-05, - "loss": 2.0226, + "epoch": 0.26870940412312905, + "grad_norm": 4.97568068247098, + "learning_rate": 1.9814632840943728e-05, + "loss": 1.328, "step": 1903 }, { - "epoch": 0.3992451247641015, - "grad_norm": 8.12403816550133, - "learning_rate": 1.9448380285060902e-05, - "loss": 2.2967, + "epoch": 0.26885060717311493, + "grad_norm": 5.68666850582161, + "learning_rate": 1.9814340565096124e-05, + "loss": 1.2292, "step": 1904 }, { - "epoch": 0.39945481232962887, - "grad_norm": 6.0100040213393235, - "learning_rate": 1.944763853485517e-05, - "loss": 1.907, + "epoch": 0.2689918102231008, + "grad_norm": 4.3267050761984756, + "learning_rate": 1.9814048061167486e-05, + "loss": 0.9448, "step": 1905 }, { - "epoch": 0.39966449989515623, - "grad_norm": 5.430596194283071, - "learning_rate": 1.9446896300441578e-05, - "loss": 1.9985, + "epoch": 0.2691330132730867, + "grad_norm": 4.969852177416566, + "learning_rate": 1.981375532916461e-05, + "loss": 1.183, "step": 1906 }, { - "epoch": 0.3998741874606836, - "grad_norm": 5.409521757371335, - "learning_rate": 1.9446153581858165e-05, - "loss": 1.8089, + "epoch": 0.2692742163230726, + "grad_norm": 5.102118002696035, + "learning_rate": 1.9813462369094297e-05, + "loss": 1.3245, "step": 1907 }, { - "epoch": 0.40008387502621096, - "grad_norm": 6.467306966086017, - "learning_rate": 1.9445410379142995e-05, - "loss": 2.3695, + "epoch": 0.26941541937305846, + "grad_norm": 4.3326818741031214, + "learning_rate": 1.981316918096336e-05, + "loss": 1.125, "step": 1908 }, { - "epoch": 0.4002935625917383, - "grad_norm": 5.908066545279196, - "learning_rate": 1.9444666692334164e-05, - "loss": 2.1107, + "epoch": 0.26955662242304435, + "grad_norm": 4.1085574487523875, + "learning_rate": 1.9812875764778604e-05, + "loss": 1.0606, "step": 1909 }, { - "epoch": 0.4005032501572657, - "grad_norm": 5.3699400708350975, - "learning_rate": 1.944392252146978e-05, - "loss": 2.2557, + "epoch": 0.26969782547303023, + "grad_norm": 4.072834574455018, + "learning_rate": 1.9812582120546854e-05, + "loss": 0.9583, "step": 1910 }, { - "epoch": 0.40071293772279304, - "grad_norm": 5.9692236072071285, - "learning_rate": 1.9443177866587995e-05, - "loss": 2.2331, + "epoch": 0.2698390285230161, + "grad_norm": 4.279078397477056, + "learning_rate": 1.981228824827494e-05, + "loss": 0.8896, "step": 1911 }, { - "epoch": 0.4009226252883204, - "grad_norm": 5.663327693465919, - "learning_rate": 1.944243272772696e-05, - "loss": 2.131, + "epoch": 0.269980231573002, + "grad_norm": 3.5694853898929417, + "learning_rate": 1.9811994147969676e-05, + "loss": 1.0055, "step": 1912 }, { - "epoch": 0.40113231285384776, - "grad_norm": 5.742374662670714, - "learning_rate": 1.944168710492487e-05, - "loss": 2.1401, + "epoch": 0.2701214346229879, + "grad_norm": 4.110524682268194, + "learning_rate": 1.981169981963791e-05, + "loss": 1.0007, "step": 1913 }, { - "epoch": 0.4013420004193751, - "grad_norm": 5.540911240883638, - "learning_rate": 1.9440940998219944e-05, - "loss": 2.3733, + "epoch": 0.27026263767297376, + "grad_norm": 4.0376177016082435, + "learning_rate": 1.9811405263286475e-05, + "loss": 1.0296, "step": 1914 }, { - "epoch": 0.4015516879849025, - "grad_norm": 5.787970281732358, - "learning_rate": 1.9440194407650416e-05, - "loss": 1.8939, + "epoch": 0.27040384072295964, + "grad_norm": 4.411827189006288, + "learning_rate": 1.981111047892222e-05, + "loss": 1.2874, "step": 1915 }, { - "epoch": 0.40176137555042984, - "grad_norm": 6.5950426253030985, - "learning_rate": 1.943944733325455e-05, - "loss": 2.239, + "epoch": 0.27054504377294547, + "grad_norm": 4.058164664932261, + "learning_rate": 1.9810815466551996e-05, + "loss": 1.0238, "step": 1916 }, { - "epoch": 0.4019710631159572, - "grad_norm": 5.41092777500551, - "learning_rate": 1.943869977507064e-05, - "loss": 2.094, + "epoch": 0.27068624682293135, + "grad_norm": 3.59293615374635, + "learning_rate": 1.9810520226182657e-05, + "loss": 0.8512, "step": 1917 }, { - "epoch": 0.40218075068148457, - "grad_norm": 5.275996145545977, - "learning_rate": 1.943795173313699e-05, - "loss": 2.0839, + "epoch": 0.27082744987291724, + "grad_norm": 4.279823982298696, + "learning_rate": 1.9810224757821063e-05, + "loss": 1.119, "step": 1918 }, { - "epoch": 0.40239043824701193, - "grad_norm": 5.845261694296803, - "learning_rate": 1.9437203207491948e-05, - "loss": 2.2976, + "epoch": 0.2709686529229031, + "grad_norm": 4.691157088386239, + "learning_rate": 1.9809929061474084e-05, + "loss": 1.2303, "step": 1919 }, { - "epoch": 0.4026001258125393, - "grad_norm": 5.623578486698355, - "learning_rate": 1.943645419817387e-05, - "loss": 1.96, + "epoch": 0.271109855972889, + "grad_norm": 4.905502708349846, + "learning_rate": 1.980963313714859e-05, + "loss": 1.1388, "step": 1920 }, { - "epoch": 0.4028098133780667, - "grad_norm": 6.04765277627018, - "learning_rate": 1.9435704705221155e-05, - "loss": 2.4378, + "epoch": 0.2712510590228749, + "grad_norm": 4.264705545238572, + "learning_rate": 1.980933698485146e-05, + "loss": 0.949, "step": 1921 }, { - "epoch": 0.40301950094359407, - "grad_norm": 5.141463983765473, - "learning_rate": 1.9434954728672202e-05, - "loss": 2.0341, + "epoch": 0.27139226207286077, + "grad_norm": 3.955521900346505, + "learning_rate": 1.9809040604589572e-05, + "loss": 0.9812, "step": 1922 }, { - "epoch": 0.40322918850912143, - "grad_norm": 5.801059022026269, - "learning_rate": 1.943420426856546e-05, - "loss": 2.4334, + "epoch": 0.27153346512284665, + "grad_norm": 4.301451343495329, + "learning_rate": 1.9808743996369816e-05, + "loss": 1.0911, "step": 1923 }, { - "epoch": 0.4034388760746488, - "grad_norm": 6.088325178561093, - "learning_rate": 1.9433453324939383e-05, - "loss": 2.3452, + "epoch": 0.27167466817283253, + "grad_norm": 4.333781540715173, + "learning_rate": 1.9808447160199087e-05, + "loss": 1.0575, "step": 1924 }, { - "epoch": 0.40364856364017615, - "grad_norm": 5.282094073983423, - "learning_rate": 1.943270189783246e-05, - "loss": 2.15, + "epoch": 0.2718158712228184, + "grad_norm": 4.434276229226678, + "learning_rate": 1.980815009608428e-05, + "loss": 1.1284, "step": 1925 }, { - "epoch": 0.4038582512057035, - "grad_norm": 6.54386698151231, - "learning_rate": 1.9431949987283206e-05, - "loss": 2.239, + "epoch": 0.2719570742728043, + "grad_norm": 4.266299412214705, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.9993, "step": 1926 }, { - "epoch": 0.4040679387712309, - "grad_norm": 5.713375697124021, - "learning_rate": 1.943119759333016e-05, - "loss": 2.2938, + "epoch": 0.2720982773227902, + "grad_norm": 4.864722831595278, + "learning_rate": 1.9807555284050063e-05, + "loss": 1.2809, "step": 1927 }, { - "epoch": 0.40427762633675823, - "grad_norm": 5.8333048924465185, - "learning_rate": 1.9430444716011875e-05, - "loss": 1.942, + "epoch": 0.27223948037277607, + "grad_norm": 5.0037588339866605, + "learning_rate": 1.9807257536144474e-05, + "loss": 1.0746, "step": 1928 }, { - "epoch": 0.4044873139022856, - "grad_norm": 6.517545971492462, - "learning_rate": 1.9429691355366944e-05, - "loss": 2.2491, + "epoch": 0.27238068342276195, + "grad_norm": 4.687973094368147, + "learning_rate": 1.9806959560322455e-05, + "loss": 1.2387, "step": 1929 }, { - "epoch": 0.40469700146781296, - "grad_norm": 5.437227110157038, - "learning_rate": 1.9428937511433975e-05, - "loss": 2.3364, + "epoch": 0.27252188647274783, + "grad_norm": 4.050124522510865, + "learning_rate": 1.980666135659093e-05, + "loss": 1.1225, "step": 1930 }, { - "epoch": 0.4049066890333403, - "grad_norm": 6.310440477579955, - "learning_rate": 1.9428183184251606e-05, - "loss": 1.8814, + "epoch": 0.2726630895227337, + "grad_norm": 5.044334944078193, + "learning_rate": 1.980636292495683e-05, + "loss": 1.2669, "step": 1931 }, { - "epoch": 0.4051163765988677, - "grad_norm": 6.330108793455676, - "learning_rate": 1.9427428373858496e-05, - "loss": 2.7728, + "epoch": 0.2728042925727196, + "grad_norm": 3.820292265866119, + "learning_rate": 1.9806064265427093e-05, + "loss": 1.1048, "step": 1932 }, { - "epoch": 0.40532606416439504, - "grad_norm": 5.700414575768137, - "learning_rate": 1.942667308029333e-05, - "loss": 2.0245, + "epoch": 0.2729454956227054, + "grad_norm": 4.45970707568549, + "learning_rate": 1.9805765378008653e-05, + "loss": 1.2018, "step": 1933 }, { - "epoch": 0.4055357517299224, - "grad_norm": 5.972185828031188, - "learning_rate": 1.9425917303594823e-05, - "loss": 2.2138, + "epoch": 0.2730866986726913, + "grad_norm": 4.393577316139637, + "learning_rate": 1.9805466262708464e-05, + "loss": 1.0597, "step": 1934 }, { - "epoch": 0.40574543929544976, - "grad_norm": 5.827876684316709, - "learning_rate": 1.94251610438017e-05, - "loss": 2.3002, + "epoch": 0.2732279017226772, + "grad_norm": 4.575523466843277, + "learning_rate": 1.9805166919533474e-05, + "loss": 1.0458, "step": 1935 }, { - "epoch": 0.4059551268609771, - "grad_norm": 6.242194809509504, - "learning_rate": 1.942440430095273e-05, - "loss": 2.3103, + "epoch": 0.2733691047726631, + "grad_norm": 4.618107131171763, + "learning_rate": 1.980486734849064e-05, + "loss": 1.2403, "step": 1936 }, { - "epoch": 0.4061648144265045, - "grad_norm": 6.014832790031313, - "learning_rate": 1.9423647075086693e-05, - "loss": 2.0443, + "epoch": 0.27351030782264896, + "grad_norm": 4.6686069169116, + "learning_rate": 1.980456754958692e-05, + "loss": 1.3606, "step": 1937 }, { - "epoch": 0.4063745019920319, - "grad_norm": 6.731533423516322, - "learning_rate": 1.94228893662424e-05, - "loss": 2.4112, + "epoch": 0.27365151087263484, + "grad_norm": 4.13624548917745, + "learning_rate": 1.9804267522829287e-05, + "loss": 1.0938, "step": 1938 }, { - "epoch": 0.40658418955755926, - "grad_norm": 5.100344149858468, - "learning_rate": 1.942213117445868e-05, - "loss": 1.9673, + "epoch": 0.2737927139226207, + "grad_norm": 4.194531072087034, + "learning_rate": 1.9803967268224708e-05, + "loss": 0.9957, "step": 1939 }, { - "epoch": 0.4067938771230866, - "grad_norm": 5.920650543366484, - "learning_rate": 1.94213724997744e-05, - "loss": 2.2348, + "epoch": 0.2739339169726066, + "grad_norm": 4.122994480672871, + "learning_rate": 1.9803666785780165e-05, + "loss": 0.9382, "step": 1940 }, { - "epoch": 0.407003564688614, - "grad_norm": 5.745769287027729, - "learning_rate": 1.9420613342228437e-05, - "loss": 2.0051, + "epoch": 0.2740751200225925, + "grad_norm": 5.075566278725887, + "learning_rate": 1.9803366075502638e-05, + "loss": 0.9363, "step": 1941 }, { - "epoch": 0.40721325225414134, - "grad_norm": 5.25576506290497, - "learning_rate": 1.9419853701859698e-05, - "loss": 2.1942, + "epoch": 0.27421632307257837, + "grad_norm": 3.7067382502660537, + "learning_rate": 1.9803065137399123e-05, + "loss": 0.9876, "step": 1942 }, { - "epoch": 0.4074229398196687, - "grad_norm": 5.602093551048213, - "learning_rate": 1.9419093578707125e-05, - "loss": 2.1569, + "epoch": 0.27435752612256425, + "grad_norm": 4.09596158947342, + "learning_rate": 1.98027639714766e-05, + "loss": 1.2029, "step": 1943 }, { - "epoch": 0.40763262738519607, - "grad_norm": 6.038675721324789, - "learning_rate": 1.9418332972809667e-05, - "loss": 2.1447, + "epoch": 0.27449872917255014, + "grad_norm": 4.766167113287923, + "learning_rate": 1.980246257774208e-05, + "loss": 1.0014, "step": 1944 }, { - "epoch": 0.40784231495072343, - "grad_norm": 5.568682957563024, - "learning_rate": 1.941757188420631e-05, - "loss": 2.0811, + "epoch": 0.274639932222536, + "grad_norm": 4.920310314345624, + "learning_rate": 1.9802160956202564e-05, + "loss": 1.0501, "step": 1945 }, { - "epoch": 0.4080520025162508, - "grad_norm": 6.560601566957782, - "learning_rate": 1.9416810312936056e-05, - "loss": 2.1096, + "epoch": 0.2747811352725219, + "grad_norm": 4.173753343779286, + "learning_rate": 1.980185910686506e-05, + "loss": 1.1264, "step": 1946 }, { - "epoch": 0.40826169008177815, - "grad_norm": 5.938554965271541, - "learning_rate": 1.9416048259037942e-05, - "loss": 2.2737, + "epoch": 0.2749223383225078, + "grad_norm": 4.145207220069322, + "learning_rate": 1.9801557029736585e-05, + "loss": 1.1966, "step": 1947 }, { - "epoch": 0.4084713776473055, - "grad_norm": 5.649517221423893, - "learning_rate": 1.9415285722551026e-05, - "loss": 2.02, + "epoch": 0.27506354137249367, + "grad_norm": 4.646764195670235, + "learning_rate": 1.9801254724824152e-05, + "loss": 1.0586, "step": 1948 }, { - "epoch": 0.4086810652128329, - "grad_norm": 5.872569513695348, - "learning_rate": 1.9414522703514384e-05, - "loss": 2.1347, + "epoch": 0.27520474442247955, + "grad_norm": 3.89261256015617, + "learning_rate": 1.98009521921348e-05, + "loss": 0.9301, "step": 1949 }, { - "epoch": 0.40889075277836023, - "grad_norm": 5.376845176851961, - "learning_rate": 1.9413759201967124e-05, - "loss": 1.8583, + "epoch": 0.2753459474724654, + "grad_norm": 3.914085594450724, + "learning_rate": 1.9800649431675544e-05, + "loss": 1.0692, "step": 1950 }, { - "epoch": 0.4091004403438876, - "grad_norm": 4.832515896166294, - "learning_rate": 1.941299521794838e-05, - "loss": 1.7673, + "epoch": 0.27548715052245126, + "grad_norm": 6.217100541119199, + "learning_rate": 1.9800346443453434e-05, + "loss": 1.2925, "step": 1951 }, { - "epoch": 0.40931012790941496, - "grad_norm": 5.6830955250949104, - "learning_rate": 1.9412230751497306e-05, - "loss": 2.1055, + "epoch": 0.27562835357243715, + "grad_norm": 4.180937250009694, + "learning_rate": 1.98000432274755e-05, + "loss": 1.0339, "step": 1952 }, { - "epoch": 0.4095198154749423, - "grad_norm": 6.3690993252994135, - "learning_rate": 1.941146580265308e-05, - "loss": 1.9798, + "epoch": 0.27576955662242303, + "grad_norm": 4.833108318940379, + "learning_rate": 1.9799739783748798e-05, + "loss": 1.286, "step": 1953 }, { - "epoch": 0.4097295030404697, - "grad_norm": 5.5432250469034985, - "learning_rate": 1.941070037145491e-05, - "loss": 2.092, + "epoch": 0.2759107596724089, + "grad_norm": 4.19034590252324, + "learning_rate": 1.9799436112280374e-05, + "loss": 0.9185, "step": 1954 }, { - "epoch": 0.40993919060599704, - "grad_norm": 5.002611567338543, - "learning_rate": 1.940993445794202e-05, - "loss": 2.316, + "epoch": 0.2760519627223948, + "grad_norm": 3.9467514311165757, + "learning_rate": 1.9799132213077283e-05, + "loss": 1.175, "step": 1955 }, { - "epoch": 0.41014887817152446, - "grad_norm": 5.482867095252288, - "learning_rate": 1.9409168062153668e-05, - "loss": 2.2611, + "epoch": 0.2761931657723807, + "grad_norm": 4.342706629159306, + "learning_rate": 1.9798828086146596e-05, + "loss": 1.2331, "step": 1956 }, { - "epoch": 0.4103585657370518, - "grad_norm": 5.865665536175434, - "learning_rate": 1.9408401184129135e-05, - "loss": 2.3242, + "epoch": 0.27633436882236656, + "grad_norm": 3.9251272153361216, + "learning_rate": 1.9798523731495373e-05, + "loss": 0.9976, "step": 1957 }, { - "epoch": 0.4105682533025792, - "grad_norm": 5.631661599101595, - "learning_rate": 1.9407633823907725e-05, - "loss": 2.3295, + "epoch": 0.27647557187235244, + "grad_norm": 4.9915444721580045, + "learning_rate": 1.9798219149130692e-05, + "loss": 1.3574, "step": 1958 }, { - "epoch": 0.41077794086810654, - "grad_norm": 6.734560103345229, - "learning_rate": 1.9406865981528765e-05, - "loss": 2.0642, + "epoch": 0.2766167749223383, + "grad_norm": 4.668249178903173, + "learning_rate": 1.979791433905963e-05, + "loss": 1.2374, "step": 1959 }, { - "epoch": 0.4109876284336339, - "grad_norm": 5.286191746354845, - "learning_rate": 1.9406097657031604e-05, - "loss": 1.9251, + "epoch": 0.2767579779723242, + "grad_norm": 4.081275883545955, + "learning_rate": 1.979760930128927e-05, + "loss": 1.0287, "step": 1960 }, { - "epoch": 0.41119731599916126, - "grad_norm": 5.484666641168899, - "learning_rate": 1.940532885045563e-05, - "loss": 2.3152, + "epoch": 0.2768991810223101, + "grad_norm": 4.38518891166007, + "learning_rate": 1.97973040358267e-05, + "loss": 1.1765, "step": 1961 }, { - "epoch": 0.4114070035646886, - "grad_norm": 5.510693198200412, - "learning_rate": 1.9404559561840235e-05, - "loss": 2.1775, + "epoch": 0.277040384072296, + "grad_norm": 4.024707125769459, + "learning_rate": 1.9796998542679015e-05, + "loss": 1.0753, "step": 1962 }, { - "epoch": 0.411616691130216, - "grad_norm": 6.082396449135801, - "learning_rate": 1.9403789791224855e-05, - "loss": 2.415, + "epoch": 0.27718158712228186, + "grad_norm": 4.525752594534333, + "learning_rate": 1.9796692821853315e-05, + "loss": 1.0752, "step": 1963 }, { - "epoch": 0.41182637869574334, - "grad_norm": 5.450417707000446, - "learning_rate": 1.940301953864893e-05, - "loss": 2.2307, + "epoch": 0.27732279017226774, + "grad_norm": 4.426957831487955, + "learning_rate": 1.979638687335671e-05, + "loss": 1.0836, "step": 1964 }, { - "epoch": 0.4120360662612707, - "grad_norm": 5.605150020655879, - "learning_rate": 1.9402248804151952e-05, - "loss": 2.125, + "epoch": 0.2774639932222536, + "grad_norm": 4.161187631948007, + "learning_rate": 1.97960806971963e-05, + "loss": 0.9516, "step": 1965 }, { - "epoch": 0.41224575382679807, - "grad_norm": 4.615372468950363, - "learning_rate": 1.9401477587773415e-05, - "loss": 2.3, + "epoch": 0.2776051962722395, + "grad_norm": 4.837229134664591, + "learning_rate": 1.9795774293379206e-05, + "loss": 1.1914, "step": 1966 }, { - "epoch": 0.41245544139232543, - "grad_norm": 5.912093454099453, - "learning_rate": 1.9400705889552845e-05, - "loss": 2.3672, + "epoch": 0.27774639932222533, + "grad_norm": 4.590584776593588, + "learning_rate": 1.9795467661912545e-05, + "loss": 1.1588, "step": 1967 }, { - "epoch": 0.4126651289578528, - "grad_norm": 4.885158072336608, - "learning_rate": 1.9399933709529793e-05, - "loss": 2.1908, + "epoch": 0.2778876023722112, + "grad_norm": 4.755577760977489, + "learning_rate": 1.979516080280345e-05, + "loss": 1.0861, "step": 1968 }, { - "epoch": 0.41287481652338015, - "grad_norm": 5.488791015816429, - "learning_rate": 1.9399161047743838e-05, - "loss": 2.4788, + "epoch": 0.2780288054221971, + "grad_norm": 3.8656949427527536, + "learning_rate": 1.9794853716059045e-05, + "loss": 0.9719, "step": 1969 }, { - "epoch": 0.4130845040889075, - "grad_norm": 5.373212277401722, - "learning_rate": 1.939838790423457e-05, - "loss": 2.1084, + "epoch": 0.278170008472183, + "grad_norm": 4.575697976512219, + "learning_rate": 1.979454640168647e-05, + "loss": 1.1418, "step": 1970 }, { - "epoch": 0.4132941916544349, - "grad_norm": 5.147488078337295, - "learning_rate": 1.939761427904163e-05, - "loss": 2.1311, + "epoch": 0.27831121152216887, + "grad_norm": 4.900714937784135, + "learning_rate": 1.9794238859692866e-05, + "loss": 1.1897, "step": 1971 }, { - "epoch": 0.41350387921996223, - "grad_norm": 5.878288971852596, - "learning_rate": 1.9396840172204654e-05, - "loss": 2.4611, + "epoch": 0.27845241457215475, + "grad_norm": 4.640687438285169, + "learning_rate": 1.9793931090085385e-05, + "loss": 0.9471, "step": 1972 }, { - "epoch": 0.4137135667854896, - "grad_norm": 5.512726405067035, - "learning_rate": 1.9396065583763323e-05, - "loss": 2.3146, + "epoch": 0.27859361762214063, + "grad_norm": 4.5373388357865165, + "learning_rate": 1.9793623092871172e-05, + "loss": 1.2946, "step": 1973 }, { - "epoch": 0.413923254351017, - "grad_norm": 6.948480560974792, - "learning_rate": 1.939529051375733e-05, - "loss": 2.3548, + "epoch": 0.2787348206721265, + "grad_norm": 4.642345560210742, + "learning_rate": 1.9793314868057387e-05, + "loss": 0.9214, "step": 1974 }, { - "epoch": 0.4141329419165444, - "grad_norm": 5.760753375338554, - "learning_rate": 1.9394514962226406e-05, - "loss": 2.2447, + "epoch": 0.2788760237221124, + "grad_norm": 4.390810116610071, + "learning_rate": 1.9793006415651198e-05, + "loss": 1.1299, "step": 1975 }, { - "epoch": 0.41434262948207173, - "grad_norm": 4.665031705307105, - "learning_rate": 1.9393738929210296e-05, - "loss": 2.2618, + "epoch": 0.2790172267720983, + "grad_norm": 4.438961443781038, + "learning_rate": 1.9792697735659766e-05, + "loss": 1.2401, "step": 1976 }, { - "epoch": 0.4145523170475991, - "grad_norm": 5.662796359610734, - "learning_rate": 1.9392962414748776e-05, - "loss": 2.4672, + "epoch": 0.27915842982208416, + "grad_norm": 4.110013407261194, + "learning_rate": 1.979238882809027e-05, + "loss": 0.9505, "step": 1977 }, { - "epoch": 0.41476200461312646, - "grad_norm": 5.603834743461014, - "learning_rate": 1.9392185418881637e-05, - "loss": 2.1682, + "epoch": 0.27929963287207005, + "grad_norm": 4.240267762400407, + "learning_rate": 1.979207969294988e-05, + "loss": 1.005, "step": 1978 }, { - "epoch": 0.4149716921786538, - "grad_norm": 5.761363284897148, - "learning_rate": 1.9391407941648704e-05, - "loss": 2.2492, + "epoch": 0.27944083592205593, + "grad_norm": 4.0388072578216905, + "learning_rate": 1.9791770330245793e-05, + "loss": 0.8163, "step": 1979 }, { - "epoch": 0.4151813797441812, - "grad_norm": 5.55979120910232, - "learning_rate": 1.939062998308983e-05, - "loss": 2.478, + "epoch": 0.2795820389720418, + "grad_norm": 4.050622215379461, + "learning_rate": 1.979146073998519e-05, + "loss": 0.9849, "step": 1980 }, { - "epoch": 0.41539106730970854, - "grad_norm": 5.256870641705434, - "learning_rate": 1.938985154324488e-05, - "loss": 2.2195, + "epoch": 0.2797232420220277, + "grad_norm": 4.696401766465645, + "learning_rate": 1.979115092217527e-05, + "loss": 1.328, "step": 1981 }, { - "epoch": 0.4156007548752359, - "grad_norm": 4.598153324651477, - "learning_rate": 1.9389072622153754e-05, - "loss": 2.3643, + "epoch": 0.2798644450720136, + "grad_norm": 4.005898408070976, + "learning_rate": 1.979084087682323e-05, + "loss": 1.0332, "step": 1982 }, { - "epoch": 0.41581044244076326, - "grad_norm": 5.478001173789781, - "learning_rate": 1.9388293219856373e-05, - "loss": 2.0816, + "epoch": 0.28000564812199946, + "grad_norm": 4.176778626936255, + "learning_rate": 1.9790530603936275e-05, + "loss": 0.9744, "step": 1983 }, { - "epoch": 0.4160201300062906, - "grad_norm": 5.228408349610673, - "learning_rate": 1.9387513336392676e-05, - "loss": 1.9742, + "epoch": 0.2801468511719853, + "grad_norm": 3.9965116131100897, + "learning_rate": 1.979022010352162e-05, + "loss": 1.0151, "step": 1984 }, { - "epoch": 0.416229817571818, - "grad_norm": 4.808294444991597, - "learning_rate": 1.938673297180264e-05, - "loss": 2.2271, + "epoch": 0.2802880542219712, + "grad_norm": 4.508909087531889, + "learning_rate": 1.9789909375586477e-05, + "loss": 1.3784, "step": 1985 }, { - "epoch": 0.41643950513734534, - "grad_norm": 5.438841530953087, - "learning_rate": 1.938595212612626e-05, - "loss": 2.2377, + "epoch": 0.28042925727195706, + "grad_norm": 3.999101969427345, + "learning_rate": 1.9789598420138065e-05, + "loss": 0.9298, "step": 1986 }, { - "epoch": 0.4166491927028727, - "grad_norm": 5.375547713308862, - "learning_rate": 1.9385170799403553e-05, - "loss": 2.2204, + "epoch": 0.28057046032194294, + "grad_norm": 4.861343308960142, + "learning_rate": 1.9789287237183616e-05, + "loss": 1.1876, "step": 1987 }, { - "epoch": 0.41685888026840007, - "grad_norm": 5.790129632703097, - "learning_rate": 1.9384388991674568e-05, - "loss": 2.1856, + "epoch": 0.2807116633719288, + "grad_norm": 5.267061242652416, + "learning_rate": 1.9788975826730362e-05, + "loss": 1.3012, "step": 1988 }, { - "epoch": 0.41706856783392743, - "grad_norm": 5.9808512888628185, - "learning_rate": 1.938360670297937e-05, - "loss": 2.0137, + "epoch": 0.2808528664219147, + "grad_norm": 5.259085379914763, + "learning_rate": 1.9788664188785535e-05, + "loss": 1.271, "step": 1989 }, { - "epoch": 0.4172782553994548, - "grad_norm": 5.359704491298829, - "learning_rate": 1.9382823933358052e-05, - "loss": 1.8506, + "epoch": 0.2809940694719006, + "grad_norm": 4.418607763924726, + "learning_rate": 1.9788352323356376e-05, + "loss": 1.1713, "step": 1990 }, { - "epoch": 0.41748794296498215, - "grad_norm": 5.746329330604571, - "learning_rate": 1.9382040682850736e-05, - "loss": 2.353, + "epoch": 0.28113527252188647, + "grad_norm": 4.352207990373677, + "learning_rate": 1.978804023045014e-05, + "loss": 1.137, "step": 1991 }, { - "epoch": 0.41769763053050957, - "grad_norm": 6.7324940984961605, - "learning_rate": 1.938125695149756e-05, - "loss": 2.2354, + "epoch": 0.28127647557187235, + "grad_norm": 4.061215820668716, + "learning_rate": 1.9787727910074077e-05, + "loss": 0.9199, "step": 1992 }, { - "epoch": 0.41790731809603693, - "grad_norm": 5.549159840172159, - "learning_rate": 1.9380472739338697e-05, - "loss": 1.8577, + "epoch": 0.28141767862185824, + "grad_norm": 5.02335803297421, + "learning_rate": 1.978741536223544e-05, + "loss": 1.3946, "step": 1993 }, { - "epoch": 0.4181170056615643, - "grad_norm": 6.567428796883728, - "learning_rate": 1.9379688046414333e-05, - "loss": 2.1882, + "epoch": 0.2815588816718441, + "grad_norm": 4.521580657225586, + "learning_rate": 1.97871025869415e-05, + "loss": 1.1222, "step": 1994 }, { - "epoch": 0.41832669322709165, - "grad_norm": 6.442302177292334, - "learning_rate": 1.9378902872764687e-05, - "loss": 2.1867, + "epoch": 0.28170008472183, + "grad_norm": 3.8191148360109244, + "learning_rate": 1.9786789584199523e-05, + "loss": 1.0476, "step": 1995 }, { - "epoch": 0.418536380792619, - "grad_norm": 5.721773668328924, - "learning_rate": 1.9378117218430007e-05, - "loss": 1.9088, + "epoch": 0.2818412877718159, + "grad_norm": 4.174635577704492, + "learning_rate": 1.9786476354016782e-05, + "loss": 0.997, "step": 1996 }, { - "epoch": 0.4187460683581464, - "grad_norm": 7.116147412325039, - "learning_rate": 1.9377331083450553e-05, - "loss": 2.4424, + "epoch": 0.28198249082180177, + "grad_norm": 3.4299898513764697, + "learning_rate": 1.9786162896400558e-05, + "loss": 0.8523, "step": 1997 }, { - "epoch": 0.41895575592367373, - "grad_norm": 6.294320289918357, - "learning_rate": 1.9376544467866618e-05, - "loss": 2.4735, + "epoch": 0.28212369387178765, + "grad_norm": 3.795614369695515, + "learning_rate": 1.9785849211358133e-05, + "loss": 1.0443, "step": 1998 }, { - "epoch": 0.4191654434892011, - "grad_norm": 5.862530198939572, - "learning_rate": 1.9375757371718512e-05, - "loss": 2.1935, + "epoch": 0.28226489692177353, + "grad_norm": 4.534375150432486, + "learning_rate": 1.9785535298896797e-05, + "loss": 0.9503, "step": 1999 }, { - "epoch": 0.41937513105472846, - "grad_norm": 6.265295477683987, - "learning_rate": 1.937496979504658e-05, - "loss": 2.2382, + "epoch": 0.2824060999717594, + "grad_norm": 4.337289273239031, + "learning_rate": 1.9785221159023852e-05, + "loss": 1.1207, "step": 2000 }, { - "epoch": 0.4195848186202558, - "grad_norm": 6.060609809494632, - "learning_rate": 1.937418173789119e-05, - "loss": 2.0672, + "epoch": 0.28254730302174524, + "grad_norm": 4.026007673664495, + "learning_rate": 1.978490679174659e-05, + "loss": 0.9742, "step": 2001 }, { - "epoch": 0.4197945061857832, - "grad_norm": 5.832967778638011, - "learning_rate": 1.937339320029272e-05, - "loss": 2.1739, + "epoch": 0.2826885060717311, + "grad_norm": 4.402877866545439, + "learning_rate": 1.9784592197072317e-05, + "loss": 1.0959, "step": 2002 }, { - "epoch": 0.42000419375131054, - "grad_norm": 5.9144866941147, - "learning_rate": 1.93726041822916e-05, - "loss": 2.42, + "epoch": 0.282829709121717, + "grad_norm": 4.145075755409406, + "learning_rate": 1.978427737500835e-05, + "loss": 0.8702, "step": 2003 }, { - "epoch": 0.4202138813168379, - "grad_norm": 5.769492824448438, - "learning_rate": 1.9371814683928254e-05, - "loss": 2.107, + "epoch": 0.2829709121717029, + "grad_norm": 4.473533883781703, + "learning_rate": 1.9783962325562004e-05, + "loss": 1.0151, "step": 2004 }, { - "epoch": 0.42042356888236526, - "grad_norm": 6.128745604677519, - "learning_rate": 1.9371024705243153e-05, - "loss": 2.2926, + "epoch": 0.2831121152216888, + "grad_norm": 4.248658560673944, + "learning_rate": 1.9783647048740597e-05, + "loss": 1.1081, "step": 2005 }, { - "epoch": 0.4206332564478926, - "grad_norm": 6.94799669224784, - "learning_rate": 1.937023424627678e-05, - "loss": 2.0711, + "epoch": 0.28325331827167466, + "grad_norm": 4.760186713189362, + "learning_rate": 1.9783331544551457e-05, + "loss": 1.3244, "step": 2006 }, { - "epoch": 0.42084294401342, - "grad_norm": 6.253542380198567, - "learning_rate": 1.9369443307069652e-05, - "loss": 2.1404, + "epoch": 0.28339452132166054, + "grad_norm": 3.89054324974062, + "learning_rate": 1.978301581300192e-05, + "loss": 1.0176, "step": 2007 }, { - "epoch": 0.42105263157894735, - "grad_norm": 5.731252920061354, - "learning_rate": 1.9368651887662305e-05, - "loss": 1.9719, + "epoch": 0.2835357243716464, + "grad_norm": 4.416503973128699, + "learning_rate": 1.9782699854099316e-05, + "loss": 0.9002, "step": 2008 }, { - "epoch": 0.4212623191444747, - "grad_norm": 5.54887162896101, - "learning_rate": 1.93678599880953e-05, - "loss": 2.1454, + "epoch": 0.2836769274216323, + "grad_norm": 4.539685188810968, + "learning_rate": 1.9782383667850994e-05, + "loss": 0.9914, "step": 2009 }, { - "epoch": 0.4214720067100021, - "grad_norm": 6.9384749252134865, - "learning_rate": 1.936706760840922e-05, - "loss": 2.3265, + "epoch": 0.2838181304716182, + "grad_norm": 4.557279010744634, + "learning_rate": 1.97820672542643e-05, + "loss": 1.1256, "step": 2010 }, { - "epoch": 0.4216816942755295, - "grad_norm": 5.880858818188466, - "learning_rate": 1.936627474864468e-05, - "loss": 2.356, + "epoch": 0.2839593335216041, + "grad_norm": 4.934417060893699, + "learning_rate": 1.978175061334659e-05, + "loss": 1.3845, "step": 2011 }, { - "epoch": 0.42189138184105685, - "grad_norm": 5.834522963809873, - "learning_rate": 1.9365481408842316e-05, - "loss": 2.2803, + "epoch": 0.28410053657158996, + "grad_norm": 4.17693016203107, + "learning_rate": 1.9781433745105218e-05, + "loss": 1.0407, "step": 2012 }, { - "epoch": 0.4221010694065842, - "grad_norm": 5.890280060192829, - "learning_rate": 1.9364687589042784e-05, - "loss": 2.2733, + "epoch": 0.28424173962157584, + "grad_norm": 3.744653586266438, + "learning_rate": 1.978111664954755e-05, + "loss": 0.9955, "step": 2013 }, { - "epoch": 0.42231075697211157, - "grad_norm": 6.7025715581683825, - "learning_rate": 1.936389328928677e-05, - "loss": 2.2507, + "epoch": 0.2843829426715617, + "grad_norm": 4.561772943998098, + "learning_rate": 1.9780799326680956e-05, + "loss": 1.1598, "step": 2014 }, { - "epoch": 0.42252044453763893, - "grad_norm": 5.862394687737632, - "learning_rate": 1.9363098509614986e-05, - "loss": 2.3053, + "epoch": 0.2845241457215476, + "grad_norm": 5.080010390830661, + "learning_rate": 1.978048177651281e-05, + "loss": 1.361, "step": 2015 }, { - "epoch": 0.4227301321031663, - "grad_norm": 6.616369166435254, - "learning_rate": 1.9362303250068166e-05, - "loss": 2.2514, + "epoch": 0.2846653487715335, + "grad_norm": 3.82776282777789, + "learning_rate": 1.978016399905049e-05, + "loss": 0.9883, "step": 2016 }, { - "epoch": 0.42293981966869365, - "grad_norm": 5.314512617857007, - "learning_rate": 1.936150751068706e-05, - "loss": 1.8628, + "epoch": 0.28480655182151937, + "grad_norm": 4.32869082156598, + "learning_rate": 1.977984599430138e-05, + "loss": 1.289, "step": 2017 }, { - "epoch": 0.423149507234221, - "grad_norm": 6.13395910387215, - "learning_rate": 1.9360711291512463e-05, - "loss": 2.4728, + "epoch": 0.2849477548715052, + "grad_norm": 4.3485794939525055, + "learning_rate": 1.9779527762272877e-05, + "loss": 1.1465, "step": 2018 }, { - "epoch": 0.4233591947997484, - "grad_norm": 6.6003835680659115, - "learning_rate": 1.9359914592585174e-05, - "loss": 2.0128, + "epoch": 0.2850889579214911, + "grad_norm": 4.040964286303334, + "learning_rate": 1.9779209302972372e-05, + "loss": 1.0906, "step": 2019 }, { - "epoch": 0.42356888236527573, - "grad_norm": 5.536355983436875, - "learning_rate": 1.935911741394603e-05, - "loss": 2.0144, + "epoch": 0.28523016097147696, + "grad_norm": 4.7333264742203705, + "learning_rate": 1.9778890616407266e-05, + "loss": 1.3597, "step": 2020 }, { - "epoch": 0.4237785699308031, - "grad_norm": 6.770656421196363, - "learning_rate": 1.9358319755635886e-05, - "loss": 2.237, + "epoch": 0.28537136402146285, + "grad_norm": 4.120846192004582, + "learning_rate": 1.9778571702584964e-05, + "loss": 1.1739, "step": 2021 }, { - "epoch": 0.42398825749633046, - "grad_norm": 5.178819318223887, - "learning_rate": 1.9357521617695622e-05, - "loss": 2.2974, + "epoch": 0.28551256707144873, + "grad_norm": 4.516244367261513, + "learning_rate": 1.977825256151288e-05, + "loss": 1.3577, "step": 2022 }, { - "epoch": 0.4241979450618578, - "grad_norm": 5.249639613602149, - "learning_rate": 1.9356723000166144e-05, - "loss": 2.2677, + "epoch": 0.2856537701214346, + "grad_norm": 4.0108925589159306, + "learning_rate": 1.977793319319843e-05, + "loss": 0.8988, "step": 2023 }, { - "epoch": 0.4244076326273852, - "grad_norm": 6.109689196967141, - "learning_rate": 1.9355923903088387e-05, - "loss": 1.96, + "epoch": 0.2857949731714205, + "grad_norm": 4.3321504713073935, + "learning_rate": 1.9777613597649033e-05, + "loss": 1.0573, "step": 2024 }, { - "epoch": 0.42461732019291254, - "grad_norm": 4.92541630073264, - "learning_rate": 1.93551243265033e-05, - "loss": 2.2351, + "epoch": 0.2859361762214064, + "grad_norm": 4.285484247734015, + "learning_rate": 1.977729377487212e-05, + "loss": 1.1399, "step": 2025 }, { - "epoch": 0.4248270077584399, - "grad_norm": 5.602064620773708, - "learning_rate": 1.9354324270451868e-05, - "loss": 2.1125, + "epoch": 0.28607737927139226, + "grad_norm": 3.681731421199415, + "learning_rate": 1.9776973724875123e-05, + "loss": 1.0067, "step": 2026 }, { - "epoch": 0.42503669532396726, - "grad_norm": 6.330968911541508, - "learning_rate": 1.9353523734975093e-05, - "loss": 2.2718, + "epoch": 0.28621858232137815, + "grad_norm": 4.50057295846331, + "learning_rate": 1.9776653447665476e-05, + "loss": 1.091, "step": 2027 }, { - "epoch": 0.4252463828894947, - "grad_norm": 5.135501745144233, - "learning_rate": 1.9352722720114e-05, - "loss": 2.3348, + "epoch": 0.28635978537136403, + "grad_norm": 4.377690403930399, + "learning_rate": 1.9776332943250628e-05, + "loss": 0.871, "step": 2028 }, { - "epoch": 0.42545607045502204, - "grad_norm": 5.126734120913945, - "learning_rate": 1.935192122590965e-05, - "loss": 1.8358, + "epoch": 0.2865009884213499, + "grad_norm": 3.917457818102609, + "learning_rate": 1.977601221163802e-05, + "loss": 1.1456, "step": 2029 }, { - "epoch": 0.4256657580205494, - "grad_norm": 6.506488498787985, - "learning_rate": 1.9351119252403117e-05, - "loss": 2.07, + "epoch": 0.2866421914713358, + "grad_norm": 4.334577106906182, + "learning_rate": 1.9775691252835113e-05, + "loss": 1.1127, "step": 2030 }, { - "epoch": 0.42587544558607676, - "grad_norm": 5.996247350017663, - "learning_rate": 1.93503167996355e-05, - "loss": 2.3962, + "epoch": 0.2867833945213217, + "grad_norm": 4.310776613987475, + "learning_rate": 1.9775370066849363e-05, + "loss": 1.2173, "step": 2031 }, { - "epoch": 0.4260851331516041, - "grad_norm": 6.091321862307605, - "learning_rate": 1.9349513867647934e-05, - "loss": 2.3194, + "epoch": 0.28692459757130756, + "grad_norm": 3.6107278214409466, + "learning_rate": 1.9775048653688235e-05, + "loss": 1.1559, "step": 2032 }, { - "epoch": 0.4262948207171315, - "grad_norm": 6.715491028435017, - "learning_rate": 1.9348710456481567e-05, - "loss": 2.1027, + "epoch": 0.28706580062129344, + "grad_norm": 4.50530080782248, + "learning_rate": 1.97747270133592e-05, + "loss": 1.1955, "step": 2033 }, { - "epoch": 0.42650450828265885, - "grad_norm": 5.94320774840823, - "learning_rate": 1.9347906566177572e-05, - "loss": 2.2841, + "epoch": 0.2872070036712793, + "grad_norm": 4.288788220244859, + "learning_rate": 1.9774405145869728e-05, + "loss": 1.2162, "step": 2034 }, { - "epoch": 0.4267141958481862, - "grad_norm": 5.734420617348297, - "learning_rate": 1.9347102196777153e-05, - "loss": 2.0446, + "epoch": 0.28734820672126515, + "grad_norm": 3.808717115242739, + "learning_rate": 1.97740830512273e-05, + "loss": 0.9448, "step": 2035 }, { - "epoch": 0.42692388341371357, - "grad_norm": 5.824742045198555, - "learning_rate": 1.9346297348321534e-05, - "loss": 2.2614, + "epoch": 0.28748940977125104, + "grad_norm": 4.009799020437934, + "learning_rate": 1.9773760729439405e-05, + "loss": 0.9945, "step": 2036 }, { - "epoch": 0.42713357097924093, - "grad_norm": 6.252753944137568, - "learning_rate": 1.9345492020851967e-05, - "loss": 2.1476, + "epoch": 0.2876306128212369, + "grad_norm": 4.840755939516986, + "learning_rate": 1.9773438180513533e-05, + "loss": 1.1422, "step": 2037 }, { - "epoch": 0.4273432585447683, - "grad_norm": 6.717286218553124, - "learning_rate": 1.9344686214409724e-05, - "loss": 2.253, + "epoch": 0.2877718158712228, + "grad_norm": 3.441429572592453, + "learning_rate": 1.9773115404457175e-05, + "loss": 0.8611, "step": 2038 }, { - "epoch": 0.42755294611029565, - "grad_norm": 5.373113340246127, - "learning_rate": 1.9343879929036103e-05, - "loss": 2.1445, + "epoch": 0.2879130189212087, + "grad_norm": 4.3531540814975855, + "learning_rate": 1.9772792401277837e-05, + "loss": 1.1632, "step": 2039 }, { - "epoch": 0.427762633675823, - "grad_norm": 6.3643725544219505, - "learning_rate": 1.9343073164772432e-05, - "loss": 2.1806, + "epoch": 0.28805422197119457, + "grad_norm": 4.345000000297585, + "learning_rate": 1.9772469170983026e-05, + "loss": 0.9961, "step": 2040 }, { - "epoch": 0.4279723212413504, - "grad_norm": 6.199200701371801, - "learning_rate": 1.9342265921660054e-05, - "loss": 2.143, + "epoch": 0.28819542502118045, + "grad_norm": 4.194915802734927, + "learning_rate": 1.977214571358025e-05, + "loss": 1.094, "step": 2041 }, { - "epoch": 0.42818200880687773, - "grad_norm": 5.66314781975456, - "learning_rate": 1.934145819974035e-05, - "loss": 1.9979, + "epoch": 0.28833662807116633, + "grad_norm": 4.028493876038733, + "learning_rate": 1.9771822029077028e-05, + "loss": 1.0783, "step": 2042 }, { - "epoch": 0.4283916963724051, - "grad_norm": 6.286261967743655, - "learning_rate": 1.9340649999054707e-05, - "loss": 2.1439, + "epoch": 0.2884778311211522, + "grad_norm": 3.923437266480302, + "learning_rate": 1.9771498117480885e-05, + "loss": 0.9019, "step": 2043 }, { - "epoch": 0.42860138393793246, - "grad_norm": 5.476334925418123, - "learning_rate": 1.933984131964455e-05, - "loss": 2.3297, + "epoch": 0.2886190341711381, + "grad_norm": 4.730233937110911, + "learning_rate": 1.977117397879934e-05, + "loss": 1.1551, "step": 2044 }, { - "epoch": 0.4288110715034598, - "grad_norm": 5.656426868090549, - "learning_rate": 1.9339032161551328e-05, - "loss": 2.2493, + "epoch": 0.288760237221124, + "grad_norm": 4.082521639866751, + "learning_rate": 1.9770849613039934e-05, + "loss": 0.8055, "step": 2045 }, { - "epoch": 0.42902075906898723, - "grad_norm": 5.283818501014784, - "learning_rate": 1.933822252481651e-05, - "loss": 2.3128, + "epoch": 0.28890144027110987, + "grad_norm": 4.326972895394156, + "learning_rate": 1.9770525020210204e-05, + "loss": 1.0815, "step": 2046 }, { - "epoch": 0.4292304466345146, - "grad_norm": 5.579397238106657, - "learning_rate": 1.933741240948159e-05, - "loss": 2.0862, + "epoch": 0.28904264332109575, + "grad_norm": 4.404864862808989, + "learning_rate": 1.9770200200317694e-05, + "loss": 1.2375, "step": 2047 }, { - "epoch": 0.42944013420004196, - "grad_norm": 5.35244711965899, - "learning_rate": 1.9336601815588092e-05, - "loss": 2.3516, + "epoch": 0.28918384637108163, + "grad_norm": 4.484117726255576, + "learning_rate": 1.976987515336995e-05, + "loss": 1.2743, "step": 2048 }, { - "epoch": 0.4296498217655693, - "grad_norm": 5.848286279440088, - "learning_rate": 1.9335790743177555e-05, - "loss": 2.2656, + "epoch": 0.2893250494210675, + "grad_norm": 4.063025502518518, + "learning_rate": 1.9769549879374524e-05, + "loss": 1.1252, "step": 2049 }, { - "epoch": 0.4298595093310967, - "grad_norm": 5.326979090134375, - "learning_rate": 1.933497919229155e-05, - "loss": 2.0895, + "epoch": 0.2894662524710534, + "grad_norm": 3.983980411580159, + "learning_rate": 1.9769224378338978e-05, + "loss": 1.0919, "step": 2050 }, { - "epoch": 0.43006919689662404, - "grad_norm": 5.032408912348559, - "learning_rate": 1.9334167162971672e-05, - "loss": 2.1415, + "epoch": 0.2896074555210393, + "grad_norm": 4.759913262493748, + "learning_rate": 1.9768898650270877e-05, + "loss": 1.2276, "step": 2051 }, { - "epoch": 0.4302788844621514, - "grad_norm": 5.537158925475578, - "learning_rate": 1.9333354655259536e-05, - "loss": 2.3665, + "epoch": 0.2897486585710251, + "grad_norm": 4.544744615479969, + "learning_rate": 1.976857269517779e-05, + "loss": 1.1281, "step": 2052 }, { - "epoch": 0.43048857202767876, - "grad_norm": 5.88610943559457, - "learning_rate": 1.9332541669196788e-05, - "loss": 2.1844, + "epoch": 0.289889861621011, + "grad_norm": 3.7370236760771416, + "learning_rate": 1.9768246513067295e-05, + "loss": 0.9291, "step": 2053 }, { - "epoch": 0.4306982595932061, - "grad_norm": 6.30399584432925, - "learning_rate": 1.9331728204825092e-05, - "loss": 1.8749, + "epoch": 0.2900310646709969, + "grad_norm": 4.492300710270915, + "learning_rate": 1.976792010394697e-05, + "loss": 1.318, "step": 2054 }, { - "epoch": 0.4309079471587335, - "grad_norm": 5.597292511090274, - "learning_rate": 1.933091426218614e-05, - "loss": 2.0928, + "epoch": 0.29017226772098276, + "grad_norm": 4.498157558033843, + "learning_rate": 1.97675934678244e-05, + "loss": 1.0936, "step": 2055 }, { - "epoch": 0.43111763472426085, - "grad_norm": 5.638930277821436, - "learning_rate": 1.933009984132165e-05, - "loss": 2.1757, + "epoch": 0.29031347077096864, + "grad_norm": 3.6316301131730526, + "learning_rate": 1.9767266604707172e-05, + "loss": 0.8455, "step": 2056 }, { - "epoch": 0.4313273222897882, - "grad_norm": 5.275767560046034, - "learning_rate": 1.932928494227336e-05, - "loss": 2.0879, + "epoch": 0.2904546738209545, + "grad_norm": 3.9246863697555536, + "learning_rate": 1.9766939514602885e-05, + "loss": 1.0643, "step": 2057 }, { - "epoch": 0.43153700985531557, - "grad_norm": 5.891912118126988, - "learning_rate": 1.9328469565083033e-05, - "loss": 2.2518, + "epoch": 0.2905958768709404, + "grad_norm": 4.314612904258364, + "learning_rate": 1.9766612197519146e-05, + "loss": 1.0944, "step": 2058 }, { - "epoch": 0.43174669742084293, - "grad_norm": 6.131723412163059, - "learning_rate": 1.9327653709792467e-05, - "loss": 2.0922, + "epoch": 0.2907370799209263, + "grad_norm": 4.323314277102569, + "learning_rate": 1.9766284653463558e-05, + "loss": 1.0158, "step": 2059 }, { - "epoch": 0.4319563849863703, - "grad_norm": 4.806275608474459, - "learning_rate": 1.9326837376443465e-05, - "loss": 2.0747, + "epoch": 0.29087828297091217, + "grad_norm": 4.822479394109334, + "learning_rate": 1.976595688244373e-05, + "loss": 1.0973, "step": 2060 }, { - "epoch": 0.43216607255189765, - "grad_norm": 5.65893793256204, - "learning_rate": 1.9326020565077877e-05, - "loss": 2.1196, + "epoch": 0.29101948602089805, + "grad_norm": 3.922924559815942, + "learning_rate": 1.9765628884467283e-05, + "loss": 1.0773, "step": 2061 }, { - "epoch": 0.432375760117425, - "grad_norm": 7.573833858858292, - "learning_rate": 1.9325203275737557e-05, - "loss": 2.7746, + "epoch": 0.29116068907088394, + "grad_norm": 4.341775403330064, + "learning_rate": 1.9765300659541837e-05, + "loss": 1.1477, "step": 2062 }, { - "epoch": 0.43258544768295243, - "grad_norm": 6.734672915264905, - "learning_rate": 1.932438550846439e-05, - "loss": 2.359, + "epoch": 0.2913018921208698, + "grad_norm": 4.788375099739378, + "learning_rate": 1.976497220767502e-05, + "loss": 1.319, "step": 2063 }, { - "epoch": 0.4327951352484798, - "grad_norm": 5.601178993413717, - "learning_rate": 1.93235672633003e-05, - "loss": 1.9529, + "epoch": 0.2914430951708557, + "grad_norm": 3.9212088446350326, + "learning_rate": 1.976464352887447e-05, + "loss": 0.9932, "step": 2064 }, { - "epoch": 0.43300482281400715, - "grad_norm": 5.49168067409043, - "learning_rate": 1.9322748540287214e-05, - "loss": 2.0043, + "epoch": 0.2915842982208416, + "grad_norm": 4.585040004039015, + "learning_rate": 1.976431462314782e-05, + "loss": 1.1709, "step": 2065 }, { - "epoch": 0.4332145103795345, - "grad_norm": 6.622302844621882, - "learning_rate": 1.9321929339467097e-05, - "loss": 2.218, + "epoch": 0.29172550127082747, + "grad_norm": 4.291513162972788, + "learning_rate": 1.9763985490502714e-05, + "loss": 1.3046, "step": 2066 }, { - "epoch": 0.4334241979450619, - "grad_norm": 5.99192606304044, - "learning_rate": 1.9321109660881934e-05, - "loss": 1.9851, + "epoch": 0.29186670432081335, + "grad_norm": 5.093619370039751, + "learning_rate": 1.97636561309468e-05, + "loss": 1.2358, "step": 2067 }, { - "epoch": 0.43363388551058923, - "grad_norm": 6.003115940602221, - "learning_rate": 1.9320289504573734e-05, - "loss": 2.327, + "epoch": 0.29200790737079924, + "grad_norm": 3.754190466944284, + "learning_rate": 1.976332654448774e-05, + "loss": 1.002, "step": 2068 }, { - "epoch": 0.4338435730761166, - "grad_norm": 5.616618635469566, - "learning_rate": 1.9319468870584532e-05, - "loss": 2.4658, + "epoch": 0.29214911042078506, + "grad_norm": 4.141108481796237, + "learning_rate": 1.976299673113318e-05, + "loss": 1.1374, "step": 2069 }, { - "epoch": 0.43405326064164396, - "grad_norm": 5.783200780107306, - "learning_rate": 1.9318647758956385e-05, - "loss": 2.0661, + "epoch": 0.29229031347077095, + "grad_norm": 4.886492875931723, + "learning_rate": 1.9762666690890796e-05, + "loss": 1.1219, "step": 2070 }, { - "epoch": 0.4342629482071713, - "grad_norm": 5.5309950475475524, - "learning_rate": 1.9317826169731377e-05, - "loss": 2.5602, + "epoch": 0.29243151652075683, + "grad_norm": 3.452987479332645, + "learning_rate": 1.9762336423768257e-05, + "loss": 0.9034, "step": 2071 }, { - "epoch": 0.4344726357726987, - "grad_norm": 5.818403769826662, - "learning_rate": 1.931700410295162e-05, - "loss": 2.0612, + "epoch": 0.2925727195707427, + "grad_norm": 4.266678687110639, + "learning_rate": 1.976200592977323e-05, + "loss": 1.2724, "step": 2072 }, { - "epoch": 0.43468232333822604, - "grad_norm": 5.94637769575716, - "learning_rate": 1.9316181558659243e-05, - "loss": 2.4089, + "epoch": 0.2927139226207286, + "grad_norm": 4.0257082259290735, + "learning_rate": 1.9761675208913408e-05, + "loss": 0.9951, "step": 2073 }, { - "epoch": 0.4348920109037534, - "grad_norm": 4.700776204946042, - "learning_rate": 1.9315358536896397e-05, - "loss": 2.0653, + "epoch": 0.2928551256707145, + "grad_norm": 3.96832581908812, + "learning_rate": 1.9761344261196464e-05, + "loss": 1.0852, "step": 2074 }, { - "epoch": 0.43510169846928076, - "grad_norm": 5.681035212156418, - "learning_rate": 1.9314535037705274e-05, - "loss": 2.1765, + "epoch": 0.29299632872070036, + "grad_norm": 4.617685000382881, + "learning_rate": 1.9761013086630096e-05, + "loss": 1.1784, "step": 2075 }, { - "epoch": 0.4353113860348081, - "grad_norm": 5.431748804059685, - "learning_rate": 1.9313711061128075e-05, - "loss": 2.1142, + "epoch": 0.29313753177068624, + "grad_norm": 3.6319940014803267, + "learning_rate": 1.9760681685222e-05, + "loss": 0.9583, "step": 2076 }, { - "epoch": 0.4355210736003355, - "grad_norm": 6.300833276473555, - "learning_rate": 1.931288660720703e-05, - "loss": 2.1151, + "epoch": 0.2932787348206721, + "grad_norm": 4.2324410829378944, + "learning_rate": 1.9760350056979877e-05, + "loss": 1.0565, "step": 2077 }, { - "epoch": 0.43573076116586285, - "grad_norm": 5.635273946679019, - "learning_rate": 1.9312061675984396e-05, - "loss": 2.4139, + "epoch": 0.293419937870658, + "grad_norm": 3.7029137727175483, + "learning_rate": 1.976001820191143e-05, + "loss": 1.0016, "step": 2078 }, { - "epoch": 0.4359404487313902, - "grad_norm": 6.235775951649308, - "learning_rate": 1.931123626750245e-05, - "loss": 2.3655, + "epoch": 0.2935611409206439, + "grad_norm": 3.650246675487592, + "learning_rate": 1.975968612002438e-05, + "loss": 0.9138, "step": 2079 }, { - "epoch": 0.43615013629691757, - "grad_norm": 5.710130425109169, - "learning_rate": 1.9310410381803494e-05, - "loss": 2.0791, + "epoch": 0.2937023439706298, + "grad_norm": 3.9568489080451936, + "learning_rate": 1.975935381132644e-05, + "loss": 1.0436, "step": 2080 }, { - "epoch": 0.436359823862445, - "grad_norm": 5.158089292368677, - "learning_rate": 1.9309584018929857e-05, - "loss": 2.32, + "epoch": 0.29384354702061566, + "grad_norm": 4.9449641178102635, + "learning_rate": 1.975902127582533e-05, + "loss": 1.2078, "step": 2081 }, { - "epoch": 0.43656951142797235, - "grad_norm": 5.378579223958286, - "learning_rate": 1.9308757178923892e-05, - "loss": 1.9725, + "epoch": 0.29398475007060154, + "grad_norm": 4.121101354372049, + "learning_rate": 1.9758688513528783e-05, + "loss": 1.0161, "step": 2082 }, { - "epoch": 0.4367791989934997, - "grad_norm": 6.345658724832935, - "learning_rate": 1.9307929861827978e-05, - "loss": 2.3201, + "epoch": 0.2941259531205874, + "grad_norm": 4.94270977761333, + "learning_rate": 1.9758355524444526e-05, + "loss": 1.1251, "step": 2083 }, { - "epoch": 0.43698888655902707, - "grad_norm": 5.32513022247471, - "learning_rate": 1.9307102067684515e-05, - "loss": 2.0496, + "epoch": 0.2942671561705733, + "grad_norm": 4.345004231911718, + "learning_rate": 1.9758022308580306e-05, + "loss": 1.019, "step": 2084 }, { - "epoch": 0.43719857412455443, - "grad_norm": 5.187660334016663, - "learning_rate": 1.9306273796535927e-05, - "loss": 1.8756, + "epoch": 0.2944083592205592, + "grad_norm": 4.668617086823921, + "learning_rate": 1.9757688865943855e-05, + "loss": 1.0925, "step": 2085 }, { - "epoch": 0.4374082616900818, - "grad_norm": 5.696723864006713, - "learning_rate": 1.930544504842467e-05, - "loss": 2.3171, + "epoch": 0.294549562270545, + "grad_norm": 3.8205038776183136, + "learning_rate": 1.975735519654293e-05, + "loss": 0.9605, "step": 2086 }, { - "epoch": 0.43761794925560915, - "grad_norm": 5.7490410534754215, - "learning_rate": 1.9304615823393212e-05, - "loss": 2.2151, + "epoch": 0.2946907653205309, + "grad_norm": 3.6174909433375433, + "learning_rate": 1.9757021300385288e-05, + "loss": 0.9056, "step": 2087 }, { - "epoch": 0.4378276368211365, - "grad_norm": 6.042000149835223, - "learning_rate": 1.9303786121484055e-05, - "loss": 2.1976, + "epoch": 0.2948319683705168, + "grad_norm": 5.204993341242176, + "learning_rate": 1.9756687177478683e-05, + "loss": 1.3593, "step": 2088 }, { - "epoch": 0.4380373243866639, - "grad_norm": 6.244626165358541, - "learning_rate": 1.930295594273972e-05, - "loss": 1.9764, + "epoch": 0.29497317142050267, + "grad_norm": 3.6509927194113008, + "learning_rate": 1.9756352827830878e-05, + "loss": 0.8878, "step": 2089 }, { - "epoch": 0.43824701195219123, - "grad_norm": 6.660763283582525, - "learning_rate": 1.930212528720276e-05, - "loss": 2.4895, + "epoch": 0.29511437447048855, + "grad_norm": 4.715387960943878, + "learning_rate": 1.975601825144965e-05, + "loss": 1.3048, "step": 2090 }, { - "epoch": 0.4384566995177186, - "grad_norm": 5.010957273057531, - "learning_rate": 1.9301294154915748e-05, - "loss": 2.0326, + "epoch": 0.29525557752047443, + "grad_norm": 4.391515569665896, + "learning_rate": 1.9755683448342774e-05, + "loss": 1.0547, "step": 2091 }, { - "epoch": 0.43866638708324596, - "grad_norm": 5.642691429878223, - "learning_rate": 1.9300462545921275e-05, - "loss": 2.0941, + "epoch": 0.2953967805704603, + "grad_norm": 4.718727020149414, + "learning_rate": 1.9755348418518022e-05, + "loss": 1.2065, "step": 2092 }, { - "epoch": 0.4388760746487733, - "grad_norm": 5.587524958435849, - "learning_rate": 1.9299630460261965e-05, - "loss": 1.7575, + "epoch": 0.2955379836204462, + "grad_norm": 4.6274706615496, + "learning_rate": 1.9755013161983188e-05, + "loss": 1.1108, "step": 2093 }, { - "epoch": 0.4390857622143007, - "grad_norm": 6.126055005929078, - "learning_rate": 1.929879789798047e-05, - "loss": 2.3099, + "epoch": 0.2956791866704321, + "grad_norm": 4.262219880039032, + "learning_rate": 1.9754677678746064e-05, + "loss": 1.1746, "step": 2094 }, { - "epoch": 0.43929544977982804, - "grad_norm": 6.3326167998416585, - "learning_rate": 1.9297964859119452e-05, - "loss": 2.3565, + "epoch": 0.29582038972041796, + "grad_norm": 4.973375591970985, + "learning_rate": 1.975434196881444e-05, + "loss": 1.0213, "step": 2095 }, { - "epoch": 0.4395051373453554, - "grad_norm": 5.8339022278469805, - "learning_rate": 1.9297131343721606e-05, - "loss": 2.0313, + "epoch": 0.29596159277040385, + "grad_norm": 4.247965421169264, + "learning_rate": 1.9754006032196123e-05, + "loss": 1.201, "step": 2096 }, { - "epoch": 0.43971482491088276, - "grad_norm": 5.912496507784254, - "learning_rate": 1.9296297351829655e-05, - "loss": 1.998, + "epoch": 0.29610279582038973, + "grad_norm": 3.6558771372991523, + "learning_rate": 1.9753669868898915e-05, + "loss": 0.9072, "step": 2097 }, { - "epoch": 0.4399245124764101, - "grad_norm": 5.518182485277831, - "learning_rate": 1.9295462883486343e-05, - "loss": 1.719, + "epoch": 0.2962439988703756, + "grad_norm": 3.790561171440481, + "learning_rate": 1.9753333478930632e-05, + "loss": 1.1078, "step": 2098 }, { - "epoch": 0.44013420004193754, - "grad_norm": 5.225034649020508, - "learning_rate": 1.9294627938734432e-05, - "loss": 2.1939, + "epoch": 0.2963852019203615, + "grad_norm": 3.815647120930167, + "learning_rate": 1.9752996862299098e-05, + "loss": 1.0469, "step": 2099 }, { - "epoch": 0.4403438876074649, - "grad_norm": 6.3373451638869716, - "learning_rate": 1.929379251761672e-05, - "loss": 2.1479, + "epoch": 0.2965264049703474, + "grad_norm": 4.219261450992644, + "learning_rate": 1.975266001901212e-05, + "loss": 0.8996, "step": 2100 }, { - "epoch": 0.44055357517299226, - "grad_norm": 6.676127481438523, - "learning_rate": 1.9292956620176026e-05, - "loss": 1.8878, + "epoch": 0.29666760802033326, + "grad_norm": 4.214037712732588, + "learning_rate": 1.975232294907754e-05, + "loss": 1.0994, "step": 2101 }, { - "epoch": 0.4407632627385196, - "grad_norm": 5.469884784332731, - "learning_rate": 1.9292120246455182e-05, - "loss": 2.4373, + "epoch": 0.29680881107031915, + "grad_norm": 4.157645276184329, + "learning_rate": 1.9751985652503187e-05, + "loss": 1.0049, "step": 2102 }, { - "epoch": 0.440972950304047, - "grad_norm": 5.64658135255303, - "learning_rate": 1.9291283396497064e-05, - "loss": 1.9612, + "epoch": 0.296950014120305, + "grad_norm": 4.285734499050421, + "learning_rate": 1.9751648129296893e-05, + "loss": 1.2352, "step": 2103 }, { - "epoch": 0.44118263786957435, - "grad_norm": 6.221941424803897, - "learning_rate": 1.929044607034455e-05, - "loss": 2.3523, + "epoch": 0.29709121717029086, + "grad_norm": 4.3516889160874745, + "learning_rate": 1.9751310379466514e-05, + "loss": 1.011, "step": 2104 }, { - "epoch": 0.4413923254351017, - "grad_norm": 6.549847020720109, - "learning_rate": 1.928960826804057e-05, - "loss": 2.0915, + "epoch": 0.29723242022027674, + "grad_norm": 5.264245001109766, + "learning_rate": 1.9750972403019894e-05, + "loss": 1.3632, "step": 2105 }, { - "epoch": 0.44160201300062907, - "grad_norm": 6.46659717443009, - "learning_rate": 1.928876998962805e-05, - "loss": 1.9885, + "epoch": 0.2973736232702626, + "grad_norm": 3.842938326979059, + "learning_rate": 1.9750634199964882e-05, + "loss": 0.9896, "step": 2106 }, { - "epoch": 0.44181170056615643, - "grad_norm": 7.692818894809973, - "learning_rate": 1.9287931235149953e-05, - "loss": 2.2936, + "epoch": 0.2975148263202485, + "grad_norm": 4.099463182172017, + "learning_rate": 1.9750295770309346e-05, + "loss": 1.1322, "step": 2107 }, { - "epoch": 0.4420213881316838, - "grad_norm": 6.194831982241896, - "learning_rate": 1.9287092004649278e-05, - "loss": 2.1111, + "epoch": 0.2976560293702344, + "grad_norm": 5.260603302006069, + "learning_rate": 1.9749957114061143e-05, + "loss": 1.2615, "step": 2108 }, { - "epoch": 0.44223107569721115, - "grad_norm": 5.381950824437453, - "learning_rate": 1.928625229816903e-05, - "loss": 2.1017, + "epoch": 0.29779723242022027, + "grad_norm": 4.844109036544185, + "learning_rate": 1.974961823122815e-05, + "loss": 1.1653, "step": 2109 }, { - "epoch": 0.4424407632627385, - "grad_norm": 6.069627677798655, - "learning_rate": 1.928541211575224e-05, - "loss": 2.0528, + "epoch": 0.29793843547020615, + "grad_norm": 4.729861458467353, + "learning_rate": 1.9749279121818235e-05, + "loss": 1.1966, "step": 2110 }, { - "epoch": 0.4426504508282659, - "grad_norm": 6.196281401837651, - "learning_rate": 1.928457145744198e-05, - "loss": 2.068, + "epoch": 0.29807963852019204, + "grad_norm": 4.44295162211147, + "learning_rate": 1.974893978583929e-05, + "loss": 1.0891, "step": 2111 }, { - "epoch": 0.44286013839379323, - "grad_norm": 6.47798398596338, - "learning_rate": 1.9283730323281324e-05, - "loss": 2.1255, + "epoch": 0.2982208415701779, + "grad_norm": 4.483507072957521, + "learning_rate": 1.9748600223299192e-05, + "loss": 1.1678, "step": 2112 }, { - "epoch": 0.4430698259593206, - "grad_norm": 5.206729071527936, - "learning_rate": 1.928288871331339e-05, - "loss": 2.1531, + "epoch": 0.2983620446201638, + "grad_norm": 3.940404450878208, + "learning_rate": 1.9748260434205835e-05, + "loss": 0.9854, "step": 2113 }, { - "epoch": 0.44327951352484796, - "grad_norm": 5.936718978146401, - "learning_rate": 1.9282046627581312e-05, - "loss": 2.5002, + "epoch": 0.2985032476701497, + "grad_norm": 3.9262605679028932, + "learning_rate": 1.9747920418567117e-05, + "loss": 1.0451, "step": 2114 }, { - "epoch": 0.4434892010903753, - "grad_norm": 6.1021245039425445, - "learning_rate": 1.9281204066128244e-05, - "loss": 2.332, + "epoch": 0.29864445072013557, + "grad_norm": 4.096441516935058, + "learning_rate": 1.9747580176390938e-05, + "loss": 1.1508, "step": 2115 }, { - "epoch": 0.4436988886559027, - "grad_norm": 6.406287733699576, - "learning_rate": 1.9280361028997366e-05, - "loss": 2.2513, + "epoch": 0.29878565377012145, + "grad_norm": 3.6950068845578503, + "learning_rate": 1.9747239707685205e-05, + "loss": 1.0449, "step": 2116 }, { - "epoch": 0.4439085762214301, - "grad_norm": 5.330564933676017, - "learning_rate": 1.9279517516231894e-05, - "loss": 2.2691, + "epoch": 0.29892685682010733, + "grad_norm": 4.814733536298415, + "learning_rate": 1.9746899012457828e-05, + "loss": 1.3223, "step": 2117 }, { - "epoch": 0.44411826378695746, - "grad_norm": 6.514738203209957, - "learning_rate": 1.927867352787505e-05, - "loss": 2.1958, + "epoch": 0.2990680598700932, + "grad_norm": 4.710908762560615, + "learning_rate": 1.974655809071673e-05, + "loss": 1.0588, "step": 2118 }, { - "epoch": 0.4443279513524848, - "grad_norm": 6.08413407798268, - "learning_rate": 1.92778290639701e-05, - "loss": 2.0121, + "epoch": 0.2992092629200791, + "grad_norm": 5.874353984735284, + "learning_rate": 1.974621694246983e-05, + "loss": 1.4003, "step": 2119 }, { - "epoch": 0.4445376389180122, - "grad_norm": 6.192734970302713, - "learning_rate": 1.9276984124560316e-05, - "loss": 2.4204, + "epoch": 0.2993504659700649, + "grad_norm": 4.733486325167167, + "learning_rate": 1.9745875567725058e-05, + "loss": 1.0456, "step": 2120 }, { - "epoch": 0.44474732648353954, - "grad_norm": 5.519097254092197, - "learning_rate": 1.927613870968901e-05, - "loss": 2.0228, + "epoch": 0.2994916690200508, + "grad_norm": 4.388512374531519, + "learning_rate": 1.9745533966490346e-05, + "loss": 1.1836, "step": 2121 }, { - "epoch": 0.4449570140490669, - "grad_norm": 5.802823216417202, - "learning_rate": 1.9275292819399497e-05, - "loss": 2.2928, + "epoch": 0.2996328720700367, + "grad_norm": 5.472906473352105, + "learning_rate": 1.9745192138773633e-05, + "loss": 1.3995, "step": 2122 }, { - "epoch": 0.44516670161459426, - "grad_norm": 5.758362580275623, - "learning_rate": 1.927444645373515e-05, - "loss": 2.2429, + "epoch": 0.2997740751200226, + "grad_norm": 4.214611236572622, + "learning_rate": 1.9744850084582868e-05, + "loss": 1.0988, "step": 2123 }, { - "epoch": 0.4453763891801216, - "grad_norm": 5.783980183378622, - "learning_rate": 1.927359961273933e-05, - "loss": 2.1413, + "epoch": 0.29991527817000846, + "grad_norm": 4.6535323988970605, + "learning_rate": 1.974450780392599e-05, + "loss": 1.2336, "step": 2124 }, { - "epoch": 0.445586076745649, - "grad_norm": 6.518825745108229, - "learning_rate": 1.9272752296455445e-05, - "loss": 2.299, + "epoch": 0.30005648121999434, + "grad_norm": 4.216973525657969, + "learning_rate": 1.974416529681096e-05, + "loss": 1.0138, "step": 2125 }, { - "epoch": 0.44579576431117635, - "grad_norm": 5.761284680422236, - "learning_rate": 1.9271904504926926e-05, - "loss": 2.1173, + "epoch": 0.3001976842699802, + "grad_norm": 4.54751853001935, + "learning_rate": 1.9743822563245738e-05, + "loss": 1.0632, "step": 2126 }, { - "epoch": 0.4460054518767037, - "grad_norm": 5.353737782145257, - "learning_rate": 1.927105623819722e-05, - "loss": 2.0069, + "epoch": 0.3003388873199661, + "grad_norm": 4.410058274448994, + "learning_rate": 1.9743479603238285e-05, + "loss": 1.0406, "step": 2127 }, { - "epoch": 0.44621513944223107, - "grad_norm": 5.607547248719613, - "learning_rate": 1.9270207496309797e-05, - "loss": 1.6257, + "epoch": 0.300480090369952, + "grad_norm": 4.505072932558089, + "learning_rate": 1.9743136416796578e-05, + "loss": 1.1924, "step": 2128 }, { - "epoch": 0.44642482700775843, - "grad_norm": 5.763310483879941, - "learning_rate": 1.9269358279308166e-05, - "loss": 2.3788, + "epoch": 0.3006212934199379, + "grad_norm": 4.259655297474813, + "learning_rate": 1.9742793003928587e-05, + "loss": 1.0439, "step": 2129 }, { - "epoch": 0.4466345145732858, - "grad_norm": 6.098194229322987, - "learning_rate": 1.9268508587235845e-05, - "loss": 2.3351, + "epoch": 0.30076249646992376, + "grad_norm": 4.0170902072458246, + "learning_rate": 1.9742449364642293e-05, + "loss": 1.1054, "step": 2130 }, { - "epoch": 0.44684420213881315, - "grad_norm": 5.56995439916535, - "learning_rate": 1.9267658420136382e-05, - "loss": 1.9447, + "epoch": 0.30090369951990964, + "grad_norm": 3.85904065084625, + "learning_rate": 1.9742105498945685e-05, + "loss": 0.9522, "step": 2131 }, { - "epoch": 0.4470538897043405, - "grad_norm": 5.648570887849179, - "learning_rate": 1.9266807778053355e-05, - "loss": 2.0965, + "epoch": 0.3010449025698955, + "grad_norm": 4.149821997292952, + "learning_rate": 1.974176140684675e-05, + "loss": 1.0282, "step": 2132 }, { - "epoch": 0.4472635772698679, - "grad_norm": 6.061262143542905, - "learning_rate": 1.9265956661030354e-05, - "loss": 2.3161, + "epoch": 0.3011861056198814, + "grad_norm": 3.895860611066988, + "learning_rate": 1.974141708835349e-05, + "loss": 1.0095, "step": 2133 }, { - "epoch": 0.44747326483539523, - "grad_norm": 6.22562323241189, - "learning_rate": 1.9265105069111004e-05, - "loss": 1.9543, + "epoch": 0.3013273086698673, + "grad_norm": 3.490149077007251, + "learning_rate": 1.97410725434739e-05, + "loss": 0.7841, "step": 2134 }, { - "epoch": 0.44768295240092265, - "grad_norm": 6.338761354478839, - "learning_rate": 1.926425300233895e-05, - "loss": 2.2737, + "epoch": 0.30146851171985317, + "grad_norm": 3.601206672543684, + "learning_rate": 1.9740727772215994e-05, + "loss": 0.9907, "step": 2135 }, { - "epoch": 0.44789263996645, - "grad_norm": 5.425742059783069, - "learning_rate": 1.9263400460757862e-05, - "loss": 2.0995, + "epoch": 0.30160971476983905, + "grad_norm": 4.086020251916973, + "learning_rate": 1.974038277458778e-05, + "loss": 1.0585, "step": 2136 }, { - "epoch": 0.4481023275319774, - "grad_norm": 5.580454103356368, - "learning_rate": 1.9262547444411434e-05, - "loss": 2.0169, + "epoch": 0.3017509178198249, + "grad_norm": 4.43966582064688, + "learning_rate": 1.974003755059728e-05, + "loss": 1.1171, "step": 2137 }, { - "epoch": 0.44831201509750473, - "grad_norm": 5.699511976228824, - "learning_rate": 1.926169395334338e-05, - "loss": 2.4202, + "epoch": 0.30189212086981076, + "grad_norm": 4.299078171831713, + "learning_rate": 1.973969210025251e-05, + "loss": 0.8867, "step": 2138 }, { - "epoch": 0.4485217026630321, - "grad_norm": 5.252596678127221, - "learning_rate": 1.9260839987597452e-05, - "loss": 2.0182, + "epoch": 0.30203332391979665, + "grad_norm": 4.267444521800434, + "learning_rate": 1.97393464235615e-05, + "loss": 0.9956, "step": 2139 }, { - "epoch": 0.44873139022855946, - "grad_norm": 5.859247990503234, - "learning_rate": 1.925998554721741e-05, - "loss": 2.0346, + "epoch": 0.30217452696978253, + "grad_norm": 4.824881506561139, + "learning_rate": 1.973900052053229e-05, + "loss": 1.2953, "step": 2140 }, { - "epoch": 0.4489410777940868, - "grad_norm": 6.153703690951179, - "learning_rate": 1.9259130632247047e-05, - "loss": 2.4379, + "epoch": 0.3023157300197684, + "grad_norm": 4.841472336560749, + "learning_rate": 1.9738654391172912e-05, + "loss": 1.0828, "step": 2141 }, { - "epoch": 0.4491507653596142, - "grad_norm": 5.954251759696108, - "learning_rate": 1.9258275242730186e-05, - "loss": 2.1387, + "epoch": 0.3024569330697543, + "grad_norm": 3.963816648644586, + "learning_rate": 1.973830803549141e-05, + "loss": 0.7916, "step": 2142 }, { - "epoch": 0.44936045292514154, - "grad_norm": 6.412397700313111, - "learning_rate": 1.9257419378710655e-05, - "loss": 2.3891, + "epoch": 0.3025981361197402, + "grad_norm": 3.7120311200752756, + "learning_rate": 1.973796145349584e-05, + "loss": 1.0114, "step": 2143 }, { - "epoch": 0.4495701404906689, - "grad_norm": 5.275799310870207, - "learning_rate": 1.9256563040232327e-05, - "loss": 2.0199, + "epoch": 0.30273933916972606, + "grad_norm": 4.566427698894491, + "learning_rate": 1.9737614645194246e-05, + "loss": 1.1034, "step": 2144 }, { - "epoch": 0.44977982805619626, - "grad_norm": 5.788502863681894, - "learning_rate": 1.925570622733909e-05, - "loss": 2.1101, + "epoch": 0.30288054221971195, + "grad_norm": 3.658668801593328, + "learning_rate": 1.9737267610594696e-05, + "loss": 0.9294, "step": 2145 }, { - "epoch": 0.4499895156217236, - "grad_norm": 6.281865542887358, - "learning_rate": 1.925484894007485e-05, - "loss": 1.9435, + "epoch": 0.30302174526969783, + "grad_norm": 3.7909626617417804, + "learning_rate": 1.9736920349705252e-05, + "loss": 1.0643, "step": 2146 }, { - "epoch": 0.450199203187251, - "grad_norm": 5.029955211994221, - "learning_rate": 1.9253991178483558e-05, - "loss": 1.6969, + "epoch": 0.3031629483196837, + "grad_norm": 4.723961687939991, + "learning_rate": 1.9736572862533982e-05, + "loss": 1.2165, "step": 2147 }, { - "epoch": 0.45040889075277835, - "grad_norm": 6.226058554567945, - "learning_rate": 1.925313294260916e-05, - "loss": 2.323, + "epoch": 0.3033041513696696, + "grad_norm": 3.454731927272283, + "learning_rate": 1.9736225149088964e-05, + "loss": 1.163, "step": 2148 }, { - "epoch": 0.4506185783183057, - "grad_norm": 6.173703084842196, - "learning_rate": 1.9252274232495657e-05, - "loss": 2.1683, + "epoch": 0.3034453544196555, + "grad_norm": 4.54516822071072, + "learning_rate": 1.973587720937828e-05, + "loss": 1.3348, "step": 2149 }, { - "epoch": 0.45082826588383307, - "grad_norm": 5.191343436819127, - "learning_rate": 1.9251415048187048e-05, - "loss": 1.894, + "epoch": 0.30358655746964136, + "grad_norm": 4.129944101989155, + "learning_rate": 1.9735529043410012e-05, + "loss": 1.0159, "step": 2150 }, { - "epoch": 0.45103795344936043, - "grad_norm": 5.883486803450713, - "learning_rate": 1.9250555389727372e-05, - "loss": 2.1668, + "epoch": 0.30372776051962724, + "grad_norm": 4.421217810744664, + "learning_rate": 1.9735180651192258e-05, + "loss": 1.0217, "step": 2151 }, { - "epoch": 0.4512476410148878, - "grad_norm": 7.076499150662879, - "learning_rate": 1.924969525716069e-05, - "loss": 2.3622, + "epoch": 0.3038689635696131, + "grad_norm": 5.510581582628757, + "learning_rate": 1.97348320327331e-05, + "loss": 0.8774, "step": 2152 }, { - "epoch": 0.4514573285804152, - "grad_norm": 6.0310037323600865, - "learning_rate": 1.9248834650531082e-05, - "loss": 2.0964, + "epoch": 0.304010166619599, + "grad_norm": 4.563096046896744, + "learning_rate": 1.973448318804066e-05, + "loss": 1.4281, "step": 2153 }, { - "epoch": 0.45166701614594257, - "grad_norm": 7.175554750332626, - "learning_rate": 1.9247973569882658e-05, - "loss": 2.0424, + "epoch": 0.30415136966958484, + "grad_norm": 4.57215781177901, + "learning_rate": 1.9734134117123028e-05, + "loss": 1.121, "step": 2154 }, { - "epoch": 0.45187670371146993, - "grad_norm": 6.105869658042161, - "learning_rate": 1.924711201525955e-05, - "loss": 1.7389, + "epoch": 0.3042925727195707, + "grad_norm": 3.6378546353345285, + "learning_rate": 1.9733784819988327e-05, + "loss": 0.9235, "step": 2155 }, { - "epoch": 0.4520863912769973, - "grad_norm": 5.691638384452805, - "learning_rate": 1.9246249986705908e-05, - "loss": 1.8976, + "epoch": 0.3044337757695566, + "grad_norm": 4.355323387033869, + "learning_rate": 1.973343529664467e-05, + "loss": 1.3777, "step": 2156 }, { - "epoch": 0.45229607884252465, - "grad_norm": 5.42756617344828, - "learning_rate": 1.9245387484265922e-05, - "loss": 1.9626, + "epoch": 0.3045749788195425, + "grad_norm": 4.1589182416077675, + "learning_rate": 1.9733085547100178e-05, + "loss": 0.9132, "step": 2157 }, { - "epoch": 0.452505766408052, - "grad_norm": 6.038008616429139, - "learning_rate": 1.924452450798379e-05, - "loss": 2.1422, + "epoch": 0.30471618186952837, + "grad_norm": 4.73090358416012, + "learning_rate": 1.9732735571362985e-05, + "loss": 1.0293, "step": 2158 }, { - "epoch": 0.4527154539735794, - "grad_norm": 5.704012336271476, - "learning_rate": 1.9243661057903745e-05, - "loss": 1.8792, + "epoch": 0.30485738491951425, + "grad_norm": 3.58171323511884, + "learning_rate": 1.973238536944122e-05, + "loss": 1.0627, "step": 2159 }, { - "epoch": 0.45292514153910673, - "grad_norm": 5.8213075043338645, - "learning_rate": 1.924279713407004e-05, - "loss": 2.1707, + "epoch": 0.30499858796950013, + "grad_norm": 3.2489930587960694, + "learning_rate": 1.973203494134302e-05, + "loss": 0.8192, "step": 2160 }, { - "epoch": 0.4531348291046341, - "grad_norm": 5.93516429303292, - "learning_rate": 1.9241932736526946e-05, - "loss": 2.0073, + "epoch": 0.305139791019486, + "grad_norm": 4.257174402786828, + "learning_rate": 1.973168428707653e-05, + "loss": 1.0641, "step": 2161 }, { - "epoch": 0.45334451667016146, - "grad_norm": 7.380665068681982, - "learning_rate": 1.9241067865318773e-05, - "loss": 2.4388, + "epoch": 0.3052809940694719, + "grad_norm": 5.007663980347736, + "learning_rate": 1.9731333406649905e-05, + "loss": 1.2569, "step": 2162 }, { - "epoch": 0.4535542042356888, - "grad_norm": 6.302986263222068, - "learning_rate": 1.924020252048984e-05, - "loss": 2.0992, + "epoch": 0.3054221971194578, + "grad_norm": 4.062986544743405, + "learning_rate": 1.973098230007129e-05, + "loss": 1.0777, "step": 2163 }, { - "epoch": 0.4537638918012162, - "grad_norm": 5.156662616331745, - "learning_rate": 1.9239336702084504e-05, - "loss": 1.9568, + "epoch": 0.30556340016944367, + "grad_norm": 4.746232951452049, + "learning_rate": 1.973063096734885e-05, + "loss": 1.0871, "step": 2164 }, { - "epoch": 0.45397357936674354, - "grad_norm": 6.538949072160556, - "learning_rate": 1.923847041014714e-05, - "loss": 2.328, + "epoch": 0.30570460321942955, + "grad_norm": 3.8301889599639938, + "learning_rate": 1.9730279408490745e-05, + "loss": 0.8697, "step": 2165 }, { - "epoch": 0.4541832669322709, - "grad_norm": 5.785433967010931, - "learning_rate": 1.923760364472214e-05, - "loss": 2.5033, + "epoch": 0.30584580626941543, + "grad_norm": 4.728308124948648, + "learning_rate": 1.9729927623505153e-05, + "loss": 1.1847, "step": 2166 }, { - "epoch": 0.45439295449779826, - "grad_norm": 5.9903884281129205, - "learning_rate": 1.9236736405853936e-05, - "loss": 1.9587, + "epoch": 0.3059870093194013, + "grad_norm": 4.703547765788962, + "learning_rate": 1.9729575612400243e-05, + "loss": 1.0545, "step": 2167 }, { - "epoch": 0.4546026420633256, - "grad_norm": 5.515737482442287, - "learning_rate": 1.9235868693586968e-05, - "loss": 2.0225, + "epoch": 0.3061282123693872, + "grad_norm": 4.543642508071172, + "learning_rate": 1.97292233751842e-05, + "loss": 0.9916, "step": 2168 }, { - "epoch": 0.454812329628853, - "grad_norm": 6.707401275980805, - "learning_rate": 1.923500050796571e-05, - "loss": 2.3053, + "epoch": 0.3062694154193731, + "grad_norm": 4.737480553838843, + "learning_rate": 1.9728870911865206e-05, + "loss": 1.1312, "step": 2169 }, { - "epoch": 0.45502201719438035, - "grad_norm": 6.207836087239906, - "learning_rate": 1.923413184903466e-05, - "loss": 2.2179, + "epoch": 0.30641061846935896, + "grad_norm": 5.511363765219922, + "learning_rate": 1.9728518222451454e-05, + "loss": 1.278, "step": 2170 }, { - "epoch": 0.45523170475990776, - "grad_norm": 5.888412981526102, - "learning_rate": 1.9233262716838336e-05, - "loss": 2.3701, + "epoch": 0.3065518215193448, + "grad_norm": 4.803924273845113, + "learning_rate": 1.9728165306951143e-05, + "loss": 1.2992, "step": 2171 }, { - "epoch": 0.4554413923254351, - "grad_norm": 7.4226005879639745, - "learning_rate": 1.9232393111421285e-05, - "loss": 2.2473, + "epoch": 0.3066930245693307, + "grad_norm": 4.383592224379284, + "learning_rate": 1.9727812165372463e-05, + "loss": 0.9418, "step": 2172 }, { - "epoch": 0.4556510798909625, - "grad_norm": 5.018636253827584, - "learning_rate": 1.9231523032828073e-05, - "loss": 2.167, + "epoch": 0.30683422761931656, + "grad_norm": 3.9911199863584956, + "learning_rate": 1.9727458797723638e-05, + "loss": 1.1835, "step": 2173 }, { - "epoch": 0.45586076745648985, - "grad_norm": 6.685540707178862, - "learning_rate": 1.9230652481103297e-05, - "loss": 2.3116, + "epoch": 0.30697543066930244, + "grad_norm": 4.495905276183238, + "learning_rate": 1.972710520401287e-05, + "loss": 1.1202, "step": 2174 }, { - "epoch": 0.4560704550220172, - "grad_norm": 5.402939053047352, - "learning_rate": 1.922978145629157e-05, - "loss": 2.2652, + "epoch": 0.3071166337192883, + "grad_norm": 4.134165598526877, + "learning_rate": 1.9726751384248373e-05, + "loss": 0.9348, "step": 2175 }, { - "epoch": 0.45628014258754457, - "grad_norm": 5.75831079413424, - "learning_rate": 1.9228909958437532e-05, - "loss": 1.5863, + "epoch": 0.3072578367692742, + "grad_norm": 4.249087624018567, + "learning_rate": 1.972639733843838e-05, + "loss": 1.1545, "step": 2176 }, { - "epoch": 0.45648983015307193, - "grad_norm": 5.308506535451557, - "learning_rate": 1.9228037987585855e-05, - "loss": 2.1101, + "epoch": 0.3073990398192601, + "grad_norm": 3.8063788381456836, + "learning_rate": 1.9726043066591106e-05, + "loss": 1.0045, "step": 2177 }, { - "epoch": 0.4566995177185993, - "grad_norm": 5.958508989244659, - "learning_rate": 1.9227165543781226e-05, - "loss": 2.0866, + "epoch": 0.30754024286924597, + "grad_norm": 4.2023709573269095, + "learning_rate": 1.97256885687148e-05, + "loss": 1.0069, "step": 2178 }, { - "epoch": 0.45690920528412665, - "grad_norm": 5.930466722596854, - "learning_rate": 1.9226292627068358e-05, - "loss": 2.1167, + "epoch": 0.30768144591923186, + "grad_norm": 4.434431438532583, + "learning_rate": 1.9725333844817688e-05, + "loss": 0.938, "step": 2179 }, { - "epoch": 0.457118892849654, - "grad_norm": 6.41026921843967, - "learning_rate": 1.9225419237491988e-05, - "loss": 2.6108, + "epoch": 0.30782264896921774, + "grad_norm": 4.514349061897778, + "learning_rate": 1.9724978894908017e-05, + "loss": 1.148, "step": 2180 }, { - "epoch": 0.4573285804151814, - "grad_norm": 5.9257686378480585, - "learning_rate": 1.9224545375096887e-05, - "loss": 2.1733, + "epoch": 0.3079638520192036, + "grad_norm": 3.94349057640504, + "learning_rate": 1.9724623718994038e-05, + "loss": 0.944, "step": 2181 }, { - "epoch": 0.45753826798070873, - "grad_norm": 5.801829726257146, - "learning_rate": 1.9223671039927833e-05, - "loss": 2.2511, + "epoch": 0.3081050550691895, + "grad_norm": 4.360945344245002, + "learning_rate": 1.9724268317084e-05, + "loss": 1.1838, "step": 2182 }, { - "epoch": 0.4577479555462361, - "grad_norm": 5.549752188009174, - "learning_rate": 1.922279623202964e-05, - "loss": 2.1756, + "epoch": 0.3082462581191754, + "grad_norm": 3.9156721591909878, + "learning_rate": 1.972391268918617e-05, + "loss": 1.0602, "step": 2183 }, { - "epoch": 0.45795764311176346, - "grad_norm": 5.501722891793999, - "learning_rate": 1.9221920951447147e-05, - "loss": 2.0517, + "epoch": 0.30838746116916127, + "grad_norm": 3.901685389641189, + "learning_rate": 1.9723556835308807e-05, + "loss": 0.9022, "step": 2184 }, { - "epoch": 0.4581673306772908, - "grad_norm": 6.710408918793639, - "learning_rate": 1.922104519822521e-05, - "loss": 2.2381, + "epoch": 0.30852866421914715, + "grad_norm": 5.265854896634157, + "learning_rate": 1.9723200755460183e-05, + "loss": 1.1246, "step": 2185 }, { - "epoch": 0.4583770182428182, - "grad_norm": 5.72671393575448, - "learning_rate": 1.9220168972408713e-05, - "loss": 2.2189, + "epoch": 0.30866986726913304, + "grad_norm": 3.922950898345806, + "learning_rate": 1.9722844449648572e-05, + "loss": 1.1506, "step": 2186 }, { - "epoch": 0.45858670580834554, - "grad_norm": 6.36804343786305, - "learning_rate": 1.9219292274042564e-05, - "loss": 2.1695, + "epoch": 0.3088110703191189, + "grad_norm": 4.982707597455844, + "learning_rate": 1.9722487917882257e-05, + "loss": 1.3951, "step": 2187 }, { - "epoch": 0.4587963933738729, - "grad_norm": 6.132848501859067, - "learning_rate": 1.9218415103171696e-05, - "loss": 2.2488, + "epoch": 0.30895227336910475, + "grad_norm": 4.2837207749021395, + "learning_rate": 1.972213116016952e-05, + "loss": 1.123, "step": 2188 }, { - "epoch": 0.4590060809394003, - "grad_norm": 6.421598509999375, - "learning_rate": 1.9217537459841066e-05, - "loss": 2.055, + "epoch": 0.30909347641909063, + "grad_norm": 6.972628218422345, + "learning_rate": 1.972177417651865e-05, + "loss": 1.0585, "step": 2189 }, { - "epoch": 0.4592157685049277, - "grad_norm": 5.594864388131729, - "learning_rate": 1.9216659344095652e-05, - "loss": 2.0946, + "epoch": 0.3092346794690765, + "grad_norm": 7.33086537059121, + "learning_rate": 1.972141696693795e-05, + "loss": 1.1828, "step": 2190 }, { - "epoch": 0.45942545607045504, - "grad_norm": 6.904996811110198, - "learning_rate": 1.9215780755980465e-05, - "loss": 2.4237, + "epoch": 0.3093758825190624, + "grad_norm": 4.723754246228618, + "learning_rate": 1.972105953143572e-05, + "loss": 1.3529, "step": 2191 }, { - "epoch": 0.4596351436359824, - "grad_norm": 6.545390301866025, - "learning_rate": 1.9214901695540526e-05, - "loss": 2.3833, + "epoch": 0.3095170855690483, + "grad_norm": 5.372719029088434, + "learning_rate": 1.972070187002026e-05, + "loss": 1.3714, "step": 2192 }, { - "epoch": 0.45984483120150976, - "grad_norm": 6.285725749432022, - "learning_rate": 1.9214022162820894e-05, - "loss": 2.0051, + "epoch": 0.30965828861903416, + "grad_norm": 4.490557409952471, + "learning_rate": 1.972034398269989e-05, + "loss": 1.2876, "step": 2193 }, { - "epoch": 0.4600545187670371, - "grad_norm": 5.914363234399766, - "learning_rate": 1.9213142157866645e-05, - "loss": 2.0454, + "epoch": 0.30979949166902004, + "grad_norm": 5.53244491342042, + "learning_rate": 1.9719985869482925e-05, + "loss": 1.2566, "step": 2194 }, { - "epoch": 0.4602642063325645, - "grad_norm": 6.004414256617915, - "learning_rate": 1.9212261680722882e-05, - "loss": 2.2039, + "epoch": 0.3099406947190059, + "grad_norm": 3.817446392756854, + "learning_rate": 1.9719627530377683e-05, + "loss": 1.0397, "step": 2195 }, { - "epoch": 0.46047389389809185, - "grad_norm": 5.345417968860954, - "learning_rate": 1.921138073143473e-05, - "loss": 2.2903, + "epoch": 0.3100818977689918, + "grad_norm": 4.359072692702619, + "learning_rate": 1.9719268965392495e-05, + "loss": 1.151, "step": 2196 }, { - "epoch": 0.4606835814636192, - "grad_norm": 6.728334912224999, - "learning_rate": 1.921049931004734e-05, - "loss": 2.2343, + "epoch": 0.3102231008189777, + "grad_norm": 4.105114312705257, + "learning_rate": 1.9718910174535695e-05, + "loss": 1.1014, "step": 2197 }, { - "epoch": 0.46089326902914657, - "grad_norm": 5.979838770395121, - "learning_rate": 1.9209617416605887e-05, - "loss": 2.2212, + "epoch": 0.3103643038689636, + "grad_norm": 4.039580388619772, + "learning_rate": 1.971855115781562e-05, + "loss": 1.0804, "step": 2198 }, { - "epoch": 0.46110295659467393, - "grad_norm": 5.2713312350823625, - "learning_rate": 1.9208735051155566e-05, - "loss": 2.2407, + "epoch": 0.31050550691894946, + "grad_norm": 4.273555151424891, + "learning_rate": 1.9718191915240613e-05, + "loss": 1.2258, "step": 2199 }, { - "epoch": 0.4613126441602013, - "grad_norm": 5.6957515091447, - "learning_rate": 1.92078522137416e-05, - "loss": 2.1771, + "epoch": 0.31064670996893534, + "grad_norm": 4.290573309578367, + "learning_rate": 1.971783244681902e-05, + "loss": 1.0384, "step": 2200 }, { - "epoch": 0.46152233172572865, - "grad_norm": 6.2806806757058595, - "learning_rate": 1.920696890440924e-05, - "loss": 2.1071, + "epoch": 0.3107879130189212, + "grad_norm": 5.588844665281358, + "learning_rate": 1.97174727525592e-05, + "loss": 1.1098, "step": 2201 }, { - "epoch": 0.461732019291256, - "grad_norm": 6.306937379373556, - "learning_rate": 1.9206085123203752e-05, - "loss": 2.3453, + "epoch": 0.3109291160689071, + "grad_norm": 4.147057914305952, + "learning_rate": 1.971711283246951e-05, + "loss": 1.0316, "step": 2202 }, { - "epoch": 0.4619417068567834, - "grad_norm": 6.864168241152256, - "learning_rate": 1.920520087017044e-05, - "loss": 2.3866, + "epoch": 0.311070319118893, + "grad_norm": 4.7445716258617345, + "learning_rate": 1.9716752686558316e-05, + "loss": 1.2063, "step": 2203 }, { - "epoch": 0.46215139442231074, - "grad_norm": 4.911490014827521, - "learning_rate": 1.9204316145354613e-05, - "loss": 2.0165, + "epoch": 0.3112115221688789, + "grad_norm": 4.50985472440183, + "learning_rate": 1.9716392314833982e-05, + "loss": 0.9951, "step": 2204 }, { - "epoch": 0.4623610819878381, - "grad_norm": 5.850954038316423, - "learning_rate": 1.9203430948801623e-05, - "loss": 2.0233, + "epoch": 0.3113527252188647, + "grad_norm": 4.599093905108446, + "learning_rate": 1.971603171730489e-05, + "loss": 1.1999, "step": 2205 }, { - "epoch": 0.4625707695533655, - "grad_norm": 4.887587741902803, - "learning_rate": 1.9202545280556832e-05, - "loss": 1.9609, + "epoch": 0.3114939282688506, + "grad_norm": 4.470245353988294, + "learning_rate": 1.9715670893979416e-05, + "loss": 0.9959, "step": 2206 }, { - "epoch": 0.4627804571188929, - "grad_norm": 6.35976619595104, - "learning_rate": 1.9201659140665634e-05, - "loss": 2.5015, + "epoch": 0.31163513131883647, + "grad_norm": 3.647674698123316, + "learning_rate": 1.9715309844865948e-05, + "loss": 0.9983, "step": 2207 }, { - "epoch": 0.46299014468442024, - "grad_norm": 5.513955918886161, - "learning_rate": 1.9200772529173447e-05, - "loss": 2.0362, + "epoch": 0.31177633436882235, + "grad_norm": 3.8325591088875135, + "learning_rate": 1.9714948569972873e-05, + "loss": 0.8896, "step": 2208 }, { - "epoch": 0.4631998322499476, - "grad_norm": 5.359570745551045, - "learning_rate": 1.919988544612571e-05, - "loss": 2.2955, + "epoch": 0.31191753741880823, + "grad_norm": 4.322446454861351, + "learning_rate": 1.9714587069308586e-05, + "loss": 1.1614, "step": 2209 }, { - "epoch": 0.46340951981547496, - "grad_norm": 5.435620889286572, - "learning_rate": 1.9198997891567885e-05, - "loss": 2.1157, + "epoch": 0.3120587404687941, + "grad_norm": 4.096968760546903, + "learning_rate": 1.9714225342881495e-05, + "loss": 0.9872, "step": 2210 }, { - "epoch": 0.4636192073810023, - "grad_norm": 5.132557438764978, - "learning_rate": 1.9198109865545465e-05, - "loss": 2.3518, + "epoch": 0.31219994351878, + "grad_norm": 4.43071482148293, + "learning_rate": 1.97138633907e-05, + "loss": 1.0658, "step": 2211 }, { - "epoch": 0.4638288949465297, - "grad_norm": 6.216866769826188, - "learning_rate": 1.919722136810396e-05, - "loss": 2.094, + "epoch": 0.3123411465687659, + "grad_norm": 4.460815617057209, + "learning_rate": 1.9713501212772518e-05, + "loss": 1.0141, "step": 2212 }, { - "epoch": 0.46403858251205704, - "grad_norm": 5.715510949972599, - "learning_rate": 1.919633239928891e-05, - "loss": 2.1414, + "epoch": 0.31248234961875176, + "grad_norm": 4.699188484042622, + "learning_rate": 1.971313880910746e-05, + "loss": 1.136, "step": 2213 }, { - "epoch": 0.4642482700775844, - "grad_norm": 6.05166003185328, - "learning_rate": 1.9195442959145873e-05, - "loss": 2.2933, + "epoch": 0.31262355266873765, + "grad_norm": 3.7511074647728297, + "learning_rate": 1.9712776179713252e-05, + "loss": 1.1509, "step": 2214 }, { - "epoch": 0.46445795764311176, - "grad_norm": 5.795164276777185, - "learning_rate": 1.919455304772044e-05, - "loss": 2.2086, + "epoch": 0.31276475571872353, + "grad_norm": 3.369012966882251, + "learning_rate": 1.971241332459832e-05, + "loss": 0.757, "step": 2215 }, { - "epoch": 0.4646676452086391, - "grad_norm": 5.5968244475981574, - "learning_rate": 1.919366266505821e-05, - "loss": 2.35, + "epoch": 0.3129059587687094, + "grad_norm": 4.6869804728779165, + "learning_rate": 1.9712050243771095e-05, + "loss": 0.9483, "step": 2216 }, { - "epoch": 0.4648773327741665, - "grad_norm": 5.231984952353256, - "learning_rate": 1.9192771811204823e-05, - "loss": 2.0885, + "epoch": 0.3130471618186953, + "grad_norm": 4.6090962133332845, + "learning_rate": 1.971168693724002e-05, + "loss": 1.0309, "step": 2217 }, { - "epoch": 0.46508702033969385, - "grad_norm": 5.507646860851503, - "learning_rate": 1.919188048620594e-05, - "loss": 1.9552, + "epoch": 0.3131883648686812, + "grad_norm": 4.275677536908616, + "learning_rate": 1.9711323405013535e-05, + "loss": 1.0831, "step": 2218 }, { - "epoch": 0.4652967079052212, - "grad_norm": 5.342541964172121, - "learning_rate": 1.919098869010724e-05, - "loss": 2.2781, + "epoch": 0.31332956791866706, + "grad_norm": 4.8223103568116255, + "learning_rate": 1.9710959647100086e-05, + "loss": 1.2202, "step": 2219 }, { - "epoch": 0.46550639547074857, - "grad_norm": 5.129520596276407, - "learning_rate": 1.9190096422954427e-05, - "loss": 2.0327, + "epoch": 0.31347077096865295, + "grad_norm": 4.12401500763805, + "learning_rate": 1.9710595663508125e-05, + "loss": 1.0846, "step": 2220 }, { - "epoch": 0.46571608303627593, - "grad_norm": 6.499360271125452, - "learning_rate": 1.9189203684793233e-05, - "loss": 2.0239, + "epoch": 0.31361197401863883, + "grad_norm": 3.570240360000423, + "learning_rate": 1.971023145424612e-05, + "loss": 0.9074, "step": 2221 }, { - "epoch": 0.4659257706018033, - "grad_norm": 5.407031658580738, - "learning_rate": 1.918831047566941e-05, - "loss": 2.126, + "epoch": 0.31375317706862466, + "grad_norm": 5.3735486917820126, + "learning_rate": 1.9709867019322528e-05, + "loss": 1.1143, "step": 2222 }, { - "epoch": 0.46613545816733065, - "grad_norm": 5.8079699961900655, - "learning_rate": 1.9187416795628743e-05, - "loss": 2.1462, + "epoch": 0.31389438011861054, + "grad_norm": 6.981542359363994, + "learning_rate": 1.970950235874582e-05, + "loss": 1.226, "step": 2223 }, { - "epoch": 0.46634514573285807, - "grad_norm": 6.990552814762791, - "learning_rate": 1.9186522644717027e-05, - "loss": 2.24, + "epoch": 0.3140355831685964, + "grad_norm": 4.733503256508291, + "learning_rate": 1.970913747252447e-05, + "loss": 1.2104, "step": 2224 }, { - "epoch": 0.46655483329838543, - "grad_norm": 5.912756516457348, - "learning_rate": 1.9185628022980093e-05, - "loss": 1.9577, + "epoch": 0.3141767862185823, + "grad_norm": 4.502802855125862, + "learning_rate": 1.9708772360666958e-05, + "loss": 0.9966, "step": 2225 }, { - "epoch": 0.4667645208639128, - "grad_norm": 5.535657852727496, - "learning_rate": 1.9184732930463796e-05, - "loss": 2.3345, + "epoch": 0.3143179892685682, + "grad_norm": 4.824824802941916, + "learning_rate": 1.970840702318177e-05, + "loss": 1.1623, "step": 2226 }, { - "epoch": 0.46697420842944015, - "grad_norm": 6.433725293753883, - "learning_rate": 1.9183837367214e-05, - "loss": 2.2902, + "epoch": 0.31445919231855407, + "grad_norm": 4.60102460178849, + "learning_rate": 1.970804146007739e-05, + "loss": 1.0669, "step": 2227 }, { - "epoch": 0.4671838959949675, - "grad_norm": 7.117438170345291, - "learning_rate": 1.9182941333276614e-05, - "loss": 2.0742, + "epoch": 0.31460039536853995, + "grad_norm": 4.102688051779516, + "learning_rate": 1.9707675671362328e-05, + "loss": 1.0422, "step": 2228 }, { - "epoch": 0.4673935835604949, - "grad_norm": 5.761837137751725, - "learning_rate": 1.918204482869755e-05, - "loss": 1.9299, + "epoch": 0.31474159841852584, + "grad_norm": 4.9530865705857305, + "learning_rate": 1.970730965704507e-05, + "loss": 1.2153, "step": 2229 }, { - "epoch": 0.46760327112602224, - "grad_norm": 6.246421759726161, - "learning_rate": 1.918114785352277e-05, - "loss": 2.2229, + "epoch": 0.3148828014685117, + "grad_norm": 5.647072408080908, + "learning_rate": 1.970694341713413e-05, + "loss": 1.3566, "step": 2230 }, { - "epoch": 0.4678129586915496, - "grad_norm": 5.404958116350511, - "learning_rate": 1.9180250407798242e-05, - "loss": 2.0502, + "epoch": 0.3150240045184976, + "grad_norm": 4.410612812401028, + "learning_rate": 1.970657695163802e-05, + "loss": 1.1777, "step": 2231 }, { - "epoch": 0.46802264625707696, - "grad_norm": 5.85599494447238, - "learning_rate": 1.9179352491569953e-05, - "loss": 2.3847, + "epoch": 0.3151652075684835, + "grad_norm": 4.179832746892072, + "learning_rate": 1.9706210260565246e-05, + "loss": 0.9849, "step": 2232 }, { - "epoch": 0.4682323338226043, - "grad_norm": 5.191862108195184, - "learning_rate": 1.917845410488393e-05, - "loss": 2.3421, + "epoch": 0.31530641061846937, + "grad_norm": 5.6119791472771166, + "learning_rate": 1.9705843343924343e-05, + "loss": 1.4604, "step": 2233 }, { - "epoch": 0.4684420213881317, - "grad_norm": 5.263309440272586, - "learning_rate": 1.9177555247786212e-05, - "loss": 2.0166, + "epoch": 0.31544761366845525, + "grad_norm": 4.531309335191492, + "learning_rate": 1.970547620172383e-05, + "loss": 0.9494, "step": 2234 }, { - "epoch": 0.46865170895365904, - "grad_norm": 5.866289917141282, - "learning_rate": 1.9176655920322872e-05, - "loss": 2.1082, + "epoch": 0.31558881671844113, + "grad_norm": 4.138794717710657, + "learning_rate": 1.9705108833972245e-05, + "loss": 1.1322, "step": 2235 }, { - "epoch": 0.4688613965191864, - "grad_norm": 5.873472828637023, - "learning_rate": 1.9175756122540002e-05, - "loss": 2.2015, + "epoch": 0.315730019768427, + "grad_norm": 3.723050722715303, + "learning_rate": 1.9704741240678117e-05, + "loss": 0.8949, "step": 2236 }, { - "epoch": 0.46907108408471376, - "grad_norm": 5.883851351902903, - "learning_rate": 1.9174855854483715e-05, - "loss": 2.0549, + "epoch": 0.3158712228184129, + "grad_norm": 4.308460466255025, + "learning_rate": 1.9704373421849997e-05, + "loss": 1.2013, "step": 2237 }, { - "epoch": 0.4692807716502411, - "grad_norm": 5.392928383936007, - "learning_rate": 1.917395511620015e-05, - "loss": 2.1924, + "epoch": 0.3160124258683988, + "grad_norm": 4.02847827917829, + "learning_rate": 1.9704005377496428e-05, + "loss": 0.8946, "step": 2238 }, { - "epoch": 0.4694904592157685, - "grad_norm": 5.5921929222847, - "learning_rate": 1.9173053907735475e-05, - "loss": 1.8906, + "epoch": 0.3161536289183846, + "grad_norm": 4.146422648055185, + "learning_rate": 1.9703637107625968e-05, + "loss": 0.9191, "step": 2239 }, { - "epoch": 0.46970014678129585, - "grad_norm": 5.9949091249008335, - "learning_rate": 1.9172152229135878e-05, - "loss": 2.3645, + "epoch": 0.3162948319683705, + "grad_norm": 5.242960843481294, + "learning_rate": 1.9703268612247172e-05, + "loss": 1.1627, "step": 2240 }, { - "epoch": 0.4699098343468232, - "grad_norm": 5.907666521846647, - "learning_rate": 1.917125008044757e-05, - "loss": 1.8574, + "epoch": 0.3164360350183564, + "grad_norm": 3.6915202857414284, + "learning_rate": 1.9702899891368598e-05, + "loss": 0.9252, "step": 2241 }, { - "epoch": 0.4701195219123506, - "grad_norm": 5.321537405818213, - "learning_rate": 1.917034746171679e-05, - "loss": 2.156, + "epoch": 0.31657723806834226, + "grad_norm": 3.900462169800725, + "learning_rate": 1.9702530944998825e-05, + "loss": 1.1165, "step": 2242 }, { - "epoch": 0.470329209477878, - "grad_norm": 5.113487971303939, - "learning_rate": 1.91694443729898e-05, - "loss": 2.1939, + "epoch": 0.31671844111832814, + "grad_norm": 4.23113707821588, + "learning_rate": 1.9702161773146425e-05, + "loss": 1.1995, "step": 2243 }, { - "epoch": 0.47053889704340535, - "grad_norm": 6.015557514778198, - "learning_rate": 1.9168540814312875e-05, - "loss": 2.1233, + "epoch": 0.316859644168314, + "grad_norm": 5.817870853456311, + "learning_rate": 1.9701792375819974e-05, + "loss": 1.3674, "step": 2244 }, { - "epoch": 0.4707485846089327, - "grad_norm": 7.187292944994101, - "learning_rate": 1.9167636785732337e-05, - "loss": 2.1076, + "epoch": 0.3170008472182999, + "grad_norm": 5.23726859943459, + "learning_rate": 1.9701422753028056e-05, + "loss": 1.2433, "step": 2245 }, { - "epoch": 0.47095827217446007, - "grad_norm": 5.731787413492678, - "learning_rate": 1.9166732287294512e-05, - "loss": 2.2406, + "epoch": 0.3171420502682858, + "grad_norm": 3.8637380779963397, + "learning_rate": 1.9701052904779265e-05, + "loss": 1.0092, "step": 2246 }, { - "epoch": 0.47116795973998743, - "grad_norm": 5.844299153066626, - "learning_rate": 1.9165827319045757e-05, - "loss": 1.8746, + "epoch": 0.3172832533182717, + "grad_norm": 3.8394782560806635, + "learning_rate": 1.9700682831082192e-05, + "loss": 0.8823, "step": 2247 }, { - "epoch": 0.4713776473055148, - "grad_norm": 6.6831618795984475, - "learning_rate": 1.9164921881032454e-05, - "loss": 2.4474, + "epoch": 0.31742445636825756, + "grad_norm": 3.531391740158467, + "learning_rate": 1.9700312531945444e-05, + "loss": 0.6829, "step": 2248 }, { - "epoch": 0.47158733487104215, - "grad_norm": 5.211801386725437, - "learning_rate": 1.9164015973301012e-05, - "loss": 1.8424, + "epoch": 0.31756565941824344, + "grad_norm": 4.321739130353061, + "learning_rate": 1.9699942007377615e-05, + "loss": 0.9913, "step": 2249 }, { - "epoch": 0.4717970224365695, - "grad_norm": 6.9212471379060085, - "learning_rate": 1.9163109595897854e-05, - "loss": 2.2099, + "epoch": 0.3177068624682293, + "grad_norm": 4.517541202923386, + "learning_rate": 1.969957125738733e-05, + "loss": 1.0155, "step": 2250 }, { - "epoch": 0.4720067100020969, - "grad_norm": 6.116129951239323, - "learning_rate": 1.916220274886944e-05, - "loss": 2.1847, + "epoch": 0.3178480655182152, + "grad_norm": 4.064080497860532, + "learning_rate": 1.9699200281983193e-05, + "loss": 1.1729, "step": 2251 }, { - "epoch": 0.47221639756762424, - "grad_norm": 5.490557160858078, - "learning_rate": 1.9161295432262243e-05, - "loss": 2.3115, + "epoch": 0.3179892685682011, + "grad_norm": 4.076528011985761, + "learning_rate": 1.969882908117383e-05, + "loss": 0.9392, "step": 2252 }, { - "epoch": 0.4724260851331516, - "grad_norm": 6.0974434254007255, - "learning_rate": 1.9160387646122765e-05, - "loss": 2.304, + "epoch": 0.31813047161818697, + "grad_norm": 6.294534121552598, + "learning_rate": 1.969845765496787e-05, + "loss": 1.0236, "step": 2253 }, { - "epoch": 0.47263577269867896, - "grad_norm": 6.225053555931331, - "learning_rate": 1.9159479390497535e-05, - "loss": 2.1636, + "epoch": 0.31827167466817285, + "grad_norm": 4.67106701428831, + "learning_rate": 1.969808600337394e-05, + "loss": 1.1509, "step": 2254 }, { - "epoch": 0.4728454602642063, - "grad_norm": 5.903043682410186, - "learning_rate": 1.9158570665433093e-05, - "loss": 2.2441, + "epoch": 0.31841287771815874, + "grad_norm": 5.387895298890177, + "learning_rate": 1.9697714126400685e-05, + "loss": 1.1714, "step": 2255 }, { - "epoch": 0.4730551478297337, - "grad_norm": 5.603766050656394, - "learning_rate": 1.9157661470976022e-05, - "loss": 1.9925, + "epoch": 0.31855408076814457, + "grad_norm": 4.062775613949614, + "learning_rate": 1.969734202405674e-05, + "loss": 1.0541, "step": 2256 }, { - "epoch": 0.47326483539526104, - "grad_norm": 5.458032051711044, - "learning_rate": 1.915675180717292e-05, - "loss": 2.0725, + "epoch": 0.31869528381813045, + "grad_norm": 3.8406219647303033, + "learning_rate": 1.969696969635075e-05, + "loss": 1.0246, "step": 2257 }, { - "epoch": 0.4734745229607884, - "grad_norm": 5.405296290141715, - "learning_rate": 1.9155841674070408e-05, - "loss": 1.8653, + "epoch": 0.31883648686811633, + "grad_norm": 3.7054376967390894, + "learning_rate": 1.9696597143291377e-05, + "loss": 1.1746, "step": 2258 }, { - "epoch": 0.47368421052631576, - "grad_norm": 5.724640047374485, - "learning_rate": 1.915493107171513e-05, - "loss": 2.0941, + "epoch": 0.3189776899181022, + "grad_norm": 5.534654597134407, + "learning_rate": 1.969622436488727e-05, + "loss": 1.1577, "step": 2259 }, { - "epoch": 0.4738938980918432, - "grad_norm": 6.01612171779029, - "learning_rate": 1.915402000015375e-05, - "loss": 2.2116, + "epoch": 0.3191188929680881, + "grad_norm": 5.208506771007611, + "learning_rate": 1.9695851361147098e-05, + "loss": 1.2104, "step": 2260 }, { - "epoch": 0.47410358565737054, - "grad_norm": 6.375110819849119, - "learning_rate": 1.915310845943297e-05, - "loss": 1.9799, + "epoch": 0.319260096018074, + "grad_norm": 4.781560300561307, + "learning_rate": 1.969547813207953e-05, + "loss": 1.0875, "step": 2261 }, { - "epoch": 0.4743132732228979, - "grad_norm": 5.713590711588439, - "learning_rate": 1.915219644959951e-05, - "loss": 2.2308, + "epoch": 0.31940129906805986, + "grad_norm": 3.120448096867317, + "learning_rate": 1.9695104677693234e-05, + "loss": 0.8372, "step": 2262 }, { - "epoch": 0.47452296078842526, - "grad_norm": 5.208890653753317, - "learning_rate": 1.9151283970700107e-05, - "loss": 2.2139, + "epoch": 0.31954250211804575, + "grad_norm": 4.460284041440932, + "learning_rate": 1.9694730997996894e-05, + "loss": 1.2279, "step": 2263 }, { - "epoch": 0.4747326483539526, - "grad_norm": 6.154497579160804, - "learning_rate": 1.915037102278153e-05, - "loss": 2.0922, + "epoch": 0.31968370516803163, + "grad_norm": 4.762989993413503, + "learning_rate": 1.969435709299919e-05, + "loss": 1.2951, "step": 2264 }, { - "epoch": 0.47494233591948, - "grad_norm": 5.844617358520422, - "learning_rate": 1.914945760589056e-05, - "loss": 2.3324, + "epoch": 0.3198249082180175, + "grad_norm": 3.7743808194138606, + "learning_rate": 1.9693982962708817e-05, + "loss": 1.0129, "step": 2265 }, { - "epoch": 0.47515202348500735, - "grad_norm": 5.6421170430866905, - "learning_rate": 1.9148543720074027e-05, - "loss": 2.1882, + "epoch": 0.3199661112680034, + "grad_norm": 3.853819806092262, + "learning_rate": 1.9693608607134466e-05, + "loss": 1.1274, "step": 2266 }, { - "epoch": 0.4753617110505347, - "grad_norm": 5.589636396631507, - "learning_rate": 1.914762936537876e-05, - "loss": 2.2831, + "epoch": 0.3201073143179893, + "grad_norm": 4.552559038536874, + "learning_rate": 1.9693234026284838e-05, + "loss": 1.1363, "step": 2267 }, { - "epoch": 0.47557139861606207, - "grad_norm": 5.7891177306391315, - "learning_rate": 1.9146714541851617e-05, - "loss": 2.1765, + "epoch": 0.32024851736797516, + "grad_norm": 4.083531282015146, + "learning_rate": 1.9692859220168634e-05, + "loss": 1.1818, "step": 2268 }, { - "epoch": 0.47578108618158943, - "grad_norm": 7.833356649071576, - "learning_rate": 1.9145799249539497e-05, - "loss": 2.6146, + "epoch": 0.32038972041796104, + "grad_norm": 3.801218347133587, + "learning_rate": 1.969248418879457e-05, + "loss": 0.9667, "step": 2269 }, { - "epoch": 0.4759907737471168, - "grad_norm": 6.164159591348915, - "learning_rate": 1.91448834884893e-05, - "loss": 2.3648, + "epoch": 0.3205309234679469, + "grad_norm": 4.3754034926446375, + "learning_rate": 1.969210893217136e-05, + "loss": 1.0672, "step": 2270 }, { - "epoch": 0.47620046131264415, - "grad_norm": 5.507057721589435, - "learning_rate": 1.914396725874796e-05, - "loss": 1.8405, + "epoch": 0.3206721265179328, + "grad_norm": 3.3930154921345475, + "learning_rate": 1.9691733450307723e-05, + "loss": 0.9784, "step": 2271 }, { - "epoch": 0.4764101488781715, - "grad_norm": 5.483572197943918, - "learning_rate": 1.914305056036244e-05, - "loss": 2.125, + "epoch": 0.3208133295679187, + "grad_norm": 3.617325007809873, + "learning_rate": 1.9691357743212385e-05, + "loss": 0.9056, "step": 2272 }, { - "epoch": 0.4766198364436989, - "grad_norm": 6.035955026227375, - "learning_rate": 1.9142133393379724e-05, - "loss": 2.5276, + "epoch": 0.3209545326179045, + "grad_norm": 3.7901050149973483, + "learning_rate": 1.969098181089408e-05, + "loss": 0.9735, "step": 2273 }, { - "epoch": 0.47682952400922624, - "grad_norm": 5.229814449143713, - "learning_rate": 1.9141215757846812e-05, - "loss": 2.1619, + "epoch": 0.3210957356678904, + "grad_norm": 3.4871191393006806, + "learning_rate": 1.969060565336154e-05, + "loss": 0.8325, "step": 2274 }, { - "epoch": 0.4770392115747536, - "grad_norm": 5.662789258735806, - "learning_rate": 1.914029765381074e-05, - "loss": 2.1363, + "epoch": 0.3212369387178763, + "grad_norm": 4.037254679862918, + "learning_rate": 1.9690229270623512e-05, + "loss": 1.066, "step": 2275 }, { - "epoch": 0.47724889914028096, - "grad_norm": 5.239699392383437, - "learning_rate": 1.913937908131856e-05, - "loss": 2.148, + "epoch": 0.32137814176786217, + "grad_norm": 5.024526437284346, + "learning_rate": 1.9689852662688743e-05, + "loss": 1.0779, "step": 2276 }, { - "epoch": 0.4774585867058083, - "grad_norm": 5.488282216570894, - "learning_rate": 1.9138460040417346e-05, - "loss": 2.4493, + "epoch": 0.32151934481784805, + "grad_norm": 4.014150004299187, + "learning_rate": 1.9689475829565983e-05, + "loss": 0.9446, "step": 2277 }, { - "epoch": 0.47766827427133574, - "grad_norm": 6.315343853868752, - "learning_rate": 1.913754053115421e-05, - "loss": 2.3307, + "epoch": 0.32166054786783393, + "grad_norm": 5.310095403068603, + "learning_rate": 1.9689098771263982e-05, + "loss": 1.3517, "step": 2278 }, { - "epoch": 0.4778779618368631, - "grad_norm": 5.6080925670579465, - "learning_rate": 1.9136620553576272e-05, - "loss": 2.107, + "epoch": 0.3218017509178198, + "grad_norm": 3.932002534868036, + "learning_rate": 1.9688721487791516e-05, + "loss": 1.0366, "step": 2279 }, { - "epoch": 0.47808764940239046, - "grad_norm": 5.831982304680028, - "learning_rate": 1.9135700107730682e-05, - "loss": 2.2452, + "epoch": 0.3219429539678057, + "grad_norm": 4.314535282748354, + "learning_rate": 1.9688343979157348e-05, + "loss": 1.0116, "step": 2280 }, { - "epoch": 0.4782973369679178, - "grad_norm": 5.787905327077409, - "learning_rate": 1.913477919366462e-05, - "loss": 2.2639, + "epoch": 0.3220841570177916, + "grad_norm": 4.922067903596106, + "learning_rate": 1.9687966245370248e-05, + "loss": 1.0343, "step": 2281 }, { - "epoch": 0.4785070245334452, - "grad_norm": 5.442313003708371, - "learning_rate": 1.913385781142528e-05, - "loss": 2.141, + "epoch": 0.32222536006777747, + "grad_norm": 5.30805813701868, + "learning_rate": 1.9687588286438995e-05, + "loss": 1.2836, "step": 2282 }, { - "epoch": 0.47871671209897254, - "grad_norm": 5.547397250284771, - "learning_rate": 1.9132935961059886e-05, - "loss": 2.0073, + "epoch": 0.32236656311776335, + "grad_norm": 4.3862063056249605, + "learning_rate": 1.9687210102372376e-05, + "loss": 1.1088, "step": 2283 }, { - "epoch": 0.4789263996644999, - "grad_norm": 6.108542161008461, - "learning_rate": 1.9132013642615682e-05, - "loss": 2.09, + "epoch": 0.32250776616774923, + "grad_norm": 4.7312264962376505, + "learning_rate": 1.9686831693179178e-05, + "loss": 1.2889, "step": 2284 }, { - "epoch": 0.47913608723002726, - "grad_norm": 5.398926224578731, - "learning_rate": 1.9131090856139944e-05, - "loss": 2.201, + "epoch": 0.3226489692177351, + "grad_norm": 4.444645986117651, + "learning_rate": 1.9686453058868194e-05, + "loss": 1.0056, "step": 2285 }, { - "epoch": 0.4793457747955546, - "grad_norm": 5.52596060057175, - "learning_rate": 1.9130167601679956e-05, - "loss": 2.221, + "epoch": 0.322790172267721, + "grad_norm": 3.5145813062527655, + "learning_rate": 1.9686074199448222e-05, + "loss": 0.8307, "step": 2286 }, { - "epoch": 0.479555462361082, - "grad_norm": 6.949766376300676, - "learning_rate": 1.9129243879283046e-05, - "loss": 2.3306, + "epoch": 0.3229313753177069, + "grad_norm": 4.531713080558804, + "learning_rate": 1.9685695114928073e-05, + "loss": 1.1106, "step": 2287 }, { - "epoch": 0.47976514992660935, - "grad_norm": 5.886875888686392, - "learning_rate": 1.9128319688996555e-05, - "loss": 2.0702, + "epoch": 0.32307257836769276, + "grad_norm": 4.376349285533183, + "learning_rate": 1.9685315805316548e-05, + "loss": 1.1299, "step": 2288 }, { - "epoch": 0.4799748374921367, - "grad_norm": 6.6835449840955965, - "learning_rate": 1.9127395030867845e-05, - "loss": 2.3101, + "epoch": 0.32321378141767865, + "grad_norm": 4.613639883725166, + "learning_rate": 1.968493627062247e-05, + "loss": 1.133, "step": 2289 }, { - "epoch": 0.48018452505766407, - "grad_norm": 5.8480708469635685, - "learning_rate": 1.912646990494431e-05, - "loss": 2.0345, + "epoch": 0.3233549844676645, + "grad_norm": 4.565096596178036, + "learning_rate": 1.9684556510854655e-05, + "loss": 1.2705, "step": 2290 }, { - "epoch": 0.48039421262319143, - "grad_norm": 5.840093448971638, - "learning_rate": 1.912554431127336e-05, - "loss": 2.0652, + "epoch": 0.32349618751765036, + "grad_norm": 4.97049887256306, + "learning_rate": 1.968417652602193e-05, + "loss": 1.0895, "step": 2291 }, { - "epoch": 0.4806039001887188, - "grad_norm": 5.739225106424425, - "learning_rate": 1.9124618249902444e-05, - "loss": 2.1156, + "epoch": 0.32363739056763624, + "grad_norm": 3.8781297688908, + "learning_rate": 1.968379631613312e-05, + "loss": 0.9398, "step": 2292 }, { - "epoch": 0.48081358775424615, - "grad_norm": 6.867760260883154, - "learning_rate": 1.912369172087901e-05, - "loss": 2.2225, + "epoch": 0.3237785936176221, + "grad_norm": 3.6134142846663124, + "learning_rate": 1.968341588119707e-05, + "loss": 0.9916, "step": 2293 }, { - "epoch": 0.4810232753197735, - "grad_norm": 6.028072151540249, - "learning_rate": 1.9122764724250555e-05, - "loss": 1.9526, + "epoch": 0.323919796667608, + "grad_norm": 4.816024744581271, + "learning_rate": 1.9683035221222617e-05, + "loss": 1.3077, "step": 2294 }, { - "epoch": 0.4812329628853009, - "grad_norm": 5.9129118702387045, - "learning_rate": 1.9121837260064585e-05, - "loss": 2.1773, + "epoch": 0.3240609997175939, + "grad_norm": 4.112581548716897, + "learning_rate": 1.9682654336218606e-05, + "loss": 1.0834, "step": 2295 }, { - "epoch": 0.4814426504508283, - "grad_norm": 6.4484740762525945, - "learning_rate": 1.9120909328368636e-05, - "loss": 1.8367, + "epoch": 0.3242022027675798, + "grad_norm": 4.179499416866548, + "learning_rate": 1.968227322619389e-05, + "loss": 0.9383, "step": 2296 }, { - "epoch": 0.48165233801635565, - "grad_norm": 6.639897003379941, - "learning_rate": 1.911998092921026e-05, - "loss": 2.1958, + "epoch": 0.32434340581756566, + "grad_norm": 3.7838338554642688, + "learning_rate": 1.9681891891157324e-05, + "loss": 0.7676, "step": 2297 }, { - "epoch": 0.481862025581883, - "grad_norm": 6.37943282307499, - "learning_rate": 1.9119052062637046e-05, - "loss": 2.1032, + "epoch": 0.32448460886755154, + "grad_norm": 4.479553073138848, + "learning_rate": 1.968151033111777e-05, + "loss": 1.4906, "step": 2298 }, { - "epoch": 0.4820717131474104, - "grad_norm": 6.378506483538332, - "learning_rate": 1.9118122728696596e-05, - "loss": 2.3076, + "epoch": 0.3246258119175374, + "grad_norm": 3.7491469144109204, + "learning_rate": 1.9681128546084106e-05, + "loss": 1.0551, "step": 2299 }, { - "epoch": 0.48228140071293774, - "grad_norm": 7.335221080156312, - "learning_rate": 1.9117192927436546e-05, - "loss": 2.2956, + "epoch": 0.3247670149675233, + "grad_norm": 4.298377019188501, + "learning_rate": 1.968074653606519e-05, + "loss": 1.159, "step": 2300 }, { - "epoch": 0.4824910882784651, - "grad_norm": 5.463400886485292, - "learning_rate": 1.9116262658904542e-05, - "loss": 2.0164, + "epoch": 0.3249082180175092, + "grad_norm": 4.928129958302385, + "learning_rate": 1.9680364301069903e-05, + "loss": 1.4335, "step": 2301 }, { - "epoch": 0.48270077584399246, - "grad_norm": 5.043648173543413, - "learning_rate": 1.9115331923148264e-05, - "loss": 1.9418, + "epoch": 0.32504942106749507, + "grad_norm": 4.412452828258116, + "learning_rate": 1.967998184110713e-05, + "loss": 1.1969, "step": 2302 }, { - "epoch": 0.4829104634095198, - "grad_norm": 6.11473623913284, - "learning_rate": 1.9114400720215416e-05, - "loss": 1.9098, + "epoch": 0.32519062411748095, + "grad_norm": 3.9405662994701096, + "learning_rate": 1.9679599156185765e-05, + "loss": 1.0777, "step": 2303 }, { - "epoch": 0.4831201509750472, - "grad_norm": 6.045354085085585, - "learning_rate": 1.9113469050153727e-05, - "loss": 1.9945, + "epoch": 0.32533182716746684, + "grad_norm": 4.2657046789376, + "learning_rate": 1.9679216246314694e-05, + "loss": 1.0045, "step": 2304 }, { - "epoch": 0.48332983854057454, - "grad_norm": 5.897279169964477, - "learning_rate": 1.9112536913010938e-05, - "loss": 2.1018, + "epoch": 0.3254730302174527, + "grad_norm": 3.597822329291592, + "learning_rate": 1.9678833111502816e-05, + "loss": 0.9852, "step": 2305 }, { - "epoch": 0.4835395261061019, - "grad_norm": 7.209973801693477, - "learning_rate": 1.911160430883483e-05, - "loss": 2.4253, + "epoch": 0.3256142332674386, + "grad_norm": 3.6559826198117493, + "learning_rate": 1.9678449751759036e-05, + "loss": 0.9243, "step": 2306 }, { - "epoch": 0.48374921367162926, - "grad_norm": 6.068208849899813, - "learning_rate": 1.9110671237673193e-05, - "loss": 2.1066, + "epoch": 0.32575543631742443, + "grad_norm": 4.519659469856864, + "learning_rate": 1.9678066167092266e-05, + "loss": 1.323, "step": 2307 }, { - "epoch": 0.4839589012371566, - "grad_norm": 7.019115825772522, - "learning_rate": 1.9109737699573858e-05, - "loss": 1.9369, + "epoch": 0.3258966393674103, + "grad_norm": 3.736198557751935, + "learning_rate": 1.9677682357511415e-05, + "loss": 0.9683, "step": 2308 }, { - "epoch": 0.484168588802684, - "grad_norm": 5.980490709073885, - "learning_rate": 1.9108803694584666e-05, - "loss": 2.1301, + "epoch": 0.3260378424173962, + "grad_norm": 4.819416542090107, + "learning_rate": 1.9677298323025406e-05, + "loss": 1.1789, "step": 2309 }, { - "epoch": 0.48437827636821135, - "grad_norm": 7.093500850835242, - "learning_rate": 1.9107869222753486e-05, - "loss": 2.3909, + "epoch": 0.3261790454673821, + "grad_norm": 3.9161058305992733, + "learning_rate": 1.9676914063643165e-05, + "loss": 1.1343, "step": 2310 }, { - "epoch": 0.4845879639337387, - "grad_norm": 5.809965110153048, - "learning_rate": 1.910693428412821e-05, - "loss": 1.8726, + "epoch": 0.32632024851736796, + "grad_norm": 4.0034884591293425, + "learning_rate": 1.967652957937362e-05, + "loss": 1.0775, "step": 2311 }, { - "epoch": 0.48479765149926607, - "grad_norm": 5.661632544245621, - "learning_rate": 1.9105998878756758e-05, - "loss": 2.18, + "epoch": 0.32646145156735384, + "grad_norm": 4.582151915522272, + "learning_rate": 1.9676144870225705e-05, + "loss": 1.194, "step": 2312 }, { - "epoch": 0.48500733906479343, - "grad_norm": 7.238416599463696, - "learning_rate": 1.910506300668707e-05, - "loss": 2.4346, + "epoch": 0.3266026546173397, + "grad_norm": 3.7953146737823333, + "learning_rate": 1.9675759936208366e-05, + "loss": 1.0392, "step": 2313 }, { - "epoch": 0.48521702663032085, - "grad_norm": 5.474727495118865, - "learning_rate": 1.910412666796711e-05, - "loss": 2.3721, + "epoch": 0.3267438576673256, + "grad_norm": 4.07181436837981, + "learning_rate": 1.967537477733054e-05, + "loss": 1.1028, "step": 2314 }, { - "epoch": 0.4854267141958482, - "grad_norm": 6.295115074822989, - "learning_rate": 1.910318986264487e-05, - "loss": 1.8663, + "epoch": 0.3268850607173115, + "grad_norm": 3.7774592887637493, + "learning_rate": 1.967498939360118e-05, + "loss": 1.1445, "step": 2315 }, { - "epoch": 0.48563640176137557, - "grad_norm": 5.689241407981616, - "learning_rate": 1.9102252590768357e-05, - "loss": 2.1643, + "epoch": 0.3270262637672974, + "grad_norm": 4.80717853239273, + "learning_rate": 1.9674603785029252e-05, + "loss": 1.3526, "step": 2316 }, { - "epoch": 0.48584608932690293, - "grad_norm": 5.270619351102859, - "learning_rate": 1.9101314852385614e-05, - "loss": 2.3577, + "epoch": 0.32716746681728326, + "grad_norm": 4.115197795540143, + "learning_rate": 1.967421795162371e-05, + "loss": 0.9775, "step": 2317 }, { - "epoch": 0.4860557768924303, - "grad_norm": 6.2924365733813445, - "learning_rate": 1.91003766475447e-05, - "loss": 2.1038, + "epoch": 0.32730866986726914, + "grad_norm": 4.2540951339164135, + "learning_rate": 1.967383189339352e-05, + "loss": 1.271, "step": 2318 }, { - "epoch": 0.48626546445795765, - "grad_norm": 5.870149865231637, - "learning_rate": 1.90994379762937e-05, - "loss": 2.2398, + "epoch": 0.327449872917255, + "grad_norm": 4.4396000163122515, + "learning_rate": 1.967344561034765e-05, + "loss": 1.1593, "step": 2319 }, { - "epoch": 0.486475152023485, - "grad_norm": 5.217401366307402, - "learning_rate": 1.9098498838680716e-05, - "loss": 2.2572, + "epoch": 0.3275910759672409, + "grad_norm": 3.8358837850269305, + "learning_rate": 1.9673059102495084e-05, + "loss": 1.0068, "step": 2320 }, { - "epoch": 0.4866848395890124, - "grad_norm": 4.934154555265756, - "learning_rate": 1.9097559234753893e-05, - "loss": 1.9839, + "epoch": 0.3277322790172268, + "grad_norm": 4.513169062795983, + "learning_rate": 1.9672672369844802e-05, + "loss": 1.154, "step": 2321 }, { - "epoch": 0.48689452715453974, - "grad_norm": 6.306084827825201, - "learning_rate": 1.9096619164561376e-05, - "loss": 2.3003, + "epoch": 0.3278734820672127, + "grad_norm": 3.6129045320024384, + "learning_rate": 1.967228541240579e-05, + "loss": 1.0916, "step": 2322 }, { - "epoch": 0.4871042147200671, - "grad_norm": 5.420472033131771, - "learning_rate": 1.909567862815135e-05, - "loss": 2.1472, + "epoch": 0.32801468511719856, + "grad_norm": 4.075394387412882, + "learning_rate": 1.9671898230187046e-05, + "loss": 1.0195, "step": 2323 }, { - "epoch": 0.48731390228559446, - "grad_norm": 5.766418672886656, - "learning_rate": 1.9094737625572022e-05, - "loss": 1.9894, + "epoch": 0.3281558881671844, + "grad_norm": 4.350846032119964, + "learning_rate": 1.9671510823197562e-05, + "loss": 0.9136, "step": 2324 }, { - "epoch": 0.4875235898511218, - "grad_norm": 5.4623096039110495, - "learning_rate": 1.9093796156871612e-05, - "loss": 1.9375, + "epoch": 0.32829709121717027, + "grad_norm": 4.356668821258284, + "learning_rate": 1.967112319144634e-05, + "loss": 1.0915, "step": 2325 }, { - "epoch": 0.4877332774166492, - "grad_norm": 5.387138463765321, - "learning_rate": 1.909285422209838e-05, - "loss": 2.1706, + "epoch": 0.32843829426715615, + "grad_norm": 5.094593064698161, + "learning_rate": 1.9670735334942398e-05, + "loss": 1.3087, "step": 2326 }, { - "epoch": 0.48794296498217654, - "grad_norm": 5.439546977728199, - "learning_rate": 1.9091911821300594e-05, - "loss": 1.9532, + "epoch": 0.32857949731714203, + "grad_norm": 3.8518625505789803, + "learning_rate": 1.967034725369474e-05, + "loss": 0.9872, "step": 2327 }, { - "epoch": 0.4881526525477039, - "grad_norm": 5.716428250886569, - "learning_rate": 1.9090968954526564e-05, - "loss": 2.2681, + "epoch": 0.3287207003671279, + "grad_norm": 4.2296180243734645, + "learning_rate": 1.966995894771239e-05, + "loss": 1.1364, "step": 2328 }, { - "epoch": 0.48836234011323126, - "grad_norm": 5.535149121498627, - "learning_rate": 1.9090025621824603e-05, - "loss": 1.9706, + "epoch": 0.3288619034171138, + "grad_norm": 3.77506336243467, + "learning_rate": 1.966957041700437e-05, + "loss": 0.9489, "step": 2329 }, { - "epoch": 0.4885720276787586, - "grad_norm": 9.22039450670552, - "learning_rate": 1.9089081823243066e-05, - "loss": 2.1087, + "epoch": 0.3290031064670997, + "grad_norm": 3.868957073053426, + "learning_rate": 1.9669181661579708e-05, + "loss": 1.0364, "step": 2330 }, { - "epoch": 0.48878171524428604, - "grad_norm": 5.310122222694555, - "learning_rate": 1.9088137558830318e-05, - "loss": 2.056, + "epoch": 0.32914430951708556, + "grad_norm": 4.646830027257799, + "learning_rate": 1.9668792681447437e-05, + "loss": 1.3238, "step": 2331 }, { - "epoch": 0.4889914028098134, - "grad_norm": 5.295513419045383, - "learning_rate": 1.908719282863476e-05, - "loss": 2.0356, + "epoch": 0.32928551256707145, + "grad_norm": 4.193223549682492, + "learning_rate": 1.9668403476616604e-05, + "loss": 1.3836, "step": 2332 }, { - "epoch": 0.48920109037534076, - "grad_norm": 5.425010593808835, - "learning_rate": 1.9086247632704808e-05, - "loss": 1.9661, + "epoch": 0.32942671561705733, + "grad_norm": 4.737718832370786, + "learning_rate": 1.966801404709625e-05, + "loss": 1.1446, "step": 2333 }, { - "epoch": 0.4894107779408681, - "grad_norm": 6.111997610064877, - "learning_rate": 1.9085301971088906e-05, - "loss": 1.879, + "epoch": 0.3295679186670432, + "grad_norm": 4.054518041853819, + "learning_rate": 1.9667624392895423e-05, + "loss": 1.0182, "step": 2334 }, { - "epoch": 0.4896204655063955, - "grad_norm": 6.421672708221043, - "learning_rate": 1.9084355843835516e-05, - "loss": 2.2508, + "epoch": 0.3297091217170291, + "grad_norm": 4.682605524282007, + "learning_rate": 1.966723451402318e-05, + "loss": 1.2124, "step": 2335 }, { - "epoch": 0.48983015307192285, - "grad_norm": 6.881533398074077, - "learning_rate": 1.908340925099314e-05, - "loss": 2.2109, + "epoch": 0.329850324767015, + "grad_norm": 4.144604458287708, + "learning_rate": 1.966684441048858e-05, + "loss": 1.0375, "step": 2336 }, { - "epoch": 0.4900398406374502, - "grad_norm": 6.518485068188767, - "learning_rate": 1.9082462192610283e-05, - "loss": 2.0763, + "epoch": 0.32999152781700086, + "grad_norm": 4.1442605119625355, + "learning_rate": 1.9666454082300692e-05, + "loss": 1.0873, "step": 2337 }, { - "epoch": 0.49024952820297757, - "grad_norm": 6.819380817404949, - "learning_rate": 1.9081514668735485e-05, - "loss": 2.1932, + "epoch": 0.33013273086698675, + "grad_norm": 4.474715100783228, + "learning_rate": 1.966606352946859e-05, + "loss": 1.0921, "step": 2338 }, { - "epoch": 0.49045921576850493, - "grad_norm": 7.3194014231403415, - "learning_rate": 1.9080566679417313e-05, - "loss": 1.9668, + "epoch": 0.33027393391697263, + "grad_norm": 4.19773923963997, + "learning_rate": 1.966567275200134e-05, + "loss": 1.1468, "step": 2339 }, { - "epoch": 0.4906689033340323, - "grad_norm": 6.851200789078422, - "learning_rate": 1.9079618224704345e-05, - "loss": 2.3139, + "epoch": 0.3304151369669585, + "grad_norm": 4.0253274923548155, + "learning_rate": 1.9665281749908034e-05, + "loss": 1.0117, "step": 2340 }, { - "epoch": 0.49087859089955965, - "grad_norm": 7.173304358486707, - "learning_rate": 1.907866930464519e-05, - "loss": 1.9716, + "epoch": 0.33055634001694434, + "grad_norm": 4.1035166483079815, + "learning_rate": 1.9664890523197752e-05, + "loss": 1.0293, "step": 2341 }, { - "epoch": 0.491088278465087, - "grad_norm": 6.690516006788188, - "learning_rate": 1.9077719919288497e-05, - "loss": 2.2947, + "epoch": 0.3306975430669302, + "grad_norm": 3.6697945159275984, + "learning_rate": 1.966449907187959e-05, + "loss": 1.0499, "step": 2342 }, { - "epoch": 0.4912979660306144, - "grad_norm": 6.13828021216144, - "learning_rate": 1.907677006868291e-05, - "loss": 1.8088, + "epoch": 0.3308387461169161, + "grad_norm": 4.600255890706265, + "learning_rate": 1.966410739596264e-05, + "loss": 1.2319, "step": 2343 }, { - "epoch": 0.49150765359614174, - "grad_norm": 6.6533509459098985, - "learning_rate": 1.907581975287711e-05, - "loss": 2.3695, + "epoch": 0.330979949166902, + "grad_norm": 4.716300359279863, + "learning_rate": 1.9663715495456012e-05, + "loss": 1.1395, "step": 2344 }, { - "epoch": 0.4917173411616691, - "grad_norm": 5.572810442397147, - "learning_rate": 1.9074868971919812e-05, - "loss": 2.258, + "epoch": 0.33112115221688787, + "grad_norm": 3.3502764908621665, + "learning_rate": 1.9663323370368807e-05, + "loss": 0.9503, "step": 2345 }, { - "epoch": 0.49192702872719646, - "grad_norm": 5.695927312028442, - "learning_rate": 1.9073917725859738e-05, - "loss": 2.3008, + "epoch": 0.33126235526687375, + "grad_norm": 3.5601084487564165, + "learning_rate": 1.9662931020710138e-05, + "loss": 0.903, "step": 2346 }, { - "epoch": 0.4921367162927238, - "grad_norm": 5.562874174324754, - "learning_rate": 1.9072966014745643e-05, - "loss": 1.6022, + "epoch": 0.33140355831685964, + "grad_norm": 3.961753823794761, + "learning_rate": 1.966253844648913e-05, + "loss": 0.9938, "step": 2347 }, { - "epoch": 0.4923464038582512, - "grad_norm": 5.487272229550957, - "learning_rate": 1.9072013838626303e-05, - "loss": 2.3415, + "epoch": 0.3315447613668455, + "grad_norm": 5.874227348884095, + "learning_rate": 1.9662145647714896e-05, + "loss": 1.5006, "step": 2348 }, { - "epoch": 0.4925560914237786, - "grad_norm": 5.074277148633861, - "learning_rate": 1.907106119755052e-05, - "loss": 2.1237, + "epoch": 0.3316859644168314, + "grad_norm": 3.9565349648325263, + "learning_rate": 1.9661752624396575e-05, + "loss": 1.2199, "step": 2349 }, { - "epoch": 0.49276577898930596, - "grad_norm": 6.517231037750821, - "learning_rate": 1.9070108091567115e-05, - "loss": 2.5566, + "epoch": 0.3318271674668173, + "grad_norm": 3.958515681867395, + "learning_rate": 1.9661359376543295e-05, + "loss": 0.9815, "step": 2350 }, { - "epoch": 0.4929754665548333, - "grad_norm": 5.624852956730777, - "learning_rate": 1.906915452072494e-05, - "loss": 1.9445, + "epoch": 0.33196837051680317, + "grad_norm": 4.32504214111788, + "learning_rate": 1.9660965904164193e-05, + "loss": 0.9117, "step": 2351 }, { - "epoch": 0.4931851541203607, - "grad_norm": 5.775054823995273, - "learning_rate": 1.9068200485072867e-05, - "loss": 2.1182, + "epoch": 0.33210957356678905, + "grad_norm": 3.9879223245020965, + "learning_rate": 1.9660572207268416e-05, + "loss": 1.0867, "step": 2352 }, { - "epoch": 0.49339484168588804, - "grad_norm": 5.296104029724121, - "learning_rate": 1.906724598465979e-05, - "loss": 2.2116, + "epoch": 0.33225077661677493, + "grad_norm": 3.9698258374326665, + "learning_rate": 1.9660178285865114e-05, + "loss": 1.0729, "step": 2353 }, { - "epoch": 0.4936045292514154, - "grad_norm": 5.025189306268243, - "learning_rate": 1.9066291019534632e-05, - "loss": 1.8211, + "epoch": 0.3323919796667608, + "grad_norm": 4.055386495571354, + "learning_rate": 1.9659784139963437e-05, + "loss": 1.0742, "step": 2354 }, { - "epoch": 0.49381421681694276, - "grad_norm": 6.545413003683983, - "learning_rate": 1.9065335589746335e-05, - "loss": 2.3161, + "epoch": 0.3325331827167467, + "grad_norm": 4.908828157744329, + "learning_rate": 1.9659389769572553e-05, + "loss": 1.2383, "step": 2355 }, { - "epoch": 0.4940239043824701, - "grad_norm": 6.415484420931244, - "learning_rate": 1.906437969534386e-05, - "loss": 2.1413, + "epoch": 0.3326743857667326, + "grad_norm": 4.640570287878632, + "learning_rate": 1.9658995174701622e-05, + "loss": 1.2139, "step": 2356 }, { - "epoch": 0.4942335919479975, - "grad_norm": 5.54241740507023, - "learning_rate": 1.906342333637621e-05, - "loss": 1.9692, + "epoch": 0.33281558881671847, + "grad_norm": 4.332213214017479, + "learning_rate": 1.965860035535981e-05, + "loss": 1.1257, "step": 2357 }, { - "epoch": 0.49444327951352485, - "grad_norm": 6.036925989066589, - "learning_rate": 1.9062466512892395e-05, - "loss": 2.2641, + "epoch": 0.3329567918667043, + "grad_norm": 4.461699314113292, + "learning_rate": 1.9658205311556304e-05, + "loss": 1.0254, "step": 2358 }, { - "epoch": 0.4946529670790522, - "grad_norm": 5.420006516972742, - "learning_rate": 1.906150922494145e-05, - "loss": 2.0105, + "epoch": 0.3330979949166902, + "grad_norm": 4.327645135556568, + "learning_rate": 1.9657810043300274e-05, + "loss": 1.2754, "step": 2359 }, { - "epoch": 0.49486265464457957, - "grad_norm": 5.864509212812893, - "learning_rate": 1.9060551472572442e-05, - "loss": 1.86, + "epoch": 0.33323919796667606, + "grad_norm": 5.204008545388776, + "learning_rate": 1.9657414550600907e-05, + "loss": 1.2085, "step": 2360 }, { - "epoch": 0.49507234221010693, - "grad_norm": 7.406032077685681, - "learning_rate": 1.9059593255834457e-05, - "loss": 1.9605, + "epoch": 0.33338040101666194, + "grad_norm": 4.167632287820464, + "learning_rate": 1.96570188334674e-05, + "loss": 1.071, "step": 2361 }, { - "epoch": 0.4952820297756343, - "grad_norm": 6.715096049536995, - "learning_rate": 1.9058634574776606e-05, - "loss": 2.19, + "epoch": 0.3335216040666478, + "grad_norm": 4.5991995940731645, + "learning_rate": 1.9656622891908944e-05, + "loss": 1.4672, "step": 2362 }, { - "epoch": 0.49549171734116165, - "grad_norm": 5.437844276662292, - "learning_rate": 1.9057675429448015e-05, - "loss": 2.019, + "epoch": 0.3336628071166337, + "grad_norm": 4.451950497792509, + "learning_rate": 1.9656226725934745e-05, + "loss": 1.2229, "step": 2363 }, { - "epoch": 0.495701404906689, - "grad_norm": 6.758132630549353, - "learning_rate": 1.9056715819897855e-05, - "loss": 2.2599, + "epoch": 0.3338040101666196, + "grad_norm": 4.359007743925706, + "learning_rate": 1.9655830335554004e-05, + "loss": 1.3637, "step": 2364 }, { - "epoch": 0.4959110924722164, - "grad_norm": 5.5133441340501, - "learning_rate": 1.90557557461753e-05, - "loss": 2.04, + "epoch": 0.3339452132166055, + "grad_norm": 4.312915061603823, + "learning_rate": 1.965543372077594e-05, + "loss": 1.2552, "step": 2365 }, { - "epoch": 0.49612078003774374, - "grad_norm": 5.7863830826148535, - "learning_rate": 1.9054795208329558e-05, - "loss": 1.7747, + "epoch": 0.33408641626659136, + "grad_norm": 4.0791258079300015, + "learning_rate": 1.9655036881609763e-05, + "loss": 1.107, "step": 2366 }, { - "epoch": 0.49633046760327115, - "grad_norm": 6.053456590647323, - "learning_rate": 1.9053834206409854e-05, - "loss": 2.272, + "epoch": 0.33422761931657724, + "grad_norm": 3.8918306662932447, + "learning_rate": 1.9654639818064698e-05, + "loss": 1.0658, "step": 2367 }, { - "epoch": 0.4965401551687985, - "grad_norm": 5.503077556820159, - "learning_rate": 1.9052872740465444e-05, - "loss": 2.1596, + "epoch": 0.3343688223665631, + "grad_norm": 4.07886903482661, + "learning_rate": 1.9654242530149974e-05, + "loss": 0.9243, "step": 2368 }, { - "epoch": 0.4967498427343259, - "grad_norm": 6.214830320920408, - "learning_rate": 1.9051910810545604e-05, - "loss": 2.0359, + "epoch": 0.334510025416549, + "grad_norm": 4.824168743994136, + "learning_rate": 1.9653845017874822e-05, + "loss": 0.9971, "step": 2369 }, { - "epoch": 0.49695953029985324, - "grad_norm": 6.70460844242227, - "learning_rate": 1.9050948416699636e-05, - "loss": 2.0042, + "epoch": 0.3346512284665349, + "grad_norm": 4.268365663035962, + "learning_rate": 1.9653447281248484e-05, + "loss": 1.2813, "step": 2370 }, { - "epoch": 0.4971692178653806, - "grad_norm": 7.981658960225294, - "learning_rate": 1.9049985558976864e-05, - "loss": 1.8125, + "epoch": 0.33479243151652077, + "grad_norm": 3.800499338421693, + "learning_rate": 1.9653049320280197e-05, + "loss": 1.0285, "step": 2371 }, { - "epoch": 0.49737890543090796, - "grad_norm": 6.424302636271175, - "learning_rate": 1.9049022237426635e-05, - "loss": 2.0033, + "epoch": 0.33493363456650665, + "grad_norm": 4.6797039905650255, + "learning_rate": 1.9652651134979215e-05, + "loss": 1.3236, "step": 2372 }, { - "epoch": 0.4975885929964353, - "grad_norm": 5.483404936748399, - "learning_rate": 1.9048058452098323e-05, - "loss": 2.0798, + "epoch": 0.33507483761649254, + "grad_norm": 3.5132544721615155, + "learning_rate": 1.9652252725354784e-05, + "loss": 0.9121, "step": 2373 }, { - "epoch": 0.4977982805619627, - "grad_norm": 6.178462083181409, - "learning_rate": 1.9047094203041322e-05, - "loss": 2.1715, + "epoch": 0.3352160406664784, + "grad_norm": 3.8595258647211748, + "learning_rate": 1.9651854091416175e-05, + "loss": 0.8959, "step": 2374 }, { - "epoch": 0.49800796812749004, - "grad_norm": 6.252924502536067, - "learning_rate": 1.904612949030505e-05, - "loss": 2.1147, + "epoch": 0.33535724371646425, + "grad_norm": 4.430499987359151, + "learning_rate": 1.9651455233172643e-05, + "loss": 1.0045, "step": 2375 }, { - "epoch": 0.4982176556930174, - "grad_norm": 5.8091544377416575, - "learning_rate": 1.9045164313938952e-05, - "loss": 2.1987, + "epoch": 0.33549844676645013, + "grad_norm": 4.401753502355797, + "learning_rate": 1.965105615063346e-05, + "loss": 1.1951, "step": 2376 }, { - "epoch": 0.49842734325854476, - "grad_norm": 7.532699761994082, - "learning_rate": 1.90441986739925e-05, - "loss": 2.398, + "epoch": 0.335639649816436, + "grad_norm": 4.038971070082858, + "learning_rate": 1.9650656843807897e-05, + "loss": 1.0643, "step": 2377 }, { - "epoch": 0.4986370308240721, - "grad_norm": 5.099261833607678, - "learning_rate": 1.9043232570515174e-05, - "loss": 1.9185, + "epoch": 0.3357808528664219, + "grad_norm": 3.4656831023291184, + "learning_rate": 1.965025731270524e-05, + "loss": 0.9263, "step": 2378 }, { - "epoch": 0.4988467183895995, - "grad_norm": 6.320403373369112, - "learning_rate": 1.9042266003556494e-05, - "loss": 2.0786, + "epoch": 0.3359220559164078, + "grad_norm": 4.0485532273108795, + "learning_rate": 1.964985755733477e-05, + "loss": 1.186, "step": 2379 }, { - "epoch": 0.49905640595512685, - "grad_norm": 5.203193542742107, - "learning_rate": 1.9041298973166e-05, - "loss": 2.1385, + "epoch": 0.33606325896639366, + "grad_norm": 4.698114735175097, + "learning_rate": 1.964945757770578e-05, + "loss": 1.1756, "step": 2380 }, { - "epoch": 0.4992660935206542, - "grad_norm": 6.249330609505911, - "learning_rate": 1.904033147939325e-05, - "loss": 1.9189, + "epoch": 0.33620446201637955, + "grad_norm": 4.678917493787567, + "learning_rate": 1.964905737382756e-05, + "loss": 1.2601, "step": 2381 }, { - "epoch": 0.49947578108618157, - "grad_norm": 5.664877339319939, - "learning_rate": 1.9039363522287835e-05, - "loss": 2.1726, + "epoch": 0.33634566506636543, + "grad_norm": 4.976761772979513, + "learning_rate": 1.9648656945709413e-05, + "loss": 1.2188, "step": 2382 }, { - "epoch": 0.49968546865170893, - "grad_norm": 5.740542861207297, - "learning_rate": 1.903839510189936e-05, - "loss": 2.0998, + "epoch": 0.3364868681163513, + "grad_norm": 4.433791513937911, + "learning_rate": 1.9648256293360645e-05, + "loss": 1.2602, "step": 2383 }, { - "epoch": 0.4998951562172363, - "grad_norm": 5.7689742490636355, - "learning_rate": 1.9037426218277463e-05, - "loss": 2.2739, + "epoch": 0.3366280711663372, + "grad_norm": 3.987883162727097, + "learning_rate": 1.964785541679057e-05, + "loss": 1.1293, "step": 2384 }, { - "epoch": 0.5001048437827637, - "grad_norm": 6.033687363725323, - "learning_rate": 1.9036456871471792e-05, - "loss": 2.1637, + "epoch": 0.3367692742163231, + "grad_norm": 4.400476555565281, + "learning_rate": 1.96474543160085e-05, + "loss": 1.109, "step": 2385 }, { - "epoch": 0.5003145313482911, - "grad_norm": 8.01699680841515, - "learning_rate": 1.903548706153204e-05, - "loss": 2.2429, + "epoch": 0.33691047726630896, + "grad_norm": 4.3928186133433655, + "learning_rate": 1.964705299102376e-05, + "loss": 1.1372, "step": 2386 }, { - "epoch": 0.5005242189138184, - "grad_norm": 5.2260689823153275, - "learning_rate": 1.90345167885079e-05, - "loss": 2.0867, + "epoch": 0.33705168031629484, + "grad_norm": 4.67497194054581, + "learning_rate": 1.9646651441845676e-05, + "loss": 1.1721, "step": 2387 }, { - "epoch": 0.5007339064793458, - "grad_norm": 5.581233110187377, - "learning_rate": 1.9033546052449106e-05, - "loss": 2.185, + "epoch": 0.3371928833662807, + "grad_norm": 3.5429347936901756, + "learning_rate": 1.9646249668483575e-05, + "loss": 0.8902, "step": 2388 }, { - "epoch": 0.5009435940448731, - "grad_norm": 8.356945135235096, - "learning_rate": 1.903257485340541e-05, - "loss": 2.5881, + "epoch": 0.3373340864162666, + "grad_norm": 3.6901184293780425, + "learning_rate": 1.9645847670946798e-05, + "loss": 0.9684, "step": 2389 }, { - "epoch": 0.5011532816104005, - "grad_norm": 5.293277979893477, - "learning_rate": 1.9031603191426587e-05, - "loss": 2.1925, + "epoch": 0.3374752894662525, + "grad_norm": 4.719066839363746, + "learning_rate": 1.964544544924469e-05, + "loss": 0.9956, "step": 2390 }, { - "epoch": 0.5013629691759278, - "grad_norm": 5.65899763464518, - "learning_rate": 1.9030631066562436e-05, - "loss": 2.5119, + "epoch": 0.3376164925162384, + "grad_norm": 4.40288359375305, + "learning_rate": 1.9645043003386594e-05, + "loss": 1.1719, "step": 2391 }, { - "epoch": 0.5015726567414552, - "grad_norm": 5.452565421328935, - "learning_rate": 1.9029658478862776e-05, - "loss": 1.7918, + "epoch": 0.3377576955662242, + "grad_norm": 4.203912816884141, + "learning_rate": 1.9644640333381862e-05, + "loss": 1.3073, "step": 2392 }, { - "epoch": 0.5017823443069825, - "grad_norm": 9.15098421579578, - "learning_rate": 1.9028685428377462e-05, - "loss": 2.4517, + "epoch": 0.3378988986162101, + "grad_norm": 4.6181023594890585, + "learning_rate": 1.9644237439239853e-05, + "loss": 1.2131, "step": 2393 }, { - "epoch": 0.50199203187251, - "grad_norm": 6.153944910687597, - "learning_rate": 1.902771191515636e-05, - "loss": 2.2359, + "epoch": 0.33804010166619597, + "grad_norm": 4.160814448700728, + "learning_rate": 1.964383432096993e-05, + "loss": 1.1531, "step": 2394 }, { - "epoch": 0.5022017194380374, - "grad_norm": 5.667062866273726, - "learning_rate": 1.9026737939249363e-05, - "loss": 1.91, + "epoch": 0.33818130471618185, + "grad_norm": 3.6153190407914564, + "learning_rate": 1.964343097858147e-05, + "loss": 0.9677, "step": 2395 }, { - "epoch": 0.5024114070035647, - "grad_norm": 5.9757697757745945, - "learning_rate": 1.902576350070639e-05, - "loss": 2.1999, + "epoch": 0.33832250776616773, + "grad_norm": 3.542416860970481, + "learning_rate": 1.964302741208383e-05, + "loss": 1.161, "step": 2396 }, { - "epoch": 0.5026210945690921, - "grad_norm": 5.887686237164335, - "learning_rate": 1.9024788599577383e-05, - "loss": 1.9772, + "epoch": 0.3384637108161536, + "grad_norm": 4.076836005248402, + "learning_rate": 1.9642623621486403e-05, + "loss": 1.1216, "step": 2397 }, { - "epoch": 0.5028307821346194, - "grad_norm": 5.058824111966105, - "learning_rate": 1.902381323591231e-05, - "loss": 2.0835, + "epoch": 0.3386049138661395, + "grad_norm": 4.5017686573088165, + "learning_rate": 1.9642219606798566e-05, + "loss": 1.0349, "step": 2398 }, { - "epoch": 0.5030404697001468, - "grad_norm": 5.7260635696615285, - "learning_rate": 1.9022837409761155e-05, - "loss": 2.127, + "epoch": 0.3387461169161254, + "grad_norm": 4.171949373603423, + "learning_rate": 1.964181536802971e-05, + "loss": 1.1737, "step": 2399 }, { - "epoch": 0.5032501572656741, - "grad_norm": 5.14106195159542, - "learning_rate": 1.9021861121173936e-05, - "loss": 2.1018, + "epoch": 0.33888731996611127, + "grad_norm": 4.42170242437467, + "learning_rate": 1.964141090518923e-05, + "loss": 1.1504, "step": 2400 }, { - "epoch": 0.5034598448312015, - "grad_norm": 5.432524328519932, - "learning_rate": 1.9020884370200685e-05, - "loss": 2.1782, + "epoch": 0.33902852301609715, + "grad_norm": 4.208929719352292, + "learning_rate": 1.964100621828652e-05, + "loss": 1.0181, "step": 2401 }, { - "epoch": 0.5036695323967288, - "grad_norm": 6.286592021837883, - "learning_rate": 1.9019907156891464e-05, - "loss": 2.4022, + "epoch": 0.33916972606608303, + "grad_norm": 3.711429047834989, + "learning_rate": 1.964060130733099e-05, + "loss": 0.8933, "step": 2402 }, { - "epoch": 0.5038792199622563, - "grad_norm": 6.102193936181787, - "learning_rate": 1.901892948129636e-05, - "loss": 2.1732, + "epoch": 0.3393109291160689, + "grad_norm": 4.239570852500366, + "learning_rate": 1.9640196172332053e-05, + "loss": 1.1691, "step": 2403 }, { - "epoch": 0.5040889075277836, - "grad_norm": 4.753235931711819, - "learning_rate": 1.9017951343465473e-05, - "loss": 2.0187, + "epoch": 0.3394521321660548, + "grad_norm": 4.016978412051192, + "learning_rate": 1.963979081329912e-05, + "loss": 1.0316, "step": 2404 }, { - "epoch": 0.504298595093311, - "grad_norm": 5.647004233542891, - "learning_rate": 1.9016972743448944e-05, - "loss": 2.0608, + "epoch": 0.3395933352160407, + "grad_norm": 4.267331213384971, + "learning_rate": 1.9639385230241606e-05, + "loss": 1.2225, "step": 2405 }, { - "epoch": 0.5045082826588383, - "grad_norm": 5.605049904184601, - "learning_rate": 1.9015993681296917e-05, - "loss": 2.1389, + "epoch": 0.33973453826602656, + "grad_norm": 5.143212916056214, + "learning_rate": 1.9638979423168948e-05, + "loss": 1.2342, "step": 2406 }, { - "epoch": 0.5047179702243657, - "grad_norm": 6.021952684035758, - "learning_rate": 1.9015014157059578e-05, - "loss": 2.3593, + "epoch": 0.33987574131601245, + "grad_norm": 4.558666127313283, + "learning_rate": 1.9638573392090567e-05, + "loss": 1.1004, "step": 2407 }, { - "epoch": 0.504927657789893, - "grad_norm": 6.103797458703036, - "learning_rate": 1.9014034170787132e-05, - "loss": 2.0789, + "epoch": 0.34001694436599833, + "grad_norm": 5.223102049528061, + "learning_rate": 1.9638167137015905e-05, + "loss": 1.0491, "step": 2408 }, { - "epoch": 0.5051373453554204, - "grad_norm": 5.967531630592809, - "learning_rate": 1.9013053722529798e-05, - "loss": 2.0806, + "epoch": 0.34015814741598416, + "grad_norm": 4.379798037424821, + "learning_rate": 1.96377606579544e-05, + "loss": 0.9218, "step": 2409 }, { - "epoch": 0.5053470329209477, - "grad_norm": 6.029084380909239, - "learning_rate": 1.9012072812337828e-05, - "loss": 2.261, + "epoch": 0.34029935046597004, + "grad_norm": 3.48225427242103, + "learning_rate": 1.9637353954915503e-05, + "loss": 1.0881, "step": 2410 }, { - "epoch": 0.5055567204864752, - "grad_norm": 6.012398732609155, - "learning_rate": 1.9011091440261495e-05, - "loss": 1.9123, + "epoch": 0.3404405535159559, + "grad_norm": 4.392059248251775, + "learning_rate": 1.963694702790866e-05, + "loss": 1.1077, "step": 2411 }, { - "epoch": 0.5057664080520026, - "grad_norm": 5.350645210145288, - "learning_rate": 1.9010109606351098e-05, - "loss": 2.0402, + "epoch": 0.3405817565659418, + "grad_norm": 3.894676270930297, + "learning_rate": 1.9636539876943325e-05, + "loss": 1.119, "step": 2412 }, { - "epoch": 0.5059760956175299, - "grad_norm": 5.633013623478876, - "learning_rate": 1.9009127310656958e-05, - "loss": 2.2955, + "epoch": 0.3407229596159277, + "grad_norm": 3.5318741043340327, + "learning_rate": 1.963613250202897e-05, + "loss": 0.8803, "step": 2413 }, { - "epoch": 0.5061857831830573, - "grad_norm": 5.8473098450970715, - "learning_rate": 1.9008144553229414e-05, - "loss": 2.1665, + "epoch": 0.3408641626659136, + "grad_norm": 3.998772412989041, + "learning_rate": 1.9635724903175055e-05, + "loss": 1.0134, "step": 2414 }, { - "epoch": 0.5063954707485846, - "grad_norm": 5.875046523618884, - "learning_rate": 1.900716133411884e-05, - "loss": 2.1546, + "epoch": 0.34100536571589946, + "grad_norm": 3.6201962319529755, + "learning_rate": 1.9635317080391058e-05, + "loss": 0.9789, "step": 2415 }, { - "epoch": 0.506605158314112, - "grad_norm": 5.683185050794553, - "learning_rate": 1.9006177653375623e-05, - "loss": 1.9391, + "epoch": 0.34114656876588534, + "grad_norm": 4.873649122840227, + "learning_rate": 1.963490903368645e-05, + "loss": 1.2571, "step": 2416 }, { - "epoch": 0.5068148458796393, - "grad_norm": 5.661859288865748, - "learning_rate": 1.9005193511050183e-05, - "loss": 2.1656, + "epoch": 0.3412877718158712, + "grad_norm": 4.151730010185322, + "learning_rate": 1.9634500763070718e-05, + "loss": 1.1259, "step": 2417 }, { - "epoch": 0.5070245334451667, - "grad_norm": 6.508985724057258, - "learning_rate": 1.900420890719296e-05, - "loss": 1.9514, + "epoch": 0.3414289748658571, + "grad_norm": 4.785997092304954, + "learning_rate": 1.963409226855335e-05, + "loss": 1.119, "step": 2418 }, { - "epoch": 0.507234221010694, - "grad_norm": 5.453614688749812, - "learning_rate": 1.900322384185441e-05, - "loss": 2.0257, + "epoch": 0.341570177915843, + "grad_norm": 4.770550207819993, + "learning_rate": 1.963368355014384e-05, + "loss": 0.9843, "step": 2419 }, { - "epoch": 0.5074439085762215, - "grad_norm": 6.151785595113499, - "learning_rate": 1.900223831508502e-05, - "loss": 2.2917, + "epoch": 0.34171138096582887, + "grad_norm": 3.8054131553863093, + "learning_rate": 1.963327460785168e-05, + "loss": 0.9707, "step": 2420 }, { - "epoch": 0.5076535961417488, - "grad_norm": 6.551169074036723, - "learning_rate": 1.9001252326935306e-05, - "loss": 2.5201, + "epoch": 0.34185258401581475, + "grad_norm": 3.5526989167853733, + "learning_rate": 1.963286544168638e-05, + "loss": 0.8548, "step": 2421 }, { - "epoch": 0.5078632837072762, - "grad_norm": 5.479608564104697, - "learning_rate": 1.90002658774558e-05, - "loss": 2.118, + "epoch": 0.34199378706580064, + "grad_norm": 3.7404251447877734, + "learning_rate": 1.9632456051657448e-05, + "loss": 1.0831, "step": 2422 }, { - "epoch": 0.5080729712728035, - "grad_norm": 5.978904310512989, - "learning_rate": 1.8999278966697055e-05, - "loss": 1.9548, + "epoch": 0.3421349901157865, + "grad_norm": 4.07951445670617, + "learning_rate": 1.96320464377744e-05, + "loss": 0.9465, "step": 2423 }, { - "epoch": 0.5082826588383309, - "grad_norm": 5.838716078489018, - "learning_rate": 1.8998291594709655e-05, - "loss": 2.2903, + "epoch": 0.3422761931657724, + "grad_norm": 4.883382515834337, + "learning_rate": 1.9631636600046748e-05, + "loss": 1.1575, "step": 2424 }, { - "epoch": 0.5084923464038582, - "grad_norm": 6.929947633172149, - "learning_rate": 1.8997303761544206e-05, - "loss": 2.2608, + "epoch": 0.3424173962157583, + "grad_norm": 4.153052335524542, + "learning_rate": 1.9631226538484026e-05, + "loss": 1.1216, "step": 2425 }, { - "epoch": 0.5087020339693856, - "grad_norm": 6.610662867772675, - "learning_rate": 1.8996315467251333e-05, - "loss": 2.0301, + "epoch": 0.3425585992657441, + "grad_norm": 4.717587171064485, + "learning_rate": 1.9630816253095754e-05, + "loss": 1.449, "step": 2426 }, { - "epoch": 0.5089117215349129, - "grad_norm": 5.679471733482516, - "learning_rate": 1.8995326711881687e-05, - "loss": 2.0016, + "epoch": 0.34269980231573, + "grad_norm": 4.270216797248425, + "learning_rate": 1.9630405743891475e-05, + "loss": 1.0937, "step": 2427 }, { - "epoch": 0.5091214091004403, - "grad_norm": 6.0816843081496925, - "learning_rate": 1.8994337495485946e-05, - "loss": 1.9341, + "epoch": 0.3428410053657159, + "grad_norm": 4.39908829878803, + "learning_rate": 1.9629995010880724e-05, + "loss": 1.105, "step": 2428 }, { - "epoch": 0.5093310966659677, - "grad_norm": 5.475189235774921, - "learning_rate": 1.899334781811481e-05, - "loss": 1.8723, + "epoch": 0.34298220841570176, + "grad_norm": 3.832943972552059, + "learning_rate": 1.9629584054073044e-05, + "loss": 0.9623, "step": 2429 }, { - "epoch": 0.5095407842314951, - "grad_norm": 4.94909873518066, - "learning_rate": 1.8992357679819e-05, - "loss": 1.8506, + "epoch": 0.34312341146568764, + "grad_norm": 4.437313959133174, + "learning_rate": 1.9629172873477995e-05, + "loss": 1.2047, "step": 2430 }, { - "epoch": 0.5097504717970225, - "grad_norm": 5.668184705303442, - "learning_rate": 1.8991367080649262e-05, - "loss": 2.3004, + "epoch": 0.3432646145156735, + "grad_norm": 4.225571839938346, + "learning_rate": 1.9628761469105123e-05, + "loss": 1.0078, "step": 2431 }, { - "epoch": 0.5099601593625498, - "grad_norm": 6.548289442959977, - "learning_rate": 1.8990376020656363e-05, - "loss": 2.0827, + "epoch": 0.3434058175656594, + "grad_norm": 3.6150598916411023, + "learning_rate": 1.9628349840963997e-05, + "loss": 1.0849, "step": 2432 }, { - "epoch": 0.5101698469280772, - "grad_norm": 6.200477921712447, - "learning_rate": 1.8989384499891105e-05, - "loss": 2.0737, + "epoch": 0.3435470206156453, + "grad_norm": 4.1367108536683865, + "learning_rate": 1.9627937989064177e-05, + "loss": 1.0606, "step": 2433 }, { - "epoch": 0.5103795344936045, - "grad_norm": 6.699598810408789, - "learning_rate": 1.89883925184043e-05, - "loss": 1.9239, + "epoch": 0.3436882236656312, + "grad_norm": 3.338367183512359, + "learning_rate": 1.9627525913415234e-05, + "loss": 0.8998, "step": 2434 }, { - "epoch": 0.5105892220591319, - "grad_norm": 5.933597619317245, - "learning_rate": 1.8987400076246786e-05, - "loss": 2.0603, + "epoch": 0.34382942671561706, + "grad_norm": 4.1311151035671045, + "learning_rate": 1.9627113614026746e-05, + "loss": 1.0938, "step": 2435 }, { - "epoch": 0.5107989096246592, - "grad_norm": 5.594914065145635, - "learning_rate": 1.898640717346943e-05, - "loss": 1.8536, + "epoch": 0.34397062976560294, + "grad_norm": 3.541097384560549, + "learning_rate": 1.96267010909083e-05, + "loss": 0.9975, "step": 2436 }, { - "epoch": 0.5110085971901867, - "grad_norm": 6.124003405592209, - "learning_rate": 1.8985413810123122e-05, - "loss": 2.168, + "epoch": 0.3441118328155888, + "grad_norm": 3.732686121976509, + "learning_rate": 1.9626288344069475e-05, + "loss": 0.9095, "step": 2437 }, { - "epoch": 0.511218284755714, - "grad_norm": 5.117847996774858, - "learning_rate": 1.8984419986258773e-05, - "loss": 2.1751, + "epoch": 0.3442530358655747, + "grad_norm": 3.9669232928078713, + "learning_rate": 1.9625875373519866e-05, + "loss": 0.8914, "step": 2438 }, { - "epoch": 0.5114279723212414, - "grad_norm": 6.496920280151962, - "learning_rate": 1.8983425701927313e-05, - "loss": 2.3464, + "epoch": 0.3443942389155606, + "grad_norm": 3.9068507063648377, + "learning_rate": 1.962546217926907e-05, + "loss": 1.298, "step": 2439 }, { - "epoch": 0.5116376598867687, - "grad_norm": 5.446051004489931, - "learning_rate": 1.898243095717971e-05, - "loss": 2.2514, + "epoch": 0.3445354419655465, + "grad_norm": 3.7603490566803184, + "learning_rate": 1.962504876132669e-05, + "loss": 1.1631, "step": 2440 }, { - "epoch": 0.5118473474522961, - "grad_norm": 6.395967676245346, - "learning_rate": 1.8981435752066937e-05, - "loss": 1.6687, + "epoch": 0.34467664501553236, + "grad_norm": 4.974272913453476, + "learning_rate": 1.9624635119702334e-05, + "loss": 1.2221, "step": 2441 }, { - "epoch": 0.5120570350178234, - "grad_norm": 5.836971640083478, - "learning_rate": 1.898044008664001e-05, - "loss": 2.2307, + "epoch": 0.34481784806551824, + "grad_norm": 4.720861925473938, + "learning_rate": 1.962422125440562e-05, + "loss": 1.3398, "step": 2442 }, { - "epoch": 0.5122667225833508, - "grad_norm": 7.052926053273921, - "learning_rate": 1.8979443960949945e-05, - "loss": 2.0052, + "epoch": 0.34495905111550407, + "grad_norm": 3.3495920264351304, + "learning_rate": 1.9623807165446154e-05, + "loss": 0.8139, "step": 2443 }, { - "epoch": 0.5124764101488781, - "grad_norm": 5.7966212634184044, - "learning_rate": 1.8978447375047805e-05, - "loss": 2.245, + "epoch": 0.34510025416548995, + "grad_norm": 4.103388296732692, + "learning_rate": 1.962339285283357e-05, + "loss": 1.0345, "step": 2444 }, { - "epoch": 0.5126860977144055, - "grad_norm": 4.752110915106241, - "learning_rate": 1.897745032898467e-05, - "loss": 2.02, + "epoch": 0.34524145721547583, + "grad_norm": 3.9204409495502492, + "learning_rate": 1.9622978316577485e-05, + "loss": 0.9395, "step": 2445 }, { - "epoch": 0.5128957852799328, - "grad_norm": 6.124714542750896, - "learning_rate": 1.897645282281163e-05, - "loss": 2.3848, + "epoch": 0.3453826602654617, + "grad_norm": 3.9245196864122502, + "learning_rate": 1.9622563556687545e-05, + "loss": 0.9193, "step": 2446 }, { - "epoch": 0.5131054728454603, - "grad_norm": 5.907468372087065, - "learning_rate": 1.8975454856579817e-05, - "loss": 2.1063, + "epoch": 0.3455238633154476, + "grad_norm": 3.682812805172933, + "learning_rate": 1.962214857317338e-05, + "loss": 1.018, "step": 2447 }, { - "epoch": 0.5133151604109877, - "grad_norm": 6.7460353381616125, - "learning_rate": 1.897445643034037e-05, - "loss": 2.2471, + "epoch": 0.3456650663654335, + "grad_norm": 3.772926890504689, + "learning_rate": 1.9621733366044644e-05, + "loss": 0.9365, "step": 2448 }, { - "epoch": 0.513524847976515, - "grad_norm": 5.256027015938374, - "learning_rate": 1.897345754414447e-05, - "loss": 1.8617, + "epoch": 0.34580626941541937, + "grad_norm": 4.70117475693373, + "learning_rate": 1.9621317935310973e-05, + "loss": 1.288, "step": 2449 }, { - "epoch": 0.5137345355420424, - "grad_norm": 6.888810526007073, - "learning_rate": 1.8972458198043306e-05, - "loss": 1.8979, + "epoch": 0.34594747246540525, + "grad_norm": 3.495948212276124, + "learning_rate": 1.962090228098203e-05, + "loss": 0.845, "step": 2450 }, { - "epoch": 0.5139442231075697, - "grad_norm": 6.274816385128185, - "learning_rate": 1.8971458392088097e-05, - "loss": 1.8552, + "epoch": 0.34608867551539113, + "grad_norm": 3.9384807747329504, + "learning_rate": 1.9620486403067477e-05, + "loss": 0.9899, "step": 2451 }, { - "epoch": 0.5141539106730971, - "grad_norm": 5.701180089204313, - "learning_rate": 1.8970458126330088e-05, - "loss": 1.854, + "epoch": 0.346229878565377, + "grad_norm": 4.452608929216782, + "learning_rate": 1.9620070301576974e-05, + "loss": 1.3221, "step": 2452 }, { - "epoch": 0.5143635982386244, - "grad_norm": 5.497722221126138, - "learning_rate": 1.896945740082054e-05, - "loss": 2.1337, + "epoch": 0.3463710816153629, + "grad_norm": 4.721961785875029, + "learning_rate": 1.9619653976520188e-05, + "loss": 1.1992, "step": 2453 }, { - "epoch": 0.5145732858041518, - "grad_norm": 5.539619232366782, - "learning_rate": 1.8968456215610742e-05, - "loss": 2.5457, + "epoch": 0.3465122846653488, + "grad_norm": 4.225339899243639, + "learning_rate": 1.96192374279068e-05, + "loss": 1.174, "step": 2454 }, { - "epoch": 0.5147829733696792, - "grad_norm": 6.165740341382778, - "learning_rate": 1.896745457075201e-05, - "loss": 2.1018, + "epoch": 0.34665348771533466, + "grad_norm": 4.681510562768214, + "learning_rate": 1.9618820655746488e-05, + "loss": 1.0914, "step": 2455 }, { - "epoch": 0.5149926609352066, - "grad_norm": 5.864414206491105, - "learning_rate": 1.8966452466295674e-05, - "loss": 2.0048, + "epoch": 0.34679469076532055, + "grad_norm": 3.857565966113763, + "learning_rate": 1.9618403660048937e-05, + "loss": 1.0712, "step": 2456 }, { - "epoch": 0.5152023485007339, - "grad_norm": 5.707363694056109, - "learning_rate": 1.8965449902293103e-05, - "loss": 2.2108, + "epoch": 0.34693589381530643, + "grad_norm": 4.147096171538478, + "learning_rate": 1.961798644082384e-05, + "loss": 1.0199, "step": 2457 }, { - "epoch": 0.5154120360662613, - "grad_norm": 5.936006779252624, - "learning_rate": 1.896444687879567e-05, - "loss": 1.9133, + "epoch": 0.3470770968652923, + "grad_norm": 4.034810878139281, + "learning_rate": 1.9617568998080893e-05, + "loss": 1.1382, "step": 2458 }, { - "epoch": 0.5156217236317886, - "grad_norm": 5.6405877269955775, - "learning_rate": 1.896344339585479e-05, - "loss": 1.6468, + "epoch": 0.3472182999152782, + "grad_norm": 3.2545154726521117, + "learning_rate": 1.9617151331829794e-05, + "loss": 1.0045, "step": 2459 }, { - "epoch": 0.515831411197316, - "grad_norm": 6.633958221080308, - "learning_rate": 1.8962439453521887e-05, - "loss": 2.4828, + "epoch": 0.347359502965264, + "grad_norm": 3.5374421400406337, + "learning_rate": 1.9616733442080253e-05, + "loss": 0.9161, "step": 2460 }, { - "epoch": 0.5160410987628433, - "grad_norm": 5.422441272269181, - "learning_rate": 1.8961435051848422e-05, - "loss": 1.9458, + "epoch": 0.3475007060152499, + "grad_norm": 3.763914069834324, + "learning_rate": 1.9616315328841976e-05, + "loss": 1.1935, "step": 2461 }, { - "epoch": 0.5162507863283707, - "grad_norm": 6.4908141697417285, - "learning_rate": 1.8960430190885862e-05, - "loss": 2.3311, + "epoch": 0.3476419090652358, + "grad_norm": 4.087782005852018, + "learning_rate": 1.961589699212469e-05, + "loss": 1.0329, "step": 2462 }, { - "epoch": 0.516460473893898, - "grad_norm": 5.626908643083947, - "learning_rate": 1.8959424870685718e-05, - "loss": 1.9031, + "epoch": 0.34778311211522167, + "grad_norm": 5.735757635104397, + "learning_rate": 1.9615478431938104e-05, + "loss": 1.4835, "step": 2463 }, { - "epoch": 0.5166701614594255, - "grad_norm": 5.645935982009144, - "learning_rate": 1.895841909129951e-05, - "loss": 2.1234, + "epoch": 0.34792431516520755, + "grad_norm": 3.716893499583309, + "learning_rate": 1.9615059648291956e-05, + "loss": 1.0032, "step": 2464 }, { - "epoch": 0.5168798490249529, - "grad_norm": 6.355549663504592, - "learning_rate": 1.895741285277878e-05, - "loss": 1.8931, + "epoch": 0.34806551821519344, + "grad_norm": 4.59197907100095, + "learning_rate": 1.9614640641195975e-05, + "loss": 1.0288, "step": 2465 }, { - "epoch": 0.5170895365904802, - "grad_norm": 5.56836460649157, - "learning_rate": 1.895640615517511e-05, - "loss": 2.0302, + "epoch": 0.3482067212651793, + "grad_norm": 3.8502591325942714, + "learning_rate": 1.9614221410659896e-05, + "loss": 1.0686, "step": 2466 }, { - "epoch": 0.5172992241560076, - "grad_norm": 5.867739565581109, - "learning_rate": 1.895539899854009e-05, - "loss": 1.9763, + "epoch": 0.3483479243151652, + "grad_norm": 3.3020466238325143, + "learning_rate": 1.9613801956693463e-05, + "loss": 0.819, "step": 2467 }, { - "epoch": 0.5175089117215349, - "grad_norm": 6.2409512987061175, - "learning_rate": 1.895439138292534e-05, - "loss": 1.9439, + "epoch": 0.3484891273651511, + "grad_norm": 3.3968251428585967, + "learning_rate": 1.9613382279306426e-05, + "loss": 0.8916, "step": 2468 }, { - "epoch": 0.5177185992870623, - "grad_norm": 5.500422630646335, - "learning_rate": 1.89533833083825e-05, - "loss": 2.1511, + "epoch": 0.34863033041513697, + "grad_norm": 4.022462610854897, + "learning_rate": 1.9612962378508534e-05, + "loss": 1.1449, "step": 2469 }, { - "epoch": 0.5179282868525896, - "grad_norm": 6.392000632653926, - "learning_rate": 1.8952374774963238e-05, - "loss": 2.0937, + "epoch": 0.34877153346512285, + "grad_norm": 4.732278945418171, + "learning_rate": 1.961254225430955e-05, + "loss": 1.2111, "step": 2470 }, { - "epoch": 0.518137974418117, - "grad_norm": 5.866132675037486, - "learning_rate": 1.895136578271924e-05, - "loss": 1.9408, + "epoch": 0.34891273651510873, + "grad_norm": 3.334154516381029, + "learning_rate": 1.9612121906719235e-05, + "loss": 0.8986, "step": 2471 }, { - "epoch": 0.5183476619836443, - "grad_norm": 5.646877043591221, - "learning_rate": 1.895035633170222e-05, - "loss": 1.7246, + "epoch": 0.3490539395650946, + "grad_norm": 4.9019649014218505, + "learning_rate": 1.961170133574736e-05, + "loss": 1.3104, "step": 2472 }, { - "epoch": 0.5185573495491718, - "grad_norm": 6.319846113211682, - "learning_rate": 1.8949346421963914e-05, - "loss": 2.0778, + "epoch": 0.3491951426150805, + "grad_norm": 4.0697191906026395, + "learning_rate": 1.9611280541403695e-05, + "loss": 1.1868, "step": 2473 }, { - "epoch": 0.5187670371146991, - "grad_norm": 6.1000946977443755, - "learning_rate": 1.8948336053556085e-05, - "loss": 2.1154, + "epoch": 0.3493363456650664, + "grad_norm": 4.382259789271648, + "learning_rate": 1.961085952369802e-05, + "loss": 1.2189, "step": 2474 }, { - "epoch": 0.5189767246802265, - "grad_norm": 6.201465736094627, - "learning_rate": 1.894732522653051e-05, - "loss": 2.2514, + "epoch": 0.34947754871505227, + "grad_norm": 5.153839945708991, + "learning_rate": 1.961043828264012e-05, + "loss": 1.2231, "step": 2475 }, { - "epoch": 0.5191864122457538, - "grad_norm": 6.156466409315422, - "learning_rate": 1.8946313940939e-05, - "loss": 2.0332, + "epoch": 0.34961875176503815, + "grad_norm": 4.110011128468003, + "learning_rate": 1.9610016818239788e-05, + "loss": 1.1495, "step": 2476 }, { - "epoch": 0.5193960998112812, - "grad_norm": 4.851808363217823, - "learning_rate": 1.8945302196833383e-05, - "loss": 1.7619, + "epoch": 0.349759954815024, + "grad_norm": 4.014970773737679, + "learning_rate": 1.960959513050681e-05, + "loss": 1.2126, "step": 2477 }, { - "epoch": 0.5196057873768085, - "grad_norm": 6.3137014719236255, - "learning_rate": 1.8944289994265516e-05, - "loss": 2.2938, + "epoch": 0.34990115786500986, + "grad_norm": 3.619337629750043, + "learning_rate": 1.9609173219450998e-05, + "loss": 1.0152, "step": 2478 }, { - "epoch": 0.5198154749423359, - "grad_norm": 5.424746886631281, - "learning_rate": 1.8943277333287274e-05, - "loss": 1.8157, + "epoch": 0.35004236091499574, + "grad_norm": 3.6679701293293405, + "learning_rate": 1.9608751085082146e-05, + "loss": 0.9868, "step": 2479 }, { - "epoch": 0.5200251625078632, - "grad_norm": 5.8054338980657745, - "learning_rate": 1.8942264213950553e-05, - "loss": 2.2112, + "epoch": 0.3501835639649816, + "grad_norm": 4.396825149745631, + "learning_rate": 1.960832872741007e-05, + "loss": 1.2014, "step": 2480 }, { - "epoch": 0.5202348500733907, - "grad_norm": 5.623690997076992, - "learning_rate": 1.8941250636307285e-05, - "loss": 2.0334, + "epoch": 0.3503247670149675, + "grad_norm": 4.583431504551717, + "learning_rate": 1.9607906146444582e-05, + "loss": 1.2305, "step": 2481 }, { - "epoch": 0.520444537638918, - "grad_norm": 6.479274120125267, - "learning_rate": 1.8940236600409415e-05, - "loss": 2.2167, + "epoch": 0.3504659700649534, + "grad_norm": 4.378229825511286, + "learning_rate": 1.9607483342195505e-05, + "loss": 1.0974, "step": 2482 }, { - "epoch": 0.5206542252044454, - "grad_norm": 6.763323296402688, - "learning_rate": 1.893922210630891e-05, - "loss": 2.0633, + "epoch": 0.3506071731149393, + "grad_norm": 4.421448249337399, + "learning_rate": 1.9607060314672667e-05, + "loss": 1.1826, "step": 2483 }, { - "epoch": 0.5208639127699728, - "grad_norm": 6.345361107743618, - "learning_rate": 1.893820715405777e-05, - "loss": 1.8266, + "epoch": 0.35074837616492516, + "grad_norm": 4.339944923598533, + "learning_rate": 1.9606637063885892e-05, + "loss": 0.9886, "step": 2484 }, { - "epoch": 0.5210736003355001, - "grad_norm": 5.788027774176796, - "learning_rate": 1.8937191743708007e-05, - "loss": 2.074, + "epoch": 0.35088957921491104, + "grad_norm": 4.20171443855241, + "learning_rate": 1.9606213589845024e-05, + "loss": 1.2929, "step": 2485 }, { - "epoch": 0.5212832879010275, - "grad_norm": 5.214432151138426, - "learning_rate": 1.893617587531167e-05, - "loss": 2.1817, + "epoch": 0.3510307822648969, + "grad_norm": 4.095742315667121, + "learning_rate": 1.9605789892559902e-05, + "loss": 1.0634, "step": 2486 }, { - "epoch": 0.5214929754665548, - "grad_norm": 5.912122837167264, - "learning_rate": 1.893515954892082e-05, - "loss": 2.001, + "epoch": 0.3511719853148828, + "grad_norm": 4.378674881740143, + "learning_rate": 1.9605365972040368e-05, + "loss": 1.0321, "step": 2487 }, { - "epoch": 0.5217026630320822, - "grad_norm": 5.882870610521304, - "learning_rate": 1.8934142764587548e-05, - "loss": 2.025, + "epoch": 0.3513131883648687, + "grad_norm": 3.82355548137674, + "learning_rate": 1.9604941828296275e-05, + "loss": 1.1205, "step": 2488 }, { - "epoch": 0.5219123505976095, - "grad_norm": 5.990650409757526, - "learning_rate": 1.8933125522363957e-05, - "loss": 2.1127, + "epoch": 0.35145439141485457, + "grad_norm": 4.582665505244621, + "learning_rate": 1.9604517461337486e-05, + "loss": 1.1336, "step": 2489 }, { - "epoch": 0.522122038163137, - "grad_norm": 5.870296045274698, - "learning_rate": 1.8932107822302194e-05, - "loss": 2.0589, + "epoch": 0.35159559446484046, + "grad_norm": 3.3613136712063185, + "learning_rate": 1.960409287117386e-05, + "loss": 0.835, "step": 2490 }, { - "epoch": 0.5223317257286643, - "grad_norm": 6.715016531205387, - "learning_rate": 1.8931089664454415e-05, - "loss": 2.4405, + "epoch": 0.35173679751482634, + "grad_norm": 4.080135591133288, + "learning_rate": 1.9603668057815256e-05, + "loss": 1.0141, "step": 2491 }, { - "epoch": 0.5225414132941917, - "grad_norm": 5.982506981343005, - "learning_rate": 1.89300710488728e-05, - "loss": 2.1682, + "epoch": 0.3518780005648122, + "grad_norm": 4.30818591145011, + "learning_rate": 1.9603243021271562e-05, + "loss": 1.2129, "step": 2492 }, { - "epoch": 0.522751100859719, - "grad_norm": 5.131394305041194, - "learning_rate": 1.892905197560955e-05, - "loss": 2.0204, + "epoch": 0.3520192036147981, + "grad_norm": 4.939170149805019, + "learning_rate": 1.9602817761552643e-05, + "loss": 1.2198, "step": 2493 }, { - "epoch": 0.5229607884252464, - "grad_norm": 6.056956846506437, - "learning_rate": 1.8928032444716905e-05, - "loss": 2.1809, + "epoch": 0.352160406664784, + "grad_norm": 3.7132704205976386, + "learning_rate": 1.960239227866839e-05, + "loss": 0.9295, "step": 2494 }, { - "epoch": 0.5231704759907737, - "grad_norm": 5.70724378372189, - "learning_rate": 1.892701245624711e-05, - "loss": 2.4067, + "epoch": 0.3523016097147698, + "grad_norm": 3.8435256272365126, + "learning_rate": 1.9601966572628686e-05, + "loss": 0.9807, "step": 2495 }, { - "epoch": 0.5233801635563011, - "grad_norm": 5.642034277483455, - "learning_rate": 1.8925992010252442e-05, - "loss": 1.9798, + "epoch": 0.3524428127647557, + "grad_norm": 3.6867309441746183, + "learning_rate": 1.9601540643443423e-05, + "loss": 1.0148, "step": 2496 }, { - "epoch": 0.5235898511218284, - "grad_norm": 6.082824402807786, - "learning_rate": 1.8924971106785203e-05, - "loss": 2.2759, + "epoch": 0.3525840158147416, + "grad_norm": 4.393645506878234, + "learning_rate": 1.9601114491122506e-05, + "loss": 1.3695, "step": 2497 }, { - "epoch": 0.5237995386873558, - "grad_norm": 5.99309358311386, - "learning_rate": 1.892394974589772e-05, - "loss": 2.212, + "epoch": 0.35272521886472746, + "grad_norm": 4.22756129761658, + "learning_rate": 1.960068811567583e-05, + "loss": 1.0401, "step": 2498 }, { - "epoch": 0.5240092262528832, - "grad_norm": 5.6779773412157795, - "learning_rate": 1.892292792764233e-05, - "loss": 1.9566, + "epoch": 0.35286642191471335, + "grad_norm": 4.120954476302948, + "learning_rate": 1.9600261517113312e-05, + "loss": 1.1519, "step": 2499 }, { - "epoch": 0.5242189138184106, - "grad_norm": 6.070708602362544, - "learning_rate": 1.8921905652071407e-05, - "loss": 2.1451, + "epoch": 0.35300762496469923, + "grad_norm": 4.009628987003973, + "learning_rate": 1.9599834695444863e-05, + "loss": 1.224, "step": 2500 }, { - "epoch": 0.524428601383938, - "grad_norm": 7.218418417680411, - "learning_rate": 1.892088291923735e-05, - "loss": 2.399, + "epoch": 0.3531488280146851, + "grad_norm": 3.631119025903588, + "learning_rate": 1.9599407650680397e-05, + "loss": 1.0084, "step": 2501 }, { - "epoch": 0.5246382889494653, - "grad_norm": 5.5088831545755745, - "learning_rate": 1.8919859729192564e-05, - "loss": 2.1479, + "epoch": 0.353290031064671, + "grad_norm": 3.3999451652376758, + "learning_rate": 1.9598980382829848e-05, + "loss": 1.1239, "step": 2502 }, { - "epoch": 0.5248479765149927, - "grad_norm": 6.992877515385851, - "learning_rate": 1.89188360819895e-05, - "loss": 2.2418, + "epoch": 0.3534312341146569, + "grad_norm": 4.270937545689775, + "learning_rate": 1.9598552891903136e-05, + "loss": 1.1968, "step": 2503 }, { - "epoch": 0.52505766408052, - "grad_norm": 5.578938026909314, - "learning_rate": 1.891781197768062e-05, - "loss": 2.3097, + "epoch": 0.35357243716464276, + "grad_norm": 3.692335770695556, + "learning_rate": 1.9598125177910204e-05, + "loss": 0.9371, "step": 2504 }, { - "epoch": 0.5252673516460474, - "grad_norm": 5.614939133387399, - "learning_rate": 1.8916787416318404e-05, - "loss": 2.2699, + "epoch": 0.35371364021462864, + "grad_norm": 3.8537915759975303, + "learning_rate": 1.9597697240860983e-05, + "loss": 0.9721, "step": 2505 }, { - "epoch": 0.5254770392115747, - "grad_norm": 5.583994581207203, - "learning_rate": 1.891576239795537e-05, - "loss": 2.3479, + "epoch": 0.3538548432646145, + "grad_norm": 4.318282105536294, + "learning_rate": 1.959726908076543e-05, + "loss": 0.9592, "step": 2506 }, { - "epoch": 0.5256867267771022, - "grad_norm": 5.420076296041309, - "learning_rate": 1.891473692264405e-05, - "loss": 2.26, + "epoch": 0.3539960463146004, + "grad_norm": 4.4525781298293134, + "learning_rate": 1.9596840697633484e-05, + "loss": 1.1773, "step": 2507 }, { - "epoch": 0.5258964143426295, - "grad_norm": 6.786149398370626, - "learning_rate": 1.8913710990437003e-05, - "loss": 2.2815, + "epoch": 0.3541372493645863, + "grad_norm": 3.9137208499398444, + "learning_rate": 1.9596412091475107e-05, + "loss": 0.9661, "step": 2508 }, { - "epoch": 0.5261061019081569, - "grad_norm": 6.437573523645526, - "learning_rate": 1.8912684601386805e-05, - "loss": 2.0489, + "epoch": 0.3542784524145722, + "grad_norm": 4.261246179836905, + "learning_rate": 1.9595983262300253e-05, + "loss": 1.1474, "step": 2509 }, { - "epoch": 0.5263157894736842, - "grad_norm": 5.257089587666138, - "learning_rate": 1.8911657755546063e-05, - "loss": 2.1594, + "epoch": 0.35441965546455806, + "grad_norm": 4.117969827162626, + "learning_rate": 1.9595554210118896e-05, + "loss": 0.8618, "step": 2510 }, { - "epoch": 0.5265254770392116, - "grad_norm": 5.785725256332309, - "learning_rate": 1.8910630452967403e-05, - "loss": 2.29, + "epoch": 0.35456085851454394, + "grad_norm": 3.986497611485309, + "learning_rate": 1.9595124934941002e-05, + "loss": 0.9507, "step": 2511 }, { - "epoch": 0.5267351646047389, - "grad_norm": 5.190953858034122, - "learning_rate": 1.8909602693703482e-05, - "loss": 2.2172, + "epoch": 0.35470206156452977, + "grad_norm": 4.896057161074228, + "learning_rate": 1.9594695436776548e-05, + "loss": 1.2692, "step": 2512 }, { - "epoch": 0.5269448521702663, - "grad_norm": 5.95180831168956, - "learning_rate": 1.890857447780697e-05, - "loss": 1.9096, + "epoch": 0.35484326461451565, + "grad_norm": 4.9254643946988805, + "learning_rate": 1.9594265715635515e-05, + "loss": 1.2371, "step": 2513 }, { - "epoch": 0.5271545397357936, - "grad_norm": 5.793914220672826, - "learning_rate": 1.890754580533056e-05, - "loss": 1.9626, + "epoch": 0.35498446766450154, + "grad_norm": 4.601486289516987, + "learning_rate": 1.9593835771527893e-05, + "loss": 1.245, "step": 2514 }, { - "epoch": 0.527364227301321, - "grad_norm": 5.794222872174821, - "learning_rate": 1.890651667632698e-05, - "loss": 2.1394, + "epoch": 0.3551256707144874, + "grad_norm": 4.811085086261459, + "learning_rate": 1.9593405604463668e-05, + "loss": 1.3297, "step": 2515 }, { - "epoch": 0.5275739148668483, - "grad_norm": 6.324613866456327, - "learning_rate": 1.890548709084897e-05, - "loss": 2.1301, + "epoch": 0.3552668737644733, + "grad_norm": 4.224424165414707, + "learning_rate": 1.9592975214452844e-05, + "loss": 1.119, "step": 2516 }, { - "epoch": 0.5277836024323758, - "grad_norm": 6.13198617558845, - "learning_rate": 1.8904457048949306e-05, - "loss": 2.1908, + "epoch": 0.3554080768144592, + "grad_norm": 4.3187961764729454, + "learning_rate": 1.9592544601505414e-05, + "loss": 1.2035, "step": 2517 }, { - "epoch": 0.5279932899979031, - "grad_norm": 5.349212019707747, - "learning_rate": 1.890342655068077e-05, - "loss": 2.0959, + "epoch": 0.35554927986444507, + "grad_norm": 4.173887095267237, + "learning_rate": 1.959211376563139e-05, + "loss": 1.2194, "step": 2518 }, { - "epoch": 0.5282029775634305, - "grad_norm": 7.0716757372954335, - "learning_rate": 1.8902395596096184e-05, - "loss": 2.1723, + "epoch": 0.35569048291443095, + "grad_norm": 4.000003415608054, + "learning_rate": 1.959168270684079e-05, + "loss": 1.0176, "step": 2519 }, { - "epoch": 0.5284126651289579, - "grad_norm": 7.486544596485488, - "learning_rate": 1.8901364185248382e-05, - "loss": 2.2201, + "epoch": 0.35583168596441683, + "grad_norm": 3.5849309205398687, + "learning_rate": 1.959125142514362e-05, + "loss": 0.971, "step": 2520 }, { - "epoch": 0.5286223526944852, - "grad_norm": 6.06742622916226, - "learning_rate": 1.8900332318190227e-05, - "loss": 2.2142, + "epoch": 0.3559728890144027, + "grad_norm": 3.8655917985040253, + "learning_rate": 1.9590819920549912e-05, + "loss": 1.0038, "step": 2521 }, { - "epoch": 0.5288320402600126, - "grad_norm": 5.625352313015393, - "learning_rate": 1.8899299994974605e-05, - "loss": 2.1964, + "epoch": 0.3561140920643886, + "grad_norm": 3.788802087877977, + "learning_rate": 1.959038819306969e-05, + "loss": 1.0044, "step": 2522 }, { - "epoch": 0.5290417278255399, - "grad_norm": 5.602054241009448, - "learning_rate": 1.8898267215654424e-05, - "loss": 2.0153, + "epoch": 0.3562552951143745, + "grad_norm": 3.9164012052205406, + "learning_rate": 1.958995624271299e-05, + "loss": 1.2183, "step": 2523 }, { - "epoch": 0.5292514153910673, - "grad_norm": 6.299744307991921, - "learning_rate": 1.8897233980282614e-05, - "loss": 2.4547, + "epoch": 0.35639649816436036, + "grad_norm": 4.927990507645713, + "learning_rate": 1.958952406948985e-05, + "loss": 1.1799, "step": 2524 }, { - "epoch": 0.5294611029565947, - "grad_norm": 5.140554876015905, - "learning_rate": 1.889620028891213e-05, - "loss": 1.7746, + "epoch": 0.35653770121434625, + "grad_norm": 4.013795916940915, + "learning_rate": 1.9589091673410306e-05, + "loss": 1.1286, "step": 2525 }, { - "epoch": 0.5296707905221221, - "grad_norm": 6.018089094757581, - "learning_rate": 1.8895166141595952e-05, - "loss": 1.8517, + "epoch": 0.35667890426433213, + "grad_norm": 4.119246315026301, + "learning_rate": 1.9588659054484417e-05, + "loss": 1.205, "step": 2526 }, { - "epoch": 0.5298804780876494, - "grad_norm": 5.270471274648795, - "learning_rate": 1.8894131538387078e-05, - "loss": 2.2218, + "epoch": 0.356820107314318, + "grad_norm": 3.4665389519728422, + "learning_rate": 1.9588226212722233e-05, + "loss": 1.0516, "step": 2527 }, { - "epoch": 0.5300901656531768, - "grad_norm": 5.729871918211228, - "learning_rate": 1.8893096479338543e-05, - "loss": 2.128, + "epoch": 0.3569613103643039, + "grad_norm": 3.6728180128216215, + "learning_rate": 1.958779314813381e-05, + "loss": 1.0172, "step": 2528 }, { - "epoch": 0.5302998532187041, - "grad_norm": 6.423095525940002, - "learning_rate": 1.8892060964503385e-05, - "loss": 2.3252, + "epoch": 0.3571025134142897, + "grad_norm": 4.353478614843127, + "learning_rate": 1.958735986072922e-05, + "loss": 1.1668, "step": 2529 }, { - "epoch": 0.5305095407842315, - "grad_norm": 5.639575883968387, - "learning_rate": 1.889102499393468e-05, - "loss": 2.1844, + "epoch": 0.3572437164642756, + "grad_norm": 4.296046552366659, + "learning_rate": 1.9586926350518522e-05, + "loss": 1.1394, "step": 2530 }, { - "epoch": 0.5307192283497588, - "grad_norm": 5.914693416864942, - "learning_rate": 1.8889988567685528e-05, - "loss": 1.7898, + "epoch": 0.3573849195142615, + "grad_norm": 4.100489280404129, + "learning_rate": 1.9586492617511797e-05, + "loss": 1.1776, "step": 2531 }, { - "epoch": 0.5309289159152862, - "grad_norm": 5.907057379136445, - "learning_rate": 1.8888951685809036e-05, - "loss": 1.9841, + "epoch": 0.3575261225642474, + "grad_norm": 3.898626603361294, + "learning_rate": 1.9586058661719124e-05, + "loss": 1.2663, "step": 2532 }, { - "epoch": 0.5311386034808135, - "grad_norm": 6.005674083766735, - "learning_rate": 1.8887914348358357e-05, - "loss": 1.7599, + "epoch": 0.35766732561423326, + "grad_norm": 3.642687197006916, + "learning_rate": 1.9585624483150588e-05, + "loss": 0.8974, "step": 2533 }, { - "epoch": 0.531348291046341, - "grad_norm": 6.144101867538987, - "learning_rate": 1.8886876555386654e-05, - "loss": 1.8793, + "epoch": 0.35780852866421914, + "grad_norm": 6.302146407584119, + "learning_rate": 1.9585190081816277e-05, + "loss": 1.0959, "step": 2534 }, { - "epoch": 0.5315579786118683, - "grad_norm": 5.836397378552884, - "learning_rate": 1.888583830694711e-05, - "loss": 1.8599, + "epoch": 0.357949731714205, + "grad_norm": 3.4604169743002955, + "learning_rate": 1.958475545772629e-05, + "loss": 0.796, "step": 2535 }, { - "epoch": 0.5317676661773957, - "grad_norm": 5.707268278185482, - "learning_rate": 1.8884799603092944e-05, - "loss": 1.9251, + "epoch": 0.3580909347641909, + "grad_norm": 3.742509145968615, + "learning_rate": 1.958432061089073e-05, + "loss": 0.867, "step": 2536 }, { - "epoch": 0.5319773537429231, - "grad_norm": 5.409215441129544, - "learning_rate": 1.8883760443877385e-05, - "loss": 2.0358, + "epoch": 0.3582321378141768, + "grad_norm": 5.142661899346792, + "learning_rate": 1.958388554131969e-05, + "loss": 1.367, "step": 2537 }, { - "epoch": 0.5321870413084504, - "grad_norm": 5.782022069269918, - "learning_rate": 1.88827208293537e-05, - "loss": 2.356, + "epoch": 0.35837334086416267, + "grad_norm": 5.111679361334132, + "learning_rate": 1.9583450249023292e-05, + "loss": 1.4026, "step": 2538 }, { - "epoch": 0.5323967288739778, - "grad_norm": 6.025708045357777, - "learning_rate": 1.8881680759575164e-05, - "loss": 2.1097, + "epoch": 0.35851454391414855, + "grad_norm": 4.567735102027591, + "learning_rate": 1.958301473401165e-05, + "loss": 1.1452, "step": 2539 }, { - "epoch": 0.5326064164395051, - "grad_norm": 6.113074775414307, - "learning_rate": 1.888064023459508e-05, - "loss": 2.0383, + "epoch": 0.35865574696413444, + "grad_norm": 3.5906392011566757, + "learning_rate": 1.9582578996294882e-05, + "loss": 0.8824, "step": 2540 }, { - "epoch": 0.5328161040050325, - "grad_norm": 6.874447921239057, - "learning_rate": 1.887959925446679e-05, - "loss": 1.8064, + "epoch": 0.3587969500141203, + "grad_norm": 4.614562551359501, + "learning_rate": 1.958214303588312e-05, + "loss": 1.1862, "step": 2541 }, { - "epoch": 0.5330257915705598, - "grad_norm": 6.974773252832469, - "learning_rate": 1.887855781924363e-05, - "loss": 2.2379, + "epoch": 0.3589381530641062, + "grad_norm": 4.6707418988975675, + "learning_rate": 1.9581706852786492e-05, + "loss": 1.1986, "step": 2542 }, { - "epoch": 0.5332354791360873, - "grad_norm": 7.143128441944039, - "learning_rate": 1.8877515928978988e-05, - "loss": 1.6974, + "epoch": 0.3590793561140921, + "grad_norm": 3.6527111195083934, + "learning_rate": 1.958127044701513e-05, + "loss": 0.9403, "step": 2543 }, { - "epoch": 0.5334451667016146, - "grad_norm": 5.506020343500608, - "learning_rate": 1.8876473583726255e-05, - "loss": 1.9358, + "epoch": 0.35922055916407797, + "grad_norm": 3.930119969334276, + "learning_rate": 1.9580833818579184e-05, + "loss": 1.067, "step": 2544 }, { - "epoch": 0.533654854267142, - "grad_norm": 5.295286436205329, - "learning_rate": 1.8875430783538854e-05, - "loss": 1.4723, + "epoch": 0.35936176221406385, + "grad_norm": 4.699400713506971, + "learning_rate": 1.9580396967488797e-05, + "loss": 1.1573, "step": 2545 }, { - "epoch": 0.5338645418326693, - "grad_norm": 6.818605675315057, - "learning_rate": 1.887438752847023e-05, - "loss": 2.1726, + "epoch": 0.3595029652640497, + "grad_norm": 4.143347786517, + "learning_rate": 1.9579959893754123e-05, + "loss": 1.1486, "step": 2546 }, { - "epoch": 0.5340742293981967, - "grad_norm": 7.437139298754355, - "learning_rate": 1.887334381857386e-05, - "loss": 2.3991, + "epoch": 0.35964416831403556, + "grad_norm": 4.346619719095544, + "learning_rate": 1.9579522597385315e-05, + "loss": 1.1658, "step": 2547 }, { - "epoch": 0.534283916963724, - "grad_norm": 6.6226961714605705, - "learning_rate": 1.8872299653903224e-05, - "loss": 2.2787, + "epoch": 0.35978537136402144, + "grad_norm": 3.9050640348692705, + "learning_rate": 1.9579085078392543e-05, + "loss": 1.1804, "step": 2548 }, { - "epoch": 0.5344936045292514, - "grad_norm": 5.920636651180079, - "learning_rate": 1.8871255034511843e-05, - "loss": 2.1613, + "epoch": 0.3599265744140073, + "grad_norm": 4.029578756898514, + "learning_rate": 1.957864733678597e-05, + "loss": 1.1095, "step": 2549 }, { - "epoch": 0.5347032920947787, - "grad_norm": 5.704353782310128, - "learning_rate": 1.8870209960453257e-05, - "loss": 2.2763, + "epoch": 0.3600677774639932, + "grad_norm": 4.289717306795405, + "learning_rate": 1.9578209372575766e-05, + "loss": 1.2339, "step": 2550 }, { - "epoch": 0.5349129796603062, - "grad_norm": 6.365168925361749, - "learning_rate": 1.8869164431781025e-05, - "loss": 2.2851, + "epoch": 0.3602089805139791, + "grad_norm": 4.273513100374371, + "learning_rate": 1.9577771185772118e-05, + "loss": 1.1185, "step": 2551 }, { - "epoch": 0.5351226672258335, - "grad_norm": 6.852482383812198, - "learning_rate": 1.8868118448548732e-05, - "loss": 1.6981, + "epoch": 0.360350183563965, + "grad_norm": 4.848127197146267, + "learning_rate": 1.95773327763852e-05, + "loss": 1.0992, "step": 2552 }, { - "epoch": 0.5353323547913609, - "grad_norm": 5.341167673368236, - "learning_rate": 1.886707201080999e-05, - "loss": 1.9602, + "epoch": 0.36049138661395086, + "grad_norm": 5.602086404859899, + "learning_rate": 1.957689414442521e-05, + "loss": 1.2741, "step": 2553 }, { - "epoch": 0.5355420423568882, - "grad_norm": 6.580285107003099, - "learning_rate": 1.8866025118618427e-05, - "loss": 2.1814, + "epoch": 0.36063258966393674, + "grad_norm": 4.350079135573795, + "learning_rate": 1.9576455289902327e-05, + "loss": 1.0776, "step": 2554 }, { - "epoch": 0.5357517299224156, - "grad_norm": 6.737933866419712, - "learning_rate": 1.88649777720277e-05, - "loss": 1.9446, + "epoch": 0.3607737927139226, + "grad_norm": 3.527970556189663, + "learning_rate": 1.9576016212826766e-05, + "loss": 0.9498, "step": 2555 }, { - "epoch": 0.535961417487943, - "grad_norm": 6.009354519815355, - "learning_rate": 1.8863929971091483e-05, - "loss": 1.897, + "epoch": 0.3609149957639085, + "grad_norm": 4.241281288615647, + "learning_rate": 1.9575576913208718e-05, + "loss": 1.049, "step": 2556 }, { - "epoch": 0.5361711050534703, - "grad_norm": 6.164549947304244, - "learning_rate": 1.8862881715863485e-05, - "loss": 2.2016, + "epoch": 0.3610561988138944, + "grad_norm": 4.402847496567008, + "learning_rate": 1.9575137391058404e-05, + "loss": 1.0306, "step": 2557 }, { - "epoch": 0.5363807926189977, - "grad_norm": 5.545686855804167, - "learning_rate": 1.8861833006397426e-05, - "loss": 2.0923, + "epoch": 0.3611974018638803, + "grad_norm": 4.239163576663033, + "learning_rate": 1.9574697646386027e-05, + "loss": 1.1279, "step": 2558 }, { - "epoch": 0.536590480184525, - "grad_norm": 6.488597114632481, - "learning_rate": 1.8860783842747056e-05, - "loss": 2.1825, + "epoch": 0.36133860491386616, + "grad_norm": 3.6269966704917787, + "learning_rate": 1.9574257679201814e-05, + "loss": 0.7925, "step": 2559 }, { - "epoch": 0.5368001677500525, - "grad_norm": 5.813140867138001, - "learning_rate": 1.8859734224966145e-05, - "loss": 2.447, + "epoch": 0.36147980796385204, + "grad_norm": 4.064863474621262, + "learning_rate": 1.957381748951599e-05, + "loss": 1.0842, "step": 2560 }, { - "epoch": 0.5370098553155798, - "grad_norm": 6.04901192822802, - "learning_rate": 1.885868415310849e-05, - "loss": 2.2761, + "epoch": 0.3616210110138379, + "grad_norm": 3.963808849874696, + "learning_rate": 1.957337707733878e-05, + "loss": 1.0652, "step": 2561 }, { - "epoch": 0.5372195428811072, - "grad_norm": 6.045359823819872, - "learning_rate": 1.8857633627227905e-05, - "loss": 2.1769, + "epoch": 0.3617622140638238, + "grad_norm": 4.756840585174615, + "learning_rate": 1.9572936442680417e-05, + "loss": 1.1014, "step": 2562 }, { - "epoch": 0.5374292304466345, - "grad_norm": 5.5660647245674095, - "learning_rate": 1.8856582647378238e-05, - "loss": 2.0056, + "epoch": 0.36190341711380963, + "grad_norm": 5.005031496742946, + "learning_rate": 1.957249558555115e-05, + "loss": 1.3319, "step": 2563 }, { - "epoch": 0.5376389180121619, - "grad_norm": 6.22577447568348, - "learning_rate": 1.8855531213613348e-05, - "loss": 2.0116, + "epoch": 0.3620446201637955, + "grad_norm": 4.341396189894744, + "learning_rate": 1.957205450596122e-05, + "loss": 1.0208, "step": 2564 }, { - "epoch": 0.5378486055776892, - "grad_norm": 5.549439006043563, - "learning_rate": 1.885447932598712e-05, - "loss": 2.0624, + "epoch": 0.3621858232137814, + "grad_norm": 4.656639418326444, + "learning_rate": 1.9571613203920874e-05, + "loss": 1.1131, "step": 2565 }, { - "epoch": 0.5380582931432166, - "grad_norm": 6.066944574652426, - "learning_rate": 1.8853426984553472e-05, - "loss": 2.0386, + "epoch": 0.3623270262637673, + "grad_norm": 6.44905363207047, + "learning_rate": 1.9571171679440374e-05, + "loss": 1.2414, "step": 2566 }, { - "epoch": 0.5382679807087439, - "grad_norm": 6.04419168859851, - "learning_rate": 1.8852374189366336e-05, - "loss": 2.0921, + "epoch": 0.36246822931375317, + "grad_norm": 4.048724455435908, + "learning_rate": 1.9570729932529974e-05, + "loss": 0.9768, "step": 2567 }, { - "epoch": 0.5384776682742713, - "grad_norm": 5.606980320856114, - "learning_rate": 1.885132094047967e-05, - "loss": 2.1022, + "epoch": 0.36260943236373905, + "grad_norm": 4.011691898365749, + "learning_rate": 1.9570287963199947e-05, + "loss": 1.0937, "step": 2568 }, { - "epoch": 0.5386873558397987, - "grad_norm": 5.996642453563527, - "learning_rate": 1.885026723794745e-05, - "loss": 1.671, + "epoch": 0.36275063541372493, + "grad_norm": 3.9083180934002293, + "learning_rate": 1.956984577146056e-05, + "loss": 0.9612, "step": 2569 }, { - "epoch": 0.5388970434053261, - "grad_norm": 5.453699393885708, - "learning_rate": 1.884921308182369e-05, - "loss": 1.928, + "epoch": 0.3628918384637108, + "grad_norm": 4.689141875202764, + "learning_rate": 1.956940335732209e-05, + "loss": 1.1409, "step": 2570 }, { - "epoch": 0.5391067309708534, - "grad_norm": 6.017831150741545, - "learning_rate": 1.8848158472162408e-05, - "loss": 1.9925, + "epoch": 0.3630330415136967, + "grad_norm": 4.992250499224746, + "learning_rate": 1.956896072079482e-05, + "loss": 1.3679, "step": 2571 }, { - "epoch": 0.5393164185363808, - "grad_norm": 5.019441325591909, - "learning_rate": 1.8847103409017658e-05, - "loss": 1.5908, + "epoch": 0.3631742445636826, + "grad_norm": 3.666532753766148, + "learning_rate": 1.9568517861889035e-05, + "loss": 1.0293, "step": 2572 }, { - "epoch": 0.5395261061019082, - "grad_norm": 7.434357621169412, - "learning_rate": 1.8846047892443513e-05, - "loss": 2.2383, + "epoch": 0.36331544761366846, + "grad_norm": 4.994677846394718, + "learning_rate": 1.9568074780615026e-05, + "loss": 0.9653, "step": 2573 }, { - "epoch": 0.5397357936674355, - "grad_norm": 5.826370864395905, - "learning_rate": 1.884499192249407e-05, - "loss": 2.0631, + "epoch": 0.36345665066365435, + "grad_norm": 4.320697678110346, + "learning_rate": 1.9567631476983088e-05, + "loss": 1.1125, "step": 2574 }, { - "epoch": 0.5399454812329629, - "grad_norm": 6.2153723496133235, - "learning_rate": 1.8843935499223455e-05, - "loss": 1.7688, + "epoch": 0.36359785371364023, + "grad_norm": 4.166960567512201, + "learning_rate": 1.9567187951003533e-05, + "loss": 1.0624, "step": 2575 }, { - "epoch": 0.5401551687984902, - "grad_norm": 6.066741173415851, - "learning_rate": 1.8842878622685804e-05, - "loss": 1.7406, + "epoch": 0.3637390567636261, + "grad_norm": 4.226767354323393, + "learning_rate": 1.9566744202686657e-05, + "loss": 1.0811, "step": 2576 }, { - "epoch": 0.5403648563640177, - "grad_norm": 5.462624577483679, - "learning_rate": 1.8841821292935286e-05, - "loss": 1.9572, + "epoch": 0.363880259813612, + "grad_norm": 4.176141665471873, + "learning_rate": 1.9566300232042778e-05, + "loss": 1.0234, "step": 2577 }, { - "epoch": 0.540574543929545, - "grad_norm": 5.9342953808306165, - "learning_rate": 1.884076351002609e-05, - "loss": 2.0587, + "epoch": 0.3640214628635979, + "grad_norm": 4.677846483539329, + "learning_rate": 1.9565856039082213e-05, + "loss": 1.194, "step": 2578 }, { - "epoch": 0.5407842314950724, - "grad_norm": 7.314594636167027, - "learning_rate": 1.8839705274012433e-05, - "loss": 2.2103, + "epoch": 0.36416266591358376, + "grad_norm": 3.7442466210235854, + "learning_rate": 1.9565411623815287e-05, + "loss": 0.9986, "step": 2579 }, { - "epoch": 0.5409939190605997, - "grad_norm": 6.320814660898045, - "learning_rate": 1.8838646584948548e-05, - "loss": 1.9895, + "epoch": 0.3643038689635696, + "grad_norm": 4.213189036260175, + "learning_rate": 1.9564966986252326e-05, + "loss": 1.075, "step": 2580 }, { - "epoch": 0.5412036066261271, - "grad_norm": 6.3041781940040185, - "learning_rate": 1.8837587442888695e-05, - "loss": 1.9651, + "epoch": 0.36444507201355547, + "grad_norm": 4.168082144709891, + "learning_rate": 1.956452212640366e-05, + "loss": 1.087, "step": 2581 }, { - "epoch": 0.5414132941916544, - "grad_norm": 6.076140858127466, - "learning_rate": 1.8836527847887155e-05, - "loss": 2.2243, + "epoch": 0.36458627506354135, + "grad_norm": 3.6924693608572676, + "learning_rate": 1.956407704427963e-05, + "loss": 0.9708, "step": 2582 }, { - "epoch": 0.5416229817571818, - "grad_norm": 6.609548119353039, - "learning_rate": 1.8835467799998244e-05, - "loss": 1.7389, + "epoch": 0.36472747811352724, + "grad_norm": 5.398897823996847, + "learning_rate": 1.9563631739890586e-05, + "loss": 1.2565, "step": 2583 }, { - "epoch": 0.5418326693227091, - "grad_norm": 5.645032221596158, - "learning_rate": 1.883440729927628e-05, - "loss": 2.2814, + "epoch": 0.3648686811635131, + "grad_norm": 3.9430816585498403, + "learning_rate": 1.9563186213246864e-05, + "loss": 1.138, "step": 2584 }, { - "epoch": 0.5420423568882365, - "grad_norm": 6.265513245942741, - "learning_rate": 1.8833346345775618e-05, - "loss": 1.6286, + "epoch": 0.365009884213499, + "grad_norm": 4.640374438413612, + "learning_rate": 1.9562740464358828e-05, + "loss": 1.2093, "step": 2585 }, { - "epoch": 0.5422520444537638, - "grad_norm": 6.889547940581719, - "learning_rate": 1.8832284939550635e-05, - "loss": 2.1864, + "epoch": 0.3651510872634849, + "grad_norm": 3.6057963344897535, + "learning_rate": 1.9562294493236834e-05, + "loss": 1.0812, "step": 2586 }, { - "epoch": 0.5424617320192913, - "grad_norm": 6.046332804758267, - "learning_rate": 1.883122308065573e-05, - "loss": 1.932, + "epoch": 0.36529229031347077, + "grad_norm": 4.841710827043427, + "learning_rate": 1.9561848299891243e-05, + "loss": 1.105, "step": 2587 }, { - "epoch": 0.5426714195848186, - "grad_norm": 7.528004394604743, - "learning_rate": 1.8830160769145325e-05, - "loss": 2.2459, + "epoch": 0.36543349336345665, + "grad_norm": 4.159329379608884, + "learning_rate": 1.9561401884332424e-05, + "loss": 1.0502, "step": 2588 }, { - "epoch": 0.542881107150346, - "grad_norm": 5.8426351049485525, - "learning_rate": 1.882909800507387e-05, - "loss": 2.312, + "epoch": 0.36557469641344253, + "grad_norm": 3.4732234623733, + "learning_rate": 1.956095524657076e-05, + "loss": 0.7829, "step": 2589 }, { - "epoch": 0.5430907947158734, - "grad_norm": 6.574870114972231, - "learning_rate": 1.8828034788495825e-05, - "loss": 2.2739, + "epoch": 0.3657158994634284, + "grad_norm": 3.989978056712313, + "learning_rate": 1.9560508386616624e-05, + "loss": 1.3175, "step": 2590 }, { - "epoch": 0.5433004822814007, - "grad_norm": 6.476936864156679, - "learning_rate": 1.8826971119465685e-05, - "loss": 2.1414, + "epoch": 0.3658571025134143, + "grad_norm": 4.562216225358608, + "learning_rate": 1.95600613044804e-05, + "loss": 1.0369, "step": 2591 }, { - "epoch": 0.5435101698469281, - "grad_norm": 6.444448301870777, - "learning_rate": 1.8825906998037968e-05, - "loss": 1.9769, + "epoch": 0.3659983055634002, + "grad_norm": 3.758237079390016, + "learning_rate": 1.9559614000172483e-05, + "loss": 0.918, "step": 2592 }, { - "epoch": 0.5437198574124554, - "grad_norm": 5.944910416967087, - "learning_rate": 1.882484242426721e-05, - "loss": 2.1644, + "epoch": 0.36613950861338607, + "grad_norm": 3.9128640295803243, + "learning_rate": 1.9559166473703265e-05, + "loss": 0.952, "step": 2593 }, { - "epoch": 0.5439295449779828, - "grad_norm": 6.216191310254095, - "learning_rate": 1.8823777398207967e-05, - "loss": 1.9783, + "epoch": 0.36628071166337195, + "grad_norm": 4.297107498774017, + "learning_rate": 1.9558718725083143e-05, + "loss": 1.1633, "step": 2594 }, { - "epoch": 0.5441392325435102, - "grad_norm": 6.232777991800609, - "learning_rate": 1.8822711919914832e-05, - "loss": 2.0438, + "epoch": 0.36642191471335783, + "grad_norm": 5.802336557122011, + "learning_rate": 1.9558270754322528e-05, + "loss": 1.4975, "step": 2595 }, { - "epoch": 0.5443489201090376, - "grad_norm": 6.796762743384618, - "learning_rate": 1.882164598944241e-05, - "loss": 2.323, + "epoch": 0.3665631177633437, + "grad_norm": 3.9028995835946576, + "learning_rate": 1.955782256143183e-05, + "loss": 1.039, "step": 2596 }, { - "epoch": 0.5445586076745649, - "grad_norm": 6.0599533602428375, - "learning_rate": 1.882057960684533e-05, - "loss": 1.8952, + "epoch": 0.36670432081332954, + "grad_norm": 3.1703449733640925, + "learning_rate": 1.9557374146421462e-05, + "loss": 0.7915, "step": 2597 }, { - "epoch": 0.5447682952400923, - "grad_norm": 5.54785777187151, - "learning_rate": 1.8819512772178245e-05, - "loss": 1.7238, + "epoch": 0.3668455238633154, + "grad_norm": 4.295446399763511, + "learning_rate": 1.9556925509301844e-05, + "loss": 1.258, "step": 2598 }, { - "epoch": 0.5449779828056196, - "grad_norm": 4.915630012202421, - "learning_rate": 1.8818445485495832e-05, - "loss": 1.8832, + "epoch": 0.3669867269133013, + "grad_norm": 4.116456735944132, + "learning_rate": 1.9556476650083407e-05, + "loss": 1.2132, "step": 2599 }, { - "epoch": 0.545187670371147, - "grad_norm": 7.059105383533619, - "learning_rate": 1.88173777468528e-05, - "loss": 2.1849, + "epoch": 0.3671279299632872, + "grad_norm": 3.972161913324547, + "learning_rate": 1.9556027568776577e-05, + "loss": 1.1161, "step": 2600 }, { - "epoch": 0.5453973579366743, - "grad_norm": 5.337910178951622, - "learning_rate": 1.881630955630386e-05, - "loss": 2.0503, + "epoch": 0.3672691330132731, + "grad_norm": 7.778899117627368, + "learning_rate": 1.9555578265391797e-05, + "loss": 1.0107, "step": 2601 }, { - "epoch": 0.5456070455022017, - "grad_norm": 6.126488900739763, - "learning_rate": 1.8815240913903764e-05, - "loss": 1.9927, + "epoch": 0.36741033606325896, + "grad_norm": 3.773900012317438, + "learning_rate": 1.9555128739939504e-05, + "loss": 1.056, "step": 2602 }, { - "epoch": 0.545816733067729, - "grad_norm": 6.962111979280045, - "learning_rate": 1.8814171819707282e-05, - "loss": 1.5928, + "epoch": 0.36755153911324484, + "grad_norm": 4.70357587726098, + "learning_rate": 1.9554678992430145e-05, + "loss": 1.222, "step": 2603 }, { - "epoch": 0.5460264206332565, - "grad_norm": 6.510655898069793, - "learning_rate": 1.881310227376921e-05, - "loss": 2.0686, + "epoch": 0.3676927421632307, + "grad_norm": 4.455645668662674, + "learning_rate": 1.9554229022874175e-05, + "loss": 1.0587, "step": 2604 }, { - "epoch": 0.5462361081987838, - "grad_norm": 7.140428743075681, - "learning_rate": 1.881203227614436e-05, - "loss": 2.0624, + "epoch": 0.3678339452132166, + "grad_norm": 3.864786247438436, + "learning_rate": 1.9553778831282043e-05, + "loss": 1.2616, "step": 2605 }, { - "epoch": 0.5464457957643112, - "grad_norm": 7.032297272042619, - "learning_rate": 1.881096182688757e-05, - "loss": 2.3407, + "epoch": 0.3679751482632025, + "grad_norm": 4.404179756595742, + "learning_rate": 1.9553328417664223e-05, + "loss": 1.2918, "step": 2606 }, { - "epoch": 0.5466554833298385, - "grad_norm": 6.566799320265648, - "learning_rate": 1.8809890926053705e-05, - "loss": 2.3214, + "epoch": 0.3681163513131884, + "grad_norm": 3.732342746674166, + "learning_rate": 1.9552877782031172e-05, + "loss": 0.9101, "step": 2607 }, { - "epoch": 0.5468651708953659, - "grad_norm": 5.705523634143453, - "learning_rate": 1.880881957369765e-05, - "loss": 1.997, + "epoch": 0.36825755436317426, + "grad_norm": 3.6330542339800393, + "learning_rate": 1.9552426924393368e-05, + "loss": 1.1368, "step": 2608 }, { - "epoch": 0.5470748584608933, - "grad_norm": 6.197984985801809, - "learning_rate": 1.8807747769874315e-05, - "loss": 1.9006, + "epoch": 0.36839875741316014, + "grad_norm": 3.8957792385974126, + "learning_rate": 1.955197584476129e-05, + "loss": 0.9992, "step": 2609 }, { - "epoch": 0.5472845460264206, - "grad_norm": 6.155445026431149, - "learning_rate": 1.880667551463863e-05, - "loss": 2.1163, + "epoch": 0.368539960463146, + "grad_norm": 4.755621887955596, + "learning_rate": 1.9551524543145417e-05, + "loss": 1.1568, "step": 2610 }, { - "epoch": 0.547494233591948, - "grad_norm": 6.627898538936987, - "learning_rate": 1.880560280804555e-05, - "loss": 2.2132, + "epoch": 0.3686811635131319, + "grad_norm": 3.6228377694934077, + "learning_rate": 1.955107301955624e-05, + "loss": 0.9163, "step": 2611 }, { - "epoch": 0.5477039211574753, - "grad_norm": 6.441122380762986, - "learning_rate": 1.8804529650150054e-05, - "loss": 2.3559, + "epoch": 0.3688223665631178, + "grad_norm": 3.7523259538633744, + "learning_rate": 1.9550621274004248e-05, + "loss": 1.082, "step": 2612 }, { - "epoch": 0.5479136087230028, - "grad_norm": 6.523963593210011, - "learning_rate": 1.8803456041007142e-05, - "loss": 2.2058, + "epoch": 0.36896356961310367, + "grad_norm": 4.395839233243182, + "learning_rate": 1.9550169306499942e-05, + "loss": 1.2092, "step": 2613 }, { - "epoch": 0.5481232962885301, - "grad_norm": 6.090534268779164, - "learning_rate": 1.880238198067184e-05, - "loss": 1.7876, + "epoch": 0.3691047726630895, + "grad_norm": 3.7344739937623537, + "learning_rate": 1.9549717117053828e-05, + "loss": 0.9991, "step": 2614 }, { - "epoch": 0.5483329838540575, - "grad_norm": 5.70892252025577, - "learning_rate": 1.880130746919919e-05, - "loss": 2.3092, + "epoch": 0.3692459757130754, + "grad_norm": 3.3198120065949817, + "learning_rate": 1.954926470567641e-05, + "loss": 0.7808, "step": 2615 }, { - "epoch": 0.5485426714195848, - "grad_norm": 5.975602913938529, - "learning_rate": 1.8800232506644274e-05, - "loss": 2.0855, + "epoch": 0.36938717876306126, + "grad_norm": 3.8017412632461256, + "learning_rate": 1.9548812072378208e-05, + "loss": 1.0538, "step": 2616 }, { - "epoch": 0.5487523589851122, - "grad_norm": 5.181952961539222, - "learning_rate": 1.879915709306218e-05, - "loss": 2.0363, + "epoch": 0.36952838181304715, + "grad_norm": 4.015647756324, + "learning_rate": 1.9548359217169732e-05, + "loss": 1.0941, "step": 2617 }, { - "epoch": 0.5489620465506395, - "grad_norm": 5.957147466276341, - "learning_rate": 1.879808122850802e-05, - "loss": 2.2269, + "epoch": 0.36966958486303303, + "grad_norm": 4.309437839698541, + "learning_rate": 1.9547906140061515e-05, + "loss": 1.0449, "step": 2618 }, { - "epoch": 0.5491717341161669, - "grad_norm": 5.70177242393609, - "learning_rate": 1.8797004913036937e-05, - "loss": 2.1188, + "epoch": 0.3698107879130189, + "grad_norm": 4.255473609926496, + "learning_rate": 1.9547452841064083e-05, + "loss": 1.1466, "step": 2619 }, { - "epoch": 0.5493814216816942, - "grad_norm": 6.0469326749966354, - "learning_rate": 1.879592814670409e-05, - "loss": 2.088, + "epoch": 0.3699519909630048, + "grad_norm": 4.757163261565678, + "learning_rate": 1.9546999320187966e-05, + "loss": 0.8497, "step": 2620 }, { - "epoch": 0.5495911092472217, - "grad_norm": 5.603066859220233, - "learning_rate": 1.879485092956468e-05, - "loss": 2.2123, + "epoch": 0.3700931940129907, + "grad_norm": 4.03713648622002, + "learning_rate": 1.9546545577443715e-05, + "loss": 0.901, "step": 2621 }, { - "epoch": 0.549800796812749, - "grad_norm": 6.684463330144917, - "learning_rate": 1.8793773261673898e-05, - "loss": 2.0154, + "epoch": 0.37023439706297656, + "grad_norm": 3.795108762358568, + "learning_rate": 1.954609161284186e-05, + "loss": 1.1363, "step": 2622 }, { - "epoch": 0.5500104843782764, - "grad_norm": 6.051459721069936, - "learning_rate": 1.879269514308699e-05, - "loss": 1.9838, + "epoch": 0.37037560011296244, + "grad_norm": 3.9216056360652853, + "learning_rate": 1.9545637426392966e-05, + "loss": 1.1115, "step": 2623 }, { - "epoch": 0.5502201719438037, - "grad_norm": 6.504314365555888, - "learning_rate": 1.87916165738592e-05, - "loss": 1.9246, + "epoch": 0.3705168031629483, + "grad_norm": 4.842391888666367, + "learning_rate": 1.9545183018107576e-05, + "loss": 1.2379, "step": 2624 }, { - "epoch": 0.5504298595093311, - "grad_norm": 5.335514199646772, - "learning_rate": 1.8790537554045816e-05, - "loss": 1.9697, + "epoch": 0.3706580062129342, + "grad_norm": 5.09348271062579, + "learning_rate": 1.9544728387996255e-05, + "loss": 1.2357, "step": 2625 }, { - "epoch": 0.5506395470748585, - "grad_norm": 6.466563052930602, - "learning_rate": 1.8789458083702134e-05, - "loss": 2.1781, + "epoch": 0.3707992092629201, + "grad_norm": 4.25666053926506, + "learning_rate": 1.9544273536069573e-05, + "loss": 1.0311, "step": 2626 }, { - "epoch": 0.5508492346403858, - "grad_norm": 4.991162315360214, - "learning_rate": 1.878837816288348e-05, - "loss": 1.9836, + "epoch": 0.370940412312906, + "grad_norm": 3.7601192077035965, + "learning_rate": 1.9543818462338088e-05, + "loss": 0.9642, "step": 2627 }, { - "epoch": 0.5510589222059132, - "grad_norm": 5.530390858435121, - "learning_rate": 1.8787297791645206e-05, - "loss": 2.1867, + "epoch": 0.37108161536289186, + "grad_norm": 3.586899463771343, + "learning_rate": 1.9543363166812387e-05, + "loss": 1.003, "step": 2628 }, { - "epoch": 0.5512686097714405, - "grad_norm": 6.0951658650466936, - "learning_rate": 1.8786216970042677e-05, - "loss": 2.4288, + "epoch": 0.37122281841287774, + "grad_norm": 4.016519845216909, + "learning_rate": 1.954290764950305e-05, + "loss": 1.1244, "step": 2629 }, { - "epoch": 0.551478297336968, - "grad_norm": 6.146414981998222, - "learning_rate": 1.8785135698131287e-05, - "loss": 1.9042, + "epoch": 0.3713640214628636, + "grad_norm": 4.197927373014517, + "learning_rate": 1.9542451910420655e-05, + "loss": 1.0522, "step": 2630 }, { - "epoch": 0.5516879849024953, - "grad_norm": 6.043900892902211, - "learning_rate": 1.878405397596646e-05, - "loss": 2.0916, + "epoch": 0.37150522451284945, + "grad_norm": 4.28277985995062, + "learning_rate": 1.9541995949575806e-05, + "loss": 0.9575, "step": 2631 }, { - "epoch": 0.5518976724680227, - "grad_norm": 5.493283793183004, - "learning_rate": 1.878297180360363e-05, - "loss": 1.72, + "epoch": 0.37164642756283534, + "grad_norm": 3.853798088707334, + "learning_rate": 1.9541539766979087e-05, + "loss": 0.9294, "step": 2632 }, { - "epoch": 0.55210736003355, - "grad_norm": 6.523742918756957, - "learning_rate": 1.878188918109826e-05, - "loss": 2.1661, + "epoch": 0.3717876306128212, + "grad_norm": 3.5723717464735714, + "learning_rate": 1.9541083362641105e-05, + "loss": 0.9443, "step": 2633 }, { - "epoch": 0.5523170475990774, - "grad_norm": 6.268987761913348, - "learning_rate": 1.8780806108505843e-05, - "loss": 1.6766, + "epoch": 0.3719288336628071, + "grad_norm": 4.3969911517167874, + "learning_rate": 1.954062673657247e-05, + "loss": 0.9765, "step": 2634 }, { - "epoch": 0.5525267351646047, - "grad_norm": 7.081458621037962, - "learning_rate": 1.8779722585881877e-05, - "loss": 2.1916, + "epoch": 0.372070036712793, + "grad_norm": 3.8952237306387794, + "learning_rate": 1.9540169888783786e-05, + "loss": 1.0487, "step": 2635 }, { - "epoch": 0.5527364227301321, - "grad_norm": 6.985263527659814, - "learning_rate": 1.8778638613281904e-05, - "loss": 1.7591, + "epoch": 0.37221123976277887, + "grad_norm": 4.154113295143288, + "learning_rate": 1.9539712819285674e-05, + "loss": 1.1628, "step": 2636 }, { - "epoch": 0.5529461102956594, - "grad_norm": 6.450472955554288, - "learning_rate": 1.8777554190761478e-05, - "loss": 1.6982, + "epoch": 0.37235244281276475, + "grad_norm": 3.7128959034966704, + "learning_rate": 1.9539255528088757e-05, + "loss": 0.969, "step": 2637 }, { - "epoch": 0.5531557978611868, - "grad_norm": 6.544425544827743, - "learning_rate": 1.8776469318376175e-05, - "loss": 2.0519, + "epoch": 0.37249364586275063, + "grad_norm": 4.08440270371656, + "learning_rate": 1.953879801520366e-05, + "loss": 1.0588, "step": 2638 }, { - "epoch": 0.5533654854267142, - "grad_norm": 6.621091158480967, - "learning_rate": 1.8775383996181595e-05, - "loss": 1.7643, + "epoch": 0.3726348489127365, + "grad_norm": 3.862995324890109, + "learning_rate": 1.9538340280641018e-05, + "loss": 1.0782, "step": 2639 }, { - "epoch": 0.5535751729922416, - "grad_norm": 6.6094164115710905, - "learning_rate": 1.8774298224233364e-05, - "loss": 1.9746, + "epoch": 0.3727760519627224, + "grad_norm": 3.8558077011851215, + "learning_rate": 1.953788232441147e-05, + "loss": 1.0931, "step": 2640 }, { - "epoch": 0.5537848605577689, - "grad_norm": 6.053812782568108, - "learning_rate": 1.8773212002587135e-05, - "loss": 2.044, + "epoch": 0.3729172550127083, + "grad_norm": 4.58102720276026, + "learning_rate": 1.953742414652565e-05, + "loss": 1.1056, "step": 2641 }, { - "epoch": 0.5539945481232963, - "grad_norm": 6.983403383375147, - "learning_rate": 1.877212533129857e-05, - "loss": 2.3276, + "epoch": 0.37305845806269416, + "grad_norm": 4.374194040456852, + "learning_rate": 1.9536965746994213e-05, + "loss": 1.0056, "step": 2642 }, { - "epoch": 0.5542042356888236, - "grad_norm": 6.416429155049442, - "learning_rate": 1.8771038210423375e-05, - "loss": 2.0387, + "epoch": 0.37319966111268005, + "grad_norm": 3.498352355097033, + "learning_rate": 1.9536507125827812e-05, + "loss": 0.9966, "step": 2643 }, { - "epoch": 0.554413923254351, - "grad_norm": 7.029730292703193, - "learning_rate": 1.8769950640017253e-05, - "loss": 2.0282, + "epoch": 0.37334086416266593, + "grad_norm": 3.532055317243616, + "learning_rate": 1.9536048283037105e-05, + "loss": 0.9219, "step": 2644 }, { - "epoch": 0.5546236108198784, - "grad_norm": 6.577962749200691, - "learning_rate": 1.8768862620135947e-05, - "loss": 2.1805, + "epoch": 0.3734820672126518, + "grad_norm": 3.9817641631313383, + "learning_rate": 1.9535589218632753e-05, + "loss": 1.182, "step": 2645 }, { - "epoch": 0.5548332983854057, - "grad_norm": 6.5731343776378095, - "learning_rate": 1.876777415083523e-05, - "loss": 1.6695, + "epoch": 0.3736232702626377, + "grad_norm": 5.785004833342896, + "learning_rate": 1.9535129932625425e-05, + "loss": 1.0559, "step": 2646 }, { - "epoch": 0.5550429859509332, - "grad_norm": 6.467715596166901, - "learning_rate": 1.8766685232170877e-05, - "loss": 1.8594, + "epoch": 0.3737644733126236, + "grad_norm": 4.319712805816962, + "learning_rate": 1.9534670425025797e-05, + "loss": 1.1583, "step": 2647 }, { - "epoch": 0.5552526735164605, - "grad_norm": 6.76420042303689, - "learning_rate": 1.8765595864198702e-05, - "loss": 1.8977, + "epoch": 0.3739056763626094, + "grad_norm": 3.80630344736385, + "learning_rate": 1.9534210695844543e-05, + "loss": 1.1674, "step": 2648 }, { - "epoch": 0.5554623610819879, - "grad_norm": 5.59630298445021, - "learning_rate": 1.8764506046974536e-05, - "loss": 1.6031, + "epoch": 0.3740468794125953, + "grad_norm": 3.3294615444271836, + "learning_rate": 1.953375074509235e-05, + "loss": 0.9583, "step": 2649 }, { - "epoch": 0.5556720486475152, - "grad_norm": 5.959674279827521, - "learning_rate": 1.8763415780554232e-05, - "loss": 1.9359, + "epoch": 0.3741880824625812, + "grad_norm": 3.7984871568571092, + "learning_rate": 1.9533290572779912e-05, + "loss": 1.0346, "step": 2650 }, { - "epoch": 0.5558817362130426, - "grad_norm": 5.9601162940686265, - "learning_rate": 1.876232506499367e-05, - "loss": 2.3092, + "epoch": 0.37432928551256706, + "grad_norm": 4.322781365966304, + "learning_rate": 1.9532830178917915e-05, + "loss": 1.4114, "step": 2651 }, { - "epoch": 0.5560914237785699, - "grad_norm": 5.450924639150595, - "learning_rate": 1.8761233900348755e-05, - "loss": 1.8073, + "epoch": 0.37447048856255294, + "grad_norm": 4.20493116503359, + "learning_rate": 1.9532369563517066e-05, + "loss": 1.2394, "step": 2652 }, { - "epoch": 0.5563011113440973, - "grad_norm": 5.793176530396519, - "learning_rate": 1.8760142286675403e-05, - "loss": 1.9406, + "epoch": 0.3746116916125388, + "grad_norm": 3.7573858692757427, + "learning_rate": 1.9531908726588054e-05, + "loss": 0.9712, "step": 2653 }, { - "epoch": 0.5565107989096246, - "grad_norm": 5.171880563937533, - "learning_rate": 1.8759050224029564e-05, - "loss": 1.9244, + "epoch": 0.3747528946625247, + "grad_norm": 3.5038034186922222, + "learning_rate": 1.953144766814161e-05, + "loss": 0.8316, "step": 2654 }, { - "epoch": 0.556720486475152, - "grad_norm": 5.788681288637579, - "learning_rate": 1.8757957712467213e-05, - "loss": 2.0788, + "epoch": 0.3748940977125106, + "grad_norm": 4.42422056042016, + "learning_rate": 1.9530986388188435e-05, + "loss": 1.225, "step": 2655 }, { - "epoch": 0.5569301740406793, - "grad_norm": 5.525966055569547, - "learning_rate": 1.8756864752044336e-05, - "loss": 1.9819, + "epoch": 0.37503530076249647, + "grad_norm": 3.5229286643543594, + "learning_rate": 1.9530524886739254e-05, + "loss": 1.0534, "step": 2656 }, { - "epoch": 0.5571398616062068, - "grad_norm": 6.493016913575826, - "learning_rate": 1.8755771342816956e-05, - "loss": 2.1539, + "epoch": 0.37517650381248235, + "grad_norm": 4.421760015644147, + "learning_rate": 1.9530063163804788e-05, + "loss": 1.1626, "step": 2657 }, { - "epoch": 0.5573495491717341, - "grad_norm": 5.680384664958545, - "learning_rate": 1.875467748484111e-05, - "loss": 1.8154, + "epoch": 0.37531770686246824, + "grad_norm": 4.234414019437061, + "learning_rate": 1.952960121939577e-05, + "loss": 1.1658, "step": 2658 }, { - "epoch": 0.5575592367372615, - "grad_norm": 7.4916988363603405, - "learning_rate": 1.8753583178172855e-05, - "loss": 2.4587, + "epoch": 0.3754589099124541, + "grad_norm": 3.407028559838957, + "learning_rate": 1.9529139053522937e-05, + "loss": 0.9492, "step": 2659 }, { - "epoch": 0.5577689243027888, - "grad_norm": 5.500849148239562, - "learning_rate": 1.8752488422868278e-05, - "loss": 2.1669, + "epoch": 0.37560011296244, + "grad_norm": 3.912165594715894, + "learning_rate": 1.9528676666197026e-05, + "loss": 1.1739, "step": 2660 }, { - "epoch": 0.5579786118683162, - "grad_norm": 5.9440986408268355, - "learning_rate": 1.875139321898349e-05, - "loss": 2.3973, + "epoch": 0.3757413160124259, + "grad_norm": 4.051297316817544, + "learning_rate": 1.9528214057428785e-05, + "loss": 1.0469, "step": 2661 }, { - "epoch": 0.5581882994338436, - "grad_norm": 5.933776625062114, - "learning_rate": 1.8750297566574623e-05, - "loss": 2.1191, + "epoch": 0.37588251906241177, + "grad_norm": 4.308649961399725, + "learning_rate": 1.9527751227228964e-05, + "loss": 1.3052, "step": 2662 }, { - "epoch": 0.5583979869993709, - "grad_norm": 5.432377400318719, - "learning_rate": 1.874920146569783e-05, - "loss": 1.7428, + "epoch": 0.37602372211239765, + "grad_norm": 4.089780724893491, + "learning_rate": 1.952728817560832e-05, + "loss": 1.0246, "step": 2663 }, { - "epoch": 0.5586076745648983, - "grad_norm": 5.324312088306347, - "learning_rate": 1.874810491640928e-05, - "loss": 2.0472, + "epoch": 0.37616492516238353, + "grad_norm": 3.125357231604796, + "learning_rate": 1.9526824902577614e-05, + "loss": 0.8718, "step": 2664 }, { - "epoch": 0.5588173621304257, - "grad_norm": 5.757867124769394, - "learning_rate": 1.874700791876519e-05, - "loss": 2.0221, + "epoch": 0.37630612821236936, + "grad_norm": 4.949633516736115, + "learning_rate": 1.952636140814761e-05, + "loss": 1.1369, "step": 2665 }, { - "epoch": 0.5590270496959531, - "grad_norm": 5.659279041224531, - "learning_rate": 1.8745910472821767e-05, - "loss": 2.1093, + "epoch": 0.37644733126235524, + "grad_norm": 3.9762433984093333, + "learning_rate": 1.9525897692329082e-05, + "loss": 1.0834, "step": 2666 }, { - "epoch": 0.5592367372614804, - "grad_norm": 5.8152583903364325, - "learning_rate": 1.8744812578635264e-05, - "loss": 2.3153, + "epoch": 0.3765885343123411, + "grad_norm": 4.659754245044144, + "learning_rate": 1.9525433755132805e-05, + "loss": 1.3132, "step": 2667 }, { - "epoch": 0.5594464248270078, - "grad_norm": 6.274909912584504, - "learning_rate": 1.874371423626195e-05, - "loss": 2.1075, + "epoch": 0.376729737362327, + "grad_norm": 4.726953798130423, + "learning_rate": 1.952496959656956e-05, + "loss": 1.1566, "step": 2668 }, { - "epoch": 0.5596561123925351, - "grad_norm": 5.7198017657466576, - "learning_rate": 1.874261544575812e-05, - "loss": 2.2077, + "epoch": 0.3768709404123129, + "grad_norm": 3.778798035419523, + "learning_rate": 1.9524505216650136e-05, + "loss": 1.0133, "step": 2669 }, { - "epoch": 0.5598657999580625, - "grad_norm": 6.208011647448722, - "learning_rate": 1.8741516207180082e-05, - "loss": 2.2804, + "epoch": 0.3770121434622988, + "grad_norm": 4.003925433875248, + "learning_rate": 1.9524040615385324e-05, + "loss": 0.9351, "step": 2670 }, { - "epoch": 0.5600754875235898, - "grad_norm": 5.1911253368763655, - "learning_rate": 1.8740416520584174e-05, - "loss": 2.1247, + "epoch": 0.37715334651228466, + "grad_norm": 3.6668973005269336, + "learning_rate": 1.9523575792785924e-05, + "loss": 1.0866, "step": 2671 }, { - "epoch": 0.5602851750891172, - "grad_norm": 5.225813556654828, - "learning_rate": 1.8739316386026764e-05, - "loss": 1.9818, + "epoch": 0.37729454956227054, + "grad_norm": 4.696904301565168, + "learning_rate": 1.9523110748862733e-05, + "loss": 1.3557, "step": 2672 }, { - "epoch": 0.5604948626546445, - "grad_norm": 4.7691457650823, - "learning_rate": 1.873821580356423e-05, - "loss": 2.074, + "epoch": 0.3774357526122564, + "grad_norm": 3.9070941376213524, + "learning_rate": 1.9522645483626558e-05, + "loss": 0.9807, "step": 2673 }, { - "epoch": 0.560704550220172, - "grad_norm": 5.344593547062895, - "learning_rate": 1.873711477325298e-05, - "loss": 1.9696, + "epoch": 0.3775769556622423, + "grad_norm": 4.328223610242269, + "learning_rate": 1.952217999708822e-05, + "loss": 1.1334, "step": 2674 }, { - "epoch": 0.5609142377856993, - "grad_norm": 4.95845880405185, - "learning_rate": 1.8736013295149444e-05, - "loss": 1.9341, + "epoch": 0.3777181587122282, + "grad_norm": 3.8758848340098866, + "learning_rate": 1.9521714289258527e-05, + "loss": 1.0521, "step": 2675 }, { - "epoch": 0.5611239253512267, - "grad_norm": 7.361642565632095, - "learning_rate": 1.8734911369310078e-05, - "loss": 2.041, + "epoch": 0.3778593617622141, + "grad_norm": 4.594226276400707, + "learning_rate": 1.952124836014831e-05, + "loss": 1.1198, "step": 2676 }, { - "epoch": 0.561333612916754, - "grad_norm": 5.7560843924351035, - "learning_rate": 1.873380899579135e-05, - "loss": 2.0045, + "epoch": 0.37800056481219996, + "grad_norm": 3.355492383155113, + "learning_rate": 1.952078220976839e-05, + "loss": 0.8233, "step": 2677 }, { - "epoch": 0.5615433004822814, - "grad_norm": 5.698414140311639, - "learning_rate": 1.8732706174649765e-05, - "loss": 2.3569, + "epoch": 0.37814176786218584, + "grad_norm": 4.039550637523467, + "learning_rate": 1.9520315838129602e-05, + "loss": 1.1269, "step": 2678 }, { - "epoch": 0.5617529880478087, - "grad_norm": 5.638760562710624, - "learning_rate": 1.8731602905941843e-05, - "loss": 2.0654, + "epoch": 0.3782829709121717, + "grad_norm": 3.864121688104095, + "learning_rate": 1.951984924524279e-05, + "loss": 0.9341, "step": 2679 }, { - "epoch": 0.5619626756133361, - "grad_norm": 5.543907889938176, - "learning_rate": 1.8730499189724125e-05, - "loss": 2.0676, + "epoch": 0.3784241739621576, + "grad_norm": 5.292133876301585, + "learning_rate": 1.951938243111879e-05, + "loss": 1.5232, "step": 2680 }, { - "epoch": 0.5621723631788635, - "grad_norm": 5.093171829741121, - "learning_rate": 1.8729395026053185e-05, - "loss": 2.0148, + "epoch": 0.3785653770121435, + "grad_norm": 4.296730531760853, + "learning_rate": 1.9518915395768455e-05, + "loss": 1.179, "step": 2681 }, { - "epoch": 0.5623820507443908, - "grad_norm": 6.602405268014984, - "learning_rate": 1.872829041498561e-05, - "loss": 1.8978, + "epoch": 0.3787065800621293, + "grad_norm": 3.870423010863545, + "learning_rate": 1.9518448139202632e-05, + "loss": 1.0432, "step": 2682 }, { - "epoch": 0.5625917383099183, - "grad_norm": 4.917211262729164, - "learning_rate": 1.872718535657801e-05, - "loss": 2.2525, + "epoch": 0.3788477831121152, + "grad_norm": 3.5768396943707743, + "learning_rate": 1.951798066143219e-05, + "loss": 0.9423, "step": 2683 }, { - "epoch": 0.5628014258754456, - "grad_norm": 6.20822782818195, - "learning_rate": 1.872607985088702e-05, - "loss": 2.2741, + "epoch": 0.3789889861621011, + "grad_norm": 4.086096918811555, + "learning_rate": 1.9517512962467987e-05, + "loss": 1.0197, "step": 2684 }, { - "epoch": 0.563011113440973, - "grad_norm": 6.286655280455439, - "learning_rate": 1.8724973897969308e-05, - "loss": 2.0052, + "epoch": 0.37913018921208697, + "grad_norm": 4.918901831231184, + "learning_rate": 1.9517045042320893e-05, + "loss": 1.2252, "step": 2685 }, { - "epoch": 0.5632208010065003, - "grad_norm": 5.087714316119428, - "learning_rate": 1.8723867497881553e-05, - "loss": 1.9857, + "epoch": 0.37927139226207285, + "grad_norm": 3.9410263627522113, + "learning_rate": 1.951657690100178e-05, + "loss": 1.0439, "step": 2686 }, { - "epoch": 0.5634304885720277, - "grad_norm": 5.203714422900515, - "learning_rate": 1.8722760650680454e-05, - "loss": 2.0874, + "epoch": 0.37941259531205873, + "grad_norm": 4.779836024281969, + "learning_rate": 1.951610853852153e-05, + "loss": 1.1313, "step": 2687 }, { - "epoch": 0.563640176137555, - "grad_norm": 6.376718930125289, - "learning_rate": 1.8721653356422745e-05, - "loss": 2.3018, + "epoch": 0.3795537983620446, + "grad_norm": 3.622061123635568, + "learning_rate": 1.951563995489103e-05, + "loss": 0.997, "step": 2688 }, { - "epoch": 0.5638498637030824, - "grad_norm": 5.150186999573697, - "learning_rate": 1.8720545615165174e-05, - "loss": 2.0346, + "epoch": 0.3796950014120305, + "grad_norm": 4.169836329608204, + "learning_rate": 1.9515171150121167e-05, + "loss": 1.3071, "step": 2689 }, { - "epoch": 0.5640595512686097, - "grad_norm": 5.9658279364258195, - "learning_rate": 1.8719437426964515e-05, - "loss": 2.1122, + "epoch": 0.3798362044620164, + "grad_norm": 4.374344681431315, + "learning_rate": 1.9514702124222837e-05, + "loss": 1.0597, "step": 2690 }, { - "epoch": 0.5642692388341372, - "grad_norm": 5.787366077157618, - "learning_rate": 1.8718328791877563e-05, - "loss": 1.8914, + "epoch": 0.37997740751200226, + "grad_norm": 3.6603412495739334, + "learning_rate": 1.9514232877206932e-05, + "loss": 1.0816, "step": 2691 }, { - "epoch": 0.5644789263996645, - "grad_norm": 6.903315531310373, - "learning_rate": 1.8717219709961142e-05, - "loss": 2.3797, + "epoch": 0.38011861056198815, + "grad_norm": 4.232412934918964, + "learning_rate": 1.951376340908437e-05, + "loss": 1.0337, "step": 2692 }, { - "epoch": 0.5646886139651919, - "grad_norm": 5.767717891657005, - "learning_rate": 1.8716110181272094e-05, - "loss": 1.9054, + "epoch": 0.38025981361197403, + "grad_norm": 3.9488707622913743, + "learning_rate": 1.9513293719866054e-05, + "loss": 0.9889, "step": 2693 }, { - "epoch": 0.5648983015307192, - "grad_norm": 6.021351567504202, - "learning_rate": 1.8715000205867278e-05, - "loss": 2.0147, + "epoch": 0.3804010166619599, + "grad_norm": 4.001883021703839, + "learning_rate": 1.95128238095629e-05, + "loss": 0.8783, "step": 2694 }, { - "epoch": 0.5651079890962466, - "grad_norm": 6.017687709569518, - "learning_rate": 1.8713889783803588e-05, - "loss": 2.1029, + "epoch": 0.3805422197119458, + "grad_norm": 4.174075182615598, + "learning_rate": 1.9512353678185828e-05, + "loss": 1.0153, "step": 2695 }, { - "epoch": 0.5653176766617739, - "grad_norm": 5.446117259902237, - "learning_rate": 1.8712778915137928e-05, - "loss": 1.9414, + "epoch": 0.3806834227619317, + "grad_norm": 3.516864017477731, + "learning_rate": 1.9511883325745767e-05, + "loss": 0.932, "step": 2696 }, { - "epoch": 0.5655273642273013, - "grad_norm": 5.298011868684031, - "learning_rate": 1.8711667599927242e-05, - "loss": 1.9641, + "epoch": 0.38082462581191756, + "grad_norm": 3.770791065856057, + "learning_rate": 1.9511412752253644e-05, + "loss": 1.0378, "step": 2697 }, { - "epoch": 0.5657370517928287, - "grad_norm": 6.206091165472913, - "learning_rate": 1.871055583822848e-05, - "loss": 1.9994, + "epoch": 0.38096582886190344, + "grad_norm": 3.4299240307939307, + "learning_rate": 1.9510941957720396e-05, + "loss": 0.8851, "step": 2698 }, { - "epoch": 0.565946739358356, - "grad_norm": 6.092905590961644, - "learning_rate": 1.8709443630098626e-05, - "loss": 2.0255, + "epoch": 0.38110703191188927, + "grad_norm": 4.4304058437051435, + "learning_rate": 1.9510470942156963e-05, + "loss": 1.1107, "step": 2699 }, { - "epoch": 0.5661564269238835, - "grad_norm": 5.995637053235547, - "learning_rate": 1.8708330975594677e-05, - "loss": 2.1665, + "epoch": 0.38124823496187515, + "grad_norm": 3.965970069558552, + "learning_rate": 1.9509999705574293e-05, + "loss": 0.8162, "step": 2700 }, { - "epoch": 0.5663661144894108, - "grad_norm": 6.671332802115261, - "learning_rate": 1.8707217874773662e-05, - "loss": 2.1807, + "epoch": 0.38138943801186104, + "grad_norm": 5.602704184742451, + "learning_rate": 1.950952824798334e-05, + "loss": 1.3913, "step": 2701 }, { - "epoch": 0.5665758020549382, - "grad_norm": 6.7459237712878215, - "learning_rate": 1.8706104327692634e-05, - "loss": 2.1185, + "epoch": 0.3815306410618469, + "grad_norm": 4.553262964596167, + "learning_rate": 1.950905656939505e-05, + "loss": 1.0868, "step": 2702 }, { - "epoch": 0.5667854896204655, - "grad_norm": 7.982511796624487, - "learning_rate": 1.8704990334408656e-05, - "loss": 1.9709, + "epoch": 0.3816718441118328, + "grad_norm": 4.421385307887014, + "learning_rate": 1.95085846698204e-05, + "loss": 0.9162, "step": 2703 }, { - "epoch": 0.5669951771859929, - "grad_norm": 6.046707582558813, - "learning_rate": 1.870387589497882e-05, - "loss": 2.0603, + "epoch": 0.3818130471618187, + "grad_norm": 3.7775288617615512, + "learning_rate": 1.9508112549270346e-05, + "loss": 1.0031, "step": 2704 }, { - "epoch": 0.5672048647515202, - "grad_norm": 6.0054835322872915, - "learning_rate": 1.8702761009460257e-05, - "loss": 2.2664, + "epoch": 0.38195425021180457, + "grad_norm": 4.1441089250265435, + "learning_rate": 1.9507640207755863e-05, + "loss": 0.9769, "step": 2705 }, { - "epoch": 0.5674145523170476, - "grad_norm": 6.971973131413373, - "learning_rate": 1.870164567791009e-05, - "loss": 1.9682, + "epoch": 0.38209545326179045, + "grad_norm": 4.938961726624924, + "learning_rate": 1.9507167645287926e-05, + "loss": 1.1947, "step": 2706 }, { - "epoch": 0.5676242398825749, - "grad_norm": 5.456181558165276, - "learning_rate": 1.8700529900385497e-05, - "loss": 1.8005, + "epoch": 0.38223665631177633, + "grad_norm": 3.9623101715891833, + "learning_rate": 1.950669486187752e-05, + "loss": 1.1082, "step": 2707 }, { - "epoch": 0.5678339274481023, - "grad_norm": 6.86761808320295, - "learning_rate": 1.869941367694365e-05, - "loss": 2.3656, + "epoch": 0.3823778593617622, + "grad_norm": 4.532783941100174, + "learning_rate": 1.950622185753563e-05, + "loss": 1.3035, "step": 2708 }, { - "epoch": 0.5680436150136297, - "grad_norm": 6.944595423798982, - "learning_rate": 1.8698297007641767e-05, - "loss": 1.8246, + "epoch": 0.3825190624117481, + "grad_norm": 4.338254266892728, + "learning_rate": 1.950574863227325e-05, + "loss": 0.9905, "step": 2709 }, { - "epoch": 0.5682533025791571, - "grad_norm": 6.0645727191871694, - "learning_rate": 1.869717989253708e-05, - "loss": 1.8002, + "epoch": 0.382660265461734, + "grad_norm": 3.778152467797074, + "learning_rate": 1.9505275186101378e-05, + "loss": 1.0297, "step": 2710 }, { - "epoch": 0.5684629901446844, - "grad_norm": 6.188568172044652, - "learning_rate": 1.869606233168683e-05, - "loss": 2.36, + "epoch": 0.38280146851171987, + "grad_norm": 4.4851040542862926, + "learning_rate": 1.9504801519031015e-05, + "loss": 1.2811, "step": 2711 }, { - "epoch": 0.5686726777102118, - "grad_norm": 5.891868649907786, - "learning_rate": 1.869494432514831e-05, - "loss": 2.1397, + "epoch": 0.38294267156170575, + "grad_norm": 3.9568407045620857, + "learning_rate": 1.950432763107317e-05, + "loss": 1.0075, "step": 2712 }, { - "epoch": 0.5688823652757391, - "grad_norm": 6.347512614165626, - "learning_rate": 1.869382587297881e-05, - "loss": 2.0811, + "epoch": 0.38308387461169163, + "grad_norm": 3.7533386973845566, + "learning_rate": 1.950385352223885e-05, + "loss": 1.0422, "step": 2713 }, { - "epoch": 0.5690920528412665, - "grad_norm": 6.150558442739384, - "learning_rate": 1.8692706975235657e-05, - "loss": 1.8356, + "epoch": 0.3832250776616775, + "grad_norm": 4.265347833194422, + "learning_rate": 1.9503379192539086e-05, + "loss": 0.9976, "step": 2714 }, { - "epoch": 0.5693017404067939, - "grad_norm": 6.415217463317695, - "learning_rate": 1.8691587631976195e-05, - "loss": 2.3428, + "epoch": 0.3833662807116634, + "grad_norm": 3.8381243027550553, + "learning_rate": 1.950290464198489e-05, + "loss": 1.0491, "step": 2715 }, { - "epoch": 0.5695114279723212, - "grad_norm": 6.121405345262252, - "learning_rate": 1.869046784325779e-05, - "loss": 2.142, + "epoch": 0.3835074837616492, + "grad_norm": 4.470390815506896, + "learning_rate": 1.9502429870587295e-05, + "loss": 1.0129, "step": 2716 }, { - "epoch": 0.5697211155378487, - "grad_norm": 7.433282054083341, - "learning_rate": 1.8689347609137834e-05, - "loss": 2.3011, + "epoch": 0.3836486868116351, + "grad_norm": 4.593178100202923, + "learning_rate": 1.9501954878357335e-05, + "loss": 1.131, "step": 2717 }, { - "epoch": 0.569930803103376, - "grad_norm": 6.47311274821471, - "learning_rate": 1.8688226929673747e-05, - "loss": 2.0019, + "epoch": 0.383789889861621, + "grad_norm": 5.430329039521456, + "learning_rate": 1.9501479665306046e-05, + "loss": 1.0151, "step": 2718 }, { - "epoch": 0.5701404906689034, - "grad_norm": 6.45377360232322, - "learning_rate": 1.8687105804922955e-05, - "loss": 2.227, + "epoch": 0.3839310929116069, + "grad_norm": 5.051734937013839, + "learning_rate": 1.9501004231444475e-05, + "loss": 1.1075, "step": 2719 }, { - "epoch": 0.5703501782344307, - "grad_norm": 6.047156282100101, - "learning_rate": 1.8685984234942928e-05, - "loss": 1.9088, + "epoch": 0.38407229596159276, + "grad_norm": 4.615079885648788, + "learning_rate": 1.9500528576783667e-05, + "loss": 1.1841, "step": 2720 }, { - "epoch": 0.5705598657999581, - "grad_norm": 5.315206635865982, - "learning_rate": 1.8684862219791142e-05, - "loss": 1.9434, + "epoch": 0.38421349901157864, + "grad_norm": 4.337096240779647, + "learning_rate": 1.9500052701334676e-05, + "loss": 1.1719, "step": 2721 }, { - "epoch": 0.5707695533654854, - "grad_norm": 6.627961023081533, - "learning_rate": 1.8683739759525105e-05, - "loss": 2.1496, + "epoch": 0.3843547020615645, + "grad_norm": 3.3863351116251525, + "learning_rate": 1.9499576605108564e-05, + "loss": 0.8904, "step": 2722 }, { - "epoch": 0.5709792409310128, - "grad_norm": 5.760285731253, - "learning_rate": 1.868261685420234e-05, - "loss": 1.6827, + "epoch": 0.3844959051115504, + "grad_norm": 4.460291499387634, + "learning_rate": 1.9499100288116395e-05, + "loss": 1.1331, "step": 2723 }, { - "epoch": 0.5711889284965401, - "grad_norm": 6.598516163423714, - "learning_rate": 1.8681493503880406e-05, - "loss": 1.9888, + "epoch": 0.3846371081615363, + "grad_norm": 3.9318462124993157, + "learning_rate": 1.949862375036924e-05, + "loss": 1.0726, "step": 2724 }, { - "epoch": 0.5713986160620675, - "grad_norm": 6.230349513261235, - "learning_rate": 1.868036970861687e-05, - "loss": 2.2908, + "epoch": 0.3847783112115222, + "grad_norm": 3.7766029512745964, + "learning_rate": 1.9498146991878168e-05, + "loss": 0.9912, "step": 2725 }, { - "epoch": 0.5716083036275948, - "grad_norm": 6.505106907214323, - "learning_rate": 1.8679245468469334e-05, - "loss": 2.1056, + "epoch": 0.38491951426150806, + "grad_norm": 4.4911537459112765, + "learning_rate": 1.949767001265426e-05, + "loss": 0.7385, "step": 2726 }, { - "epoch": 0.5718179911931223, - "grad_norm": 6.079516966081716, - "learning_rate": 1.8678120783495415e-05, - "loss": 1.8275, + "epoch": 0.38506071731149394, + "grad_norm": 3.977308316926048, + "learning_rate": 1.9497192812708606e-05, + "loss": 1.0132, "step": 2727 }, { - "epoch": 0.5720276787586496, - "grad_norm": 7.022574927643935, - "learning_rate": 1.8676995653752755e-05, - "loss": 2.0554, + "epoch": 0.3852019203614798, + "grad_norm": 4.581931776307272, + "learning_rate": 1.949671539205229e-05, + "loss": 1.1203, "step": 2728 }, { - "epoch": 0.572237366324177, - "grad_norm": 6.411236972209665, - "learning_rate": 1.8675870079299014e-05, - "loss": 1.7686, + "epoch": 0.3853431234114657, + "grad_norm": 4.699403601790278, + "learning_rate": 1.9496237750696413e-05, + "loss": 1.3438, "step": 2729 }, { - "epoch": 0.5724470538897043, - "grad_norm": 5.677846621554343, - "learning_rate": 1.867474406019189e-05, - "loss": 2.1153, + "epoch": 0.3854843264614516, + "grad_norm": 3.699279976222106, + "learning_rate": 1.9495759888652072e-05, + "loss": 0.908, "step": 2730 }, { - "epoch": 0.5726567414552317, - "grad_norm": 6.250652107104516, - "learning_rate": 1.8673617596489082e-05, - "loss": 2.049, + "epoch": 0.38562552951143747, + "grad_norm": 3.924015321526006, + "learning_rate": 1.949528180593037e-05, + "loss": 1.1464, "step": 2731 }, { - "epoch": 0.572866429020759, - "grad_norm": 5.549938127805381, - "learning_rate": 1.8672490688248328e-05, - "loss": 2.1606, + "epoch": 0.38576673256142335, + "grad_norm": 5.09779881025153, + "learning_rate": 1.9494803502542415e-05, + "loss": 1.5832, "step": 2732 }, { - "epoch": 0.5730761165862864, - "grad_norm": 5.665935603894634, - "learning_rate": 1.8671363335527387e-05, - "loss": 1.7843, + "epoch": 0.3859079356114092, + "grad_norm": 4.856794529656477, + "learning_rate": 1.9494324978499335e-05, + "loss": 1.1704, "step": 2733 }, { - "epoch": 0.5732858041518138, - "grad_norm": 6.207191963647915, - "learning_rate": 1.8670235538384033e-05, - "loss": 2.1177, + "epoch": 0.38604913866139506, + "grad_norm": 4.676851568014474, + "learning_rate": 1.949384623381224e-05, + "loss": 1.2581, "step": 2734 }, { - "epoch": 0.5734954917173412, - "grad_norm": 7.1470663510588555, - "learning_rate": 1.8669107296876073e-05, - "loss": 2.1896, + "epoch": 0.38619034171138095, + "grad_norm": 4.332530877567536, + "learning_rate": 1.9493367268492258e-05, + "loss": 1.0467, "step": 2735 }, { - "epoch": 0.5737051792828686, - "grad_norm": 6.191036326130498, - "learning_rate": 1.866797861106133e-05, - "loss": 1.9805, + "epoch": 0.38633154476136683, + "grad_norm": 3.0685108020725798, + "learning_rate": 1.949288808255052e-05, + "loss": 0.8526, "step": 2736 }, { - "epoch": 0.5739148668483959, - "grad_norm": 7.008784218917093, - "learning_rate": 1.8666849480997644e-05, - "loss": 1.9644, + "epoch": 0.3864727478113527, + "grad_norm": 3.923528354617052, + "learning_rate": 1.9492408675998162e-05, + "loss": 1.1083, "step": 2737 }, { - "epoch": 0.5741245544139233, - "grad_norm": 6.68268645709309, - "learning_rate": 1.8665719906742893e-05, - "loss": 2.2133, + "epoch": 0.3866139508613386, + "grad_norm": 4.278407818709807, + "learning_rate": 1.9491929048846328e-05, + "loss": 1.3086, "step": 2738 }, { - "epoch": 0.5743342419794506, - "grad_norm": 5.9350242021380994, - "learning_rate": 1.8664589888354966e-05, - "loss": 2.1331, + "epoch": 0.3867551539113245, + "grad_norm": 3.7099759402937247, + "learning_rate": 1.9491449201106162e-05, + "loss": 1.0408, "step": 2739 }, { - "epoch": 0.574543929544978, - "grad_norm": 5.75101298241169, - "learning_rate": 1.866345942589178e-05, - "loss": 1.9286, + "epoch": 0.38689635696131036, + "grad_norm": 3.832341795392169, + "learning_rate": 1.9490969132788815e-05, + "loss": 0.9741, "step": 2740 }, { - "epoch": 0.5747536171105053, - "grad_norm": 7.210009665016464, - "learning_rate": 1.866232851941127e-05, - "loss": 2.0464, + "epoch": 0.38703756001129624, + "grad_norm": 4.141725444034112, + "learning_rate": 1.9490488843905444e-05, + "loss": 1.0957, "step": 2741 }, { - "epoch": 0.5749633046760327, - "grad_norm": 5.2990713313272595, - "learning_rate": 1.8661197168971403e-05, - "loss": 1.9399, + "epoch": 0.3871787630612821, + "grad_norm": 4.059847309148408, + "learning_rate": 1.9490008334467212e-05, + "loss": 1.1496, "step": 2742 }, { - "epoch": 0.57517299224156, - "grad_norm": 7.4849935933744955, - "learning_rate": 1.8660065374630155e-05, - "loss": 1.6694, + "epoch": 0.387319966111268, + "grad_norm": 4.246293329019511, + "learning_rate": 1.9489527604485284e-05, + "loss": 1.0252, "step": 2743 }, { - "epoch": 0.5753826798070875, - "grad_norm": 6.914434132044269, - "learning_rate": 1.865893313644554e-05, - "loss": 1.849, + "epoch": 0.3874611691612539, + "grad_norm": 3.4751086442631394, + "learning_rate": 1.948904665397083e-05, + "loss": 1.016, "step": 2744 }, { - "epoch": 0.5755923673726148, - "grad_norm": 6.278163681295618, - "learning_rate": 1.865780045447558e-05, - "loss": 2.2206, + "epoch": 0.3876023722112398, + "grad_norm": 3.9739716106378142, + "learning_rate": 1.9488565482935035e-05, + "loss": 1.0474, "step": 2745 }, { - "epoch": 0.5758020549381422, - "grad_norm": 5.854642415445766, - "learning_rate": 1.8656667328778336e-05, - "loss": 1.9686, + "epoch": 0.38774357526122566, + "grad_norm": 4.82982226244476, + "learning_rate": 1.948808409138907e-05, + "loss": 1.4463, "step": 2746 }, { - "epoch": 0.5760117425036695, - "grad_norm": 6.034483674605276, - "learning_rate": 1.8655533759411876e-05, - "loss": 1.9169, + "epoch": 0.38788477831121154, + "grad_norm": 3.6111002597187354, + "learning_rate": 1.9487602479344136e-05, + "loss": 0.9508, "step": 2747 }, { - "epoch": 0.5762214300691969, - "grad_norm": 5.828105447086947, - "learning_rate": 1.8654399746434296e-05, - "loss": 1.8914, + "epoch": 0.3880259813611974, + "grad_norm": 4.63687148749335, + "learning_rate": 1.948712064681141e-05, + "loss": 1.289, "step": 2748 }, { - "epoch": 0.5764311176347242, - "grad_norm": 7.732193301976145, - "learning_rate": 1.865326528990372e-05, - "loss": 1.561, + "epoch": 0.3881671844111833, + "grad_norm": 4.177795358177969, + "learning_rate": 1.9486638593802102e-05, + "loss": 1.1522, "step": 2749 }, { - "epoch": 0.5766408052002516, - "grad_norm": 6.977330496882968, - "learning_rate": 1.865213038987829e-05, - "loss": 2.0952, + "epoch": 0.38830838746116914, + "grad_norm": 4.015539170279566, + "learning_rate": 1.9486156320327406e-05, + "loss": 1.1319, "step": 2750 }, { - "epoch": 0.576850492765779, - "grad_norm": 6.4732742621727475, - "learning_rate": 1.8650995046416167e-05, - "loss": 2.094, + "epoch": 0.388449590511155, + "grad_norm": 3.6544564020451635, + "learning_rate": 1.948567382639854e-05, + "loss": 1.027, "step": 2751 }, { - "epoch": 0.5770601803313063, - "grad_norm": 6.17100823584381, - "learning_rate": 1.8649859259575548e-05, - "loss": 2.1014, + "epoch": 0.3885907935611409, + "grad_norm": 3.774945154471758, + "learning_rate": 1.9485191112026707e-05, + "loss": 1.1077, "step": 2752 }, { - "epoch": 0.5772698678968338, - "grad_norm": 8.400581570556966, - "learning_rate": 1.8648723029414638e-05, - "loss": 2.239, + "epoch": 0.3887319966111268, + "grad_norm": 4.740124640826402, + "learning_rate": 1.948470817722313e-05, + "loss": 1.1409, "step": 2753 }, { - "epoch": 0.5774795554623611, - "grad_norm": 6.358691614627974, - "learning_rate": 1.864758635599167e-05, - "loss": 1.9169, + "epoch": 0.38887319966111267, + "grad_norm": 4.75264550585457, + "learning_rate": 1.9484225021999032e-05, + "loss": 1.1033, "step": 2754 }, { - "epoch": 0.5776892430278885, - "grad_norm": 6.625139041557192, - "learning_rate": 1.8646449239364906e-05, - "loss": 2.4451, + "epoch": 0.38901440271109855, + "grad_norm": 4.302933897867006, + "learning_rate": 1.9483741646365634e-05, + "loss": 1.1271, "step": 2755 }, { - "epoch": 0.5778989305934158, - "grad_norm": 5.396835085962694, - "learning_rate": 1.8645311679592622e-05, - "loss": 1.8255, + "epoch": 0.38915560576108443, + "grad_norm": 4.452942458369776, + "learning_rate": 1.9483258050334183e-05, + "loss": 1.2529, "step": 2756 }, { - "epoch": 0.5781086181589432, - "grad_norm": 6.612106918883937, - "learning_rate": 1.864417367673312e-05, - "loss": 1.9214, + "epoch": 0.3892968088110703, + "grad_norm": 4.160460243434189, + "learning_rate": 1.948277423391591e-05, + "loss": 1.0664, "step": 2757 }, { - "epoch": 0.5783183057244705, - "grad_norm": 6.605480488474029, - "learning_rate": 1.8643035230844725e-05, - "loss": 2.1115, + "epoch": 0.3894380118610562, + "grad_norm": 3.808903172503447, + "learning_rate": 1.9482290197122054e-05, + "loss": 1.0117, "step": 2758 }, { - "epoch": 0.5785279932899979, - "grad_norm": 6.573471124856382, - "learning_rate": 1.864189634198578e-05, - "loss": 2.1502, + "epoch": 0.3895792149110421, + "grad_norm": 4.096770110305158, + "learning_rate": 1.948180593996387e-05, + "loss": 1.1152, "step": 2759 }, { - "epoch": 0.5787376808555252, - "grad_norm": 6.23270152363272, - "learning_rate": 1.864075701021466e-05, - "loss": 2.152, + "epoch": 0.38972041796102797, + "grad_norm": 4.731694182616994, + "learning_rate": 1.9481321462452617e-05, + "loss": 1.2267, "step": 2760 }, { - "epoch": 0.5789473684210527, - "grad_norm": 5.4339113024414365, - "learning_rate": 1.863961723558976e-05, - "loss": 1.9662, + "epoch": 0.38986162101101385, + "grad_norm": 3.5671029909026992, + "learning_rate": 1.948083676459954e-05, + "loss": 0.945, "step": 2761 }, { - "epoch": 0.57915705598658, - "grad_norm": 6.039031895674701, - "learning_rate": 1.863847701816949e-05, - "loss": 1.9518, + "epoch": 0.39000282406099973, + "grad_norm": 3.933822707549875, + "learning_rate": 1.9480351846415918e-05, + "loss": 1.114, "step": 2762 }, { - "epoch": 0.5793667435521074, - "grad_norm": 5.2411896271802805, - "learning_rate": 1.863733635801229e-05, - "loss": 2.0189, + "epoch": 0.3901440271109856, + "grad_norm": 4.236137168500127, + "learning_rate": 1.947986670791301e-05, + "loss": 0.8734, "step": 2763 }, { - "epoch": 0.5795764311176347, - "grad_norm": 5.4320705186084774, - "learning_rate": 1.863619525517662e-05, - "loss": 2.0442, + "epoch": 0.3902852301609715, + "grad_norm": 4.359060024644206, + "learning_rate": 1.9479381349102095e-05, + "loss": 1.2369, "step": 2764 }, { - "epoch": 0.5797861186831621, - "grad_norm": 6.40665092609313, - "learning_rate": 1.8635053709720963e-05, - "loss": 1.9739, + "epoch": 0.3904264332109574, + "grad_norm": 4.029332521176153, + "learning_rate": 1.9478895769994447e-05, + "loss": 0.9955, "step": 2765 }, { - "epoch": 0.5799958062486894, - "grad_norm": 6.7146461245807085, - "learning_rate": 1.8633911721703833e-05, - "loss": 2.2687, + "epoch": 0.39056763626094326, + "grad_norm": 4.386578709334208, + "learning_rate": 1.947840997060136e-05, + "loss": 1.1138, "step": 2766 }, { - "epoch": 0.5802054938142168, - "grad_norm": 5.53533748368371, - "learning_rate": 1.8632769291183748e-05, - "loss": 1.8952, + "epoch": 0.3907088393109291, + "grad_norm": 4.1981754583420114, + "learning_rate": 1.9477923950934117e-05, + "loss": 1.1013, "step": 2767 }, { - "epoch": 0.5804151813797441, - "grad_norm": 6.197429178287639, - "learning_rate": 1.8631626418219262e-05, - "loss": 2.1775, + "epoch": 0.390850042360915, + "grad_norm": 3.8325815635145277, + "learning_rate": 1.9477437711004015e-05, + "loss": 1.4762, "step": 2768 }, { - "epoch": 0.5806248689452715, - "grad_norm": 5.6572588882299755, - "learning_rate": 1.8630483102868954e-05, - "loss": 1.7355, + "epoch": 0.39099124541090086, + "grad_norm": 4.299665249574838, + "learning_rate": 1.9476951250822352e-05, + "loss": 1.1533, "step": 2769 }, { - "epoch": 0.580834556510799, - "grad_norm": 6.667840159438343, - "learning_rate": 1.862933934519142e-05, - "loss": 1.7145, + "epoch": 0.39113244846088674, + "grad_norm": 3.3133993437449587, + "learning_rate": 1.9476464570400434e-05, + "loss": 0.8659, "step": 2770 }, { - "epoch": 0.5810442440763263, - "grad_norm": 5.437844620737426, - "learning_rate": 1.8628195145245275e-05, - "loss": 2.077, + "epoch": 0.3912736515108726, + "grad_norm": 4.457612392487415, + "learning_rate": 1.9475977669749576e-05, + "loss": 1.4018, "step": 2771 }, { - "epoch": 0.5812539316418537, - "grad_norm": 6.088118114755378, - "learning_rate": 1.8627050503089164e-05, - "loss": 1.872, + "epoch": 0.3914148545608585, + "grad_norm": 4.281947259081842, + "learning_rate": 1.9475490548881083e-05, + "loss": 1.0546, "step": 2772 }, { - "epoch": 0.581463619207381, - "grad_norm": 6.728400677888094, - "learning_rate": 1.8625905418781754e-05, - "loss": 2.4107, + "epoch": 0.3915560576108444, + "grad_norm": 4.47114227005312, + "learning_rate": 1.947500320780629e-05, + "loss": 1.0499, "step": 2773 }, { - "epoch": 0.5816733067729084, - "grad_norm": 6.166774301311825, - "learning_rate": 1.862475989238173e-05, - "loss": 2.1215, + "epoch": 0.39169726066083027, + "grad_norm": 3.6351366322365926, + "learning_rate": 1.9474515646536507e-05, + "loss": 0.9539, "step": 2774 }, { - "epoch": 0.5818829943384357, - "grad_norm": 6.191949331219909, - "learning_rate": 1.8623613923947802e-05, - "loss": 1.9648, + "epoch": 0.39183846371081615, + "grad_norm": 3.360208719388863, + "learning_rate": 1.9474027865083078e-05, + "loss": 1.1307, "step": 2775 }, { - "epoch": 0.5820926819039631, - "grad_norm": 6.378446343983823, - "learning_rate": 1.86224675135387e-05, - "loss": 2.0417, + "epoch": 0.39197966676080204, + "grad_norm": 4.792750695891107, + "learning_rate": 1.947353986345733e-05, + "loss": 1.2572, "step": 2776 }, { - "epoch": 0.5823023694694904, - "grad_norm": 6.2375076055480365, - "learning_rate": 1.8621320661213186e-05, - "loss": 1.8335, + "epoch": 0.3921208698107879, + "grad_norm": 3.7234572040857072, + "learning_rate": 1.9473051641670606e-05, + "loss": 1.1238, "step": 2777 }, { - "epoch": 0.5825120570350178, - "grad_norm": 5.701035106367095, - "learning_rate": 1.8620173367030036e-05, - "loss": 1.9553, + "epoch": 0.3922620728607738, + "grad_norm": 4.024809281706479, + "learning_rate": 1.9472563199734254e-05, + "loss": 1.1902, "step": 2778 }, { - "epoch": 0.5827217446005452, - "grad_norm": 6.091961221841586, - "learning_rate": 1.8619025631048047e-05, - "loss": 2.0834, + "epoch": 0.3924032759107597, + "grad_norm": 4.32184941242279, + "learning_rate": 1.9472074537659623e-05, + "loss": 1.183, "step": 2779 }, { - "epoch": 0.5829314321660726, - "grad_norm": 6.575829598696737, - "learning_rate": 1.861787745332605e-05, - "loss": 1.9819, + "epoch": 0.39254447896074557, + "grad_norm": 3.973898061809334, + "learning_rate": 1.9471585655458073e-05, + "loss": 1.0463, "step": 2780 }, { - "epoch": 0.5831411197315999, - "grad_norm": 7.304348619015532, - "learning_rate": 1.8616728833922884e-05, - "loss": 2.0994, + "epoch": 0.39268568201073145, + "grad_norm": 3.5382949625765256, + "learning_rate": 1.947109655314096e-05, + "loss": 0.9137, "step": 2781 }, { - "epoch": 0.5833508072971273, - "grad_norm": 6.937064025578596, - "learning_rate": 1.861557977289742e-05, - "loss": 1.9736, + "epoch": 0.39282688506071733, + "grad_norm": 3.659168018837145, + "learning_rate": 1.9470607230719654e-05, + "loss": 0.8778, "step": 2782 }, { - "epoch": 0.5835604948626546, - "grad_norm": 5.303755529620455, - "learning_rate": 1.8614430270308554e-05, - "loss": 1.6312, + "epoch": 0.3929680881107032, + "grad_norm": 3.710724697036336, + "learning_rate": 1.947011768820553e-05, + "loss": 1.1154, "step": 2783 }, { - "epoch": 0.583770182428182, - "grad_norm": 7.529976332468666, - "learning_rate": 1.8613280326215192e-05, - "loss": 1.9495, + "epoch": 0.39310929116068904, + "grad_norm": 3.5645993734216552, + "learning_rate": 1.9469627925609956e-05, + "loss": 1.1494, "step": 2784 }, { - "epoch": 0.5839798699937093, - "grad_norm": 6.279715443093613, - "learning_rate": 1.8612129940676272e-05, - "loss": 1.8962, + "epoch": 0.39325049421067493, + "grad_norm": 4.453936604512423, + "learning_rate": 1.9469137942944322e-05, + "loss": 1.23, "step": 2785 }, { - "epoch": 0.5841895575592367, - "grad_norm": 6.675047264909567, - "learning_rate": 1.861097911375076e-05, - "loss": 2.1242, + "epoch": 0.3933916972606608, + "grad_norm": 3.8497070601764034, + "learning_rate": 1.946864774022001e-05, + "loss": 1.0835, "step": 2786 }, { - "epoch": 0.5843992451247642, - "grad_norm": 6.348626906787265, - "learning_rate": 1.860982784549763e-05, - "loss": 1.8807, + "epoch": 0.3935329003106467, + "grad_norm": 3.7273806837807073, + "learning_rate": 1.946815731744841e-05, + "loss": 0.9714, "step": 2787 }, { - "epoch": 0.5846089326902915, - "grad_norm": 6.348096122172748, - "learning_rate": 1.860867613597589e-05, - "loss": 1.733, + "epoch": 0.3936741033606326, + "grad_norm": 3.542739712473149, + "learning_rate": 1.946766667464093e-05, + "loss": 0.9233, "step": 2788 }, { - "epoch": 0.5848186202558189, - "grad_norm": 5.942722257261544, - "learning_rate": 1.8607523985244567e-05, - "loss": 2.2468, + "epoch": 0.39381530641061846, + "grad_norm": 4.309760392052565, + "learning_rate": 1.946717581180896e-05, + "loss": 1.0419, "step": 2789 }, { - "epoch": 0.5850283078213462, - "grad_norm": 7.243354422238376, - "learning_rate": 1.8606371393362713e-05, - "loss": 2.2528, + "epoch": 0.39395650946060434, + "grad_norm": 4.2747948005786816, + "learning_rate": 1.9466684728963914e-05, + "loss": 0.9659, "step": 2790 }, { - "epoch": 0.5852379953868736, - "grad_norm": 5.524743898035511, - "learning_rate": 1.86052183603894e-05, - "loss": 2.0669, + "epoch": 0.3940977125105902, + "grad_norm": 4.071373379838009, + "learning_rate": 1.94661934261172e-05, + "loss": 1.1733, "step": 2791 }, { - "epoch": 0.5854476829524009, - "grad_norm": 5.437122265737213, - "learning_rate": 1.8604064886383718e-05, - "loss": 2.201, + "epoch": 0.3942389155605761, + "grad_norm": 4.354308692603431, + "learning_rate": 1.9465701903280246e-05, + "loss": 1.1826, "step": 2792 }, { - "epoch": 0.5856573705179283, - "grad_norm": 6.073973431340435, - "learning_rate": 1.8602910971404786e-05, - "loss": 2.2775, + "epoch": 0.394380118610562, + "grad_norm": 3.467250005279303, + "learning_rate": 1.946521016046446e-05, + "loss": 0.8859, "step": 2793 }, { - "epoch": 0.5858670580834556, - "grad_norm": 5.4748252490280365, - "learning_rate": 1.860175661551175e-05, - "loss": 1.9526, + "epoch": 0.3945213216605479, + "grad_norm": 3.973150345311791, + "learning_rate": 1.9464718197681284e-05, + "loss": 1.1531, "step": 2794 }, { - "epoch": 0.586076745648983, - "grad_norm": 5.889596274557162, - "learning_rate": 1.8600601818763764e-05, - "loss": 1.9138, + "epoch": 0.39466252471053376, + "grad_norm": 4.75614069620667, + "learning_rate": 1.9464226014942143e-05, + "loss": 1.1953, "step": 2795 }, { - "epoch": 0.5862864332145103, - "grad_norm": 5.818773399302393, - "learning_rate": 1.8599446581220018e-05, - "loss": 1.8794, + "epoch": 0.39480372776051964, + "grad_norm": 4.658797466289896, + "learning_rate": 1.9463733612258476e-05, + "loss": 1.3401, "step": 2796 }, { - "epoch": 0.5864961207800378, - "grad_norm": 5.126087766732159, - "learning_rate": 1.8598290902939724e-05, - "loss": 2.1673, + "epoch": 0.3949449308105055, + "grad_norm": 4.870245341561487, + "learning_rate": 1.9463240989641728e-05, + "loss": 1.5336, "step": 2797 }, { - "epoch": 0.5867058083455651, - "grad_norm": 5.9354522772227085, - "learning_rate": 1.8597134783982103e-05, - "loss": 2.4213, + "epoch": 0.3950861338604914, + "grad_norm": 4.163220008997059, + "learning_rate": 1.9462748147103342e-05, + "loss": 1.1639, "step": 2798 }, { - "epoch": 0.5869154959110925, - "grad_norm": 5.5948818181582896, - "learning_rate": 1.859597822440642e-05, - "loss": 1.9696, + "epoch": 0.3952273369104773, + "grad_norm": 3.711852407156884, + "learning_rate": 1.946225508465478e-05, + "loss": 0.9642, "step": 2799 }, { - "epoch": 0.5871251834766198, - "grad_norm": 5.833858255715922, - "learning_rate": 1.8594821224271937e-05, - "loss": 2.1716, + "epoch": 0.3953685399604632, + "grad_norm": 9.58097065388552, + "learning_rate": 1.9461761802307494e-05, + "loss": 1.2129, "step": 2800 }, { - "epoch": 0.5873348710421472, - "grad_norm": 5.908323227946351, - "learning_rate": 1.8593663783637962e-05, - "loss": 2.3965, + "epoch": 0.395509743010449, + "grad_norm": 4.717086295626161, + "learning_rate": 1.9461268300072957e-05, + "loss": 1.2061, "step": 2801 }, { - "epoch": 0.5875445586076745, - "grad_norm": 5.64316681768169, - "learning_rate": 1.8592505902563816e-05, - "loss": 1.8461, + "epoch": 0.3956509460604349, + "grad_norm": 5.044536280719149, + "learning_rate": 1.9460774577962622e-05, + "loss": 1.151, "step": 2802 }, { - "epoch": 0.5877542461732019, - "grad_norm": 6.797872025434759, - "learning_rate": 1.8591347581108835e-05, - "loss": 1.9383, + "epoch": 0.39579214911042077, + "grad_norm": 3.8343522998532236, + "learning_rate": 1.9460280635987972e-05, + "loss": 1.0645, "step": 2803 }, { - "epoch": 0.5879639337387292, - "grad_norm": 5.322537800890154, - "learning_rate": 1.8590188819332392e-05, - "loss": 1.992, + "epoch": 0.39593335216040665, + "grad_norm": 5.440166613264818, + "learning_rate": 1.945978647416049e-05, + "loss": 1.3921, "step": 2804 }, { - "epoch": 0.5881736213042567, - "grad_norm": 5.524221411534303, - "learning_rate": 1.8589029617293875e-05, - "loss": 2.1636, + "epoch": 0.39607455521039253, + "grad_norm": 3.708363929608251, + "learning_rate": 1.9459292092491654e-05, + "loss": 0.957, "step": 2805 }, { - "epoch": 0.5883833088697841, - "grad_norm": 7.135498871497982, - "learning_rate": 1.858786997505269e-05, - "loss": 1.9046, + "epoch": 0.3962157582603784, + "grad_norm": 4.155079615706019, + "learning_rate": 1.9458797490992954e-05, + "loss": 1.2308, "step": 2806 }, { - "epoch": 0.5885929964353114, - "grad_norm": 5.950978899457029, - "learning_rate": 1.8586709892668274e-05, - "loss": 2.0656, + "epoch": 0.3963569613103643, + "grad_norm": 4.4752003599324235, + "learning_rate": 1.9458302669675885e-05, + "loss": 1.0389, "step": 2807 }, { - "epoch": 0.5888026840008388, - "grad_norm": 6.473838064300838, - "learning_rate": 1.858554937020008e-05, - "loss": 2.0665, + "epoch": 0.3964981643603502, + "grad_norm": 4.684881116513056, + "learning_rate": 1.9457807628551947e-05, + "loss": 1.4132, "step": 2808 }, { - "epoch": 0.5890123715663661, - "grad_norm": 5.800764570950762, - "learning_rate": 1.8584388407707598e-05, - "loss": 1.9313, + "epoch": 0.39663936741033606, + "grad_norm": 3.989504604936287, + "learning_rate": 1.9457312367632645e-05, + "loss": 1.1522, "step": 2809 }, { - "epoch": 0.5892220591318935, - "grad_norm": 6.320039863800695, - "learning_rate": 1.8583227005250316e-05, - "loss": 2.2284, + "epoch": 0.39678057046032195, + "grad_norm": 3.9344980111803904, + "learning_rate": 1.945681688692949e-05, + "loss": 0.9876, "step": 2810 }, { - "epoch": 0.5894317466974208, - "grad_norm": 5.523034828041235, - "learning_rate": 1.8582065162887763e-05, - "loss": 2.3193, + "epoch": 0.39692177351030783, + "grad_norm": 3.5406678018825977, + "learning_rate": 1.945632118645399e-05, + "loss": 0.7986, "step": 2811 }, { - "epoch": 0.5896414342629482, - "grad_norm": 5.723140928424983, - "learning_rate": 1.8580902880679487e-05, - "loss": 1.9274, + "epoch": 0.3970629765602937, + "grad_norm": 4.108074557932551, + "learning_rate": 1.9455825266217674e-05, + "loss": 1.2487, "step": 2812 }, { - "epoch": 0.5898511218284755, - "grad_norm": 6.629506154808888, - "learning_rate": 1.8579740158685057e-05, - "loss": 2.282, + "epoch": 0.3972041796102796, + "grad_norm": 4.225168997024323, + "learning_rate": 1.9455329126232062e-05, + "loss": 1.2726, "step": 2813 }, { - "epoch": 0.590060809394003, - "grad_norm": 6.210511427186196, - "learning_rate": 1.857857699696406e-05, - "loss": 2.0276, + "epoch": 0.3973453826602655, + "grad_norm": 4.390988225733527, + "learning_rate": 1.945483276650868e-05, + "loss": 1.0149, "step": 2814 }, { - "epoch": 0.5902704969595303, - "grad_norm": 5.767235434414866, - "learning_rate": 1.8577413395576114e-05, - "loss": 1.9594, + "epoch": 0.39748658571025136, + "grad_norm": 4.336442748880578, + "learning_rate": 1.945433618705907e-05, + "loss": 1.0722, "step": 2815 }, { - "epoch": 0.5904801845250577, - "grad_norm": 6.425811354731852, - "learning_rate": 1.8576249354580857e-05, - "loss": 2.0, + "epoch": 0.39762778876023724, + "grad_norm": 4.246477202517171, + "learning_rate": 1.945383938789477e-05, + "loss": 1.1181, "step": 2816 }, { - "epoch": 0.590689872090585, - "grad_norm": 7.252463844689402, - "learning_rate": 1.8575084874037944e-05, - "loss": 2.2094, + "epoch": 0.3977689918102231, + "grad_norm": 4.372277116775641, + "learning_rate": 1.945334236902733e-05, + "loss": 1.022, "step": 2817 }, { - "epoch": 0.5908995596561124, - "grad_norm": 5.997443830607522, - "learning_rate": 1.857391995400706e-05, - "loss": 2.1512, + "epoch": 0.39791019486020895, + "grad_norm": 4.8401263117502795, + "learning_rate": 1.945284513046829e-05, + "loss": 1.1328, "step": 2818 }, { - "epoch": 0.5911092472216397, - "grad_norm": 5.73839274685035, - "learning_rate": 1.8572754594547903e-05, - "loss": 2.1362, + "epoch": 0.39805139791019484, + "grad_norm": 4.292931467045822, + "learning_rate": 1.945234767222921e-05, + "loss": 1.0954, "step": 2819 }, { - "epoch": 0.5913189347871671, - "grad_norm": 6.08102002355284, - "learning_rate": 1.8571588795720207e-05, - "loss": 2.1546, + "epoch": 0.3981926009601807, + "grad_norm": 4.72573299249462, + "learning_rate": 1.945184999432166e-05, + "loss": 1.2775, "step": 2820 }, { - "epoch": 0.5915286223526944, - "grad_norm": 6.564845547130511, - "learning_rate": 1.857042255758372e-05, - "loss": 2.0197, + "epoch": 0.3983338040101666, + "grad_norm": 3.8239463230501674, + "learning_rate": 1.9451352096757194e-05, + "loss": 1.0205, "step": 2821 }, { - "epoch": 0.5917383099182218, - "grad_norm": 6.253006059024423, - "learning_rate": 1.8569255880198213e-05, - "loss": 2.0026, + "epoch": 0.3984750070601525, + "grad_norm": 4.041276156472654, + "learning_rate": 1.9450853979547384e-05, + "loss": 0.9573, "step": 2822 }, { - "epoch": 0.5919479974837493, - "grad_norm": 6.043827078687982, - "learning_rate": 1.8568088763623478e-05, - "loss": 1.9969, + "epoch": 0.39861621011013837, + "grad_norm": 4.070560610445045, + "learning_rate": 1.9450355642703812e-05, + "loss": 1.2284, "step": 2823 }, { - "epoch": 0.5921576850492766, - "grad_norm": 6.426688536911274, - "learning_rate": 1.8566921207919332e-05, - "loss": 2.1987, + "epoch": 0.39875741316012425, + "grad_norm": 3.9760097844388644, + "learning_rate": 1.9449857086238058e-05, + "loss": 1.1104, "step": 2824 }, { - "epoch": 0.592367372614804, - "grad_norm": 5.880424982159923, - "learning_rate": 1.8565753213145615e-05, - "loss": 2.304, + "epoch": 0.39889861621011014, + "grad_norm": 3.938362948686599, + "learning_rate": 1.9449358310161702e-05, + "loss": 1.0475, "step": 2825 }, { - "epoch": 0.5925770601803313, - "grad_norm": 6.269099861378297, - "learning_rate": 1.8564584779362192e-05, - "loss": 2.2299, + "epoch": 0.399039819260096, + "grad_norm": 4.504721162087163, + "learning_rate": 1.9448859314486342e-05, + "loss": 1.275, "step": 2826 }, { - "epoch": 0.5927867477458587, - "grad_norm": 5.493781815782301, - "learning_rate": 1.8563415906628942e-05, - "loss": 1.9815, + "epoch": 0.3991810223100819, + "grad_norm": 4.150285804261682, + "learning_rate": 1.9448360099223573e-05, + "loss": 1.173, "step": 2827 }, { - "epoch": 0.592996435311386, - "grad_norm": 6.482903463790162, - "learning_rate": 1.8562246595005774e-05, - "loss": 1.9065, + "epoch": 0.3993222253600678, + "grad_norm": 5.231670709249456, + "learning_rate": 1.9447860664384998e-05, + "loss": 1.1332, "step": 2828 }, { - "epoch": 0.5932061228769134, - "grad_norm": 5.4657120503943935, - "learning_rate": 1.856107684455262e-05, - "loss": 2.0115, + "epoch": 0.39946342841005367, + "grad_norm": 3.5230753210176435, + "learning_rate": 1.944736100998222e-05, + "loss": 0.9293, "step": 2829 }, { - "epoch": 0.5934158104424407, - "grad_norm": 5.881506615973131, - "learning_rate": 1.855990665532943e-05, - "loss": 1.8569, + "epoch": 0.39960463146003955, + "grad_norm": 4.727604239013494, + "learning_rate": 1.9446861136026846e-05, + "loss": 1.2187, "step": 2830 }, { - "epoch": 0.5936254980079682, - "grad_norm": 6.1975118592375, - "learning_rate": 1.8558736027396177e-05, - "loss": 2.2694, + "epoch": 0.39974583451002543, + "grad_norm": 4.2897729467955195, + "learning_rate": 1.9446361042530504e-05, + "loss": 0.9592, "step": 2831 }, { - "epoch": 0.5938351855734955, - "grad_norm": 5.280387541626495, - "learning_rate": 1.8557564960812855e-05, - "loss": 1.8424, + "epoch": 0.3998870375600113, + "grad_norm": 3.5500370772861713, + "learning_rate": 1.9445860729504812e-05, + "loss": 0.9962, "step": 2832 }, { - "epoch": 0.5940448731390229, - "grad_norm": 5.994526462094391, - "learning_rate": 1.855639345563949e-05, - "loss": 1.9947, + "epoch": 0.4000282406099972, + "grad_norm": 4.182788855234692, + "learning_rate": 1.9445360196961394e-05, + "loss": 1.2218, "step": 2833 }, { - "epoch": 0.5942545607045502, - "grad_norm": 5.4735144671289175, - "learning_rate": 1.8555221511936118e-05, - "loss": 2.0521, + "epoch": 0.4001694436599831, + "grad_norm": 3.505284317296871, + "learning_rate": 1.9444859444911884e-05, + "loss": 0.8898, "step": 2834 }, { - "epoch": 0.5944642482700776, - "grad_norm": 5.754751226434579, - "learning_rate": 1.8554049129762804e-05, - "loss": 1.7855, + "epoch": 0.4003106467099689, + "grad_norm": 3.7217854577370897, + "learning_rate": 1.9444358473367918e-05, + "loss": 0.9976, "step": 2835 }, { - "epoch": 0.5946739358356049, - "grad_norm": 5.532918599236389, - "learning_rate": 1.855287630917964e-05, - "loss": 2.0404, + "epoch": 0.4004518497599548, + "grad_norm": 4.456964792485029, + "learning_rate": 1.9443857282341144e-05, + "loss": 1.2514, "step": 2836 }, { - "epoch": 0.5948836234011323, - "grad_norm": 5.220453338413051, - "learning_rate": 1.855170305024673e-05, - "loss": 1.8221, + "epoch": 0.4005930528099407, + "grad_norm": 3.8733581802950092, + "learning_rate": 1.9443355871843204e-05, + "loss": 0.9771, "step": 2837 }, { - "epoch": 0.5950933109666596, - "grad_norm": 5.7450617480030655, - "learning_rate": 1.8550529353024204e-05, - "loss": 2.0589, + "epoch": 0.40073425585992656, + "grad_norm": 4.569326207442759, + "learning_rate": 1.944285424188575e-05, + "loss": 1.0039, "step": 2838 }, { - "epoch": 0.595302998532187, - "grad_norm": 6.363323676364217, - "learning_rate": 1.854935521757222e-05, - "loss": 2.1924, + "epoch": 0.40087545890991244, + "grad_norm": 3.599581856348858, + "learning_rate": 1.9442352392480442e-05, + "loss": 0.9974, "step": 2839 }, { - "epoch": 0.5955126860977145, - "grad_norm": 6.764205381451113, - "learning_rate": 1.8548180643950955e-05, - "loss": 1.9919, + "epoch": 0.4010166619598983, + "grad_norm": 4.822634572211196, + "learning_rate": 1.9441850323638944e-05, + "loss": 1.2405, "step": 2840 }, { - "epoch": 0.5957223736632418, - "grad_norm": 6.635936417589668, - "learning_rate": 1.8547005632220602e-05, - "loss": 1.9, + "epoch": 0.4011578650098842, + "grad_norm": 5.556602725747065, + "learning_rate": 1.944134803537292e-05, + "loss": 1.4219, "step": 2841 }, { - "epoch": 0.5959320612287692, - "grad_norm": 5.82680697197393, - "learning_rate": 1.8545830182441386e-05, - "loss": 2.2936, + "epoch": 0.4012990680598701, + "grad_norm": 4.638659780718478, + "learning_rate": 1.9440845527694047e-05, + "loss": 1.0865, "step": 2842 }, { - "epoch": 0.5961417487942965, - "grad_norm": 6.35022104767472, - "learning_rate": 1.8544654294673553e-05, - "loss": 2.0621, + "epoch": 0.401440271109856, + "grad_norm": 3.661355102017519, + "learning_rate": 1.9440342800614e-05, + "loss": 0.8872, "step": 2843 }, { - "epoch": 0.5963514363598239, - "grad_norm": 7.036026652989272, - "learning_rate": 1.854347796897737e-05, - "loss": 2.1278, + "epoch": 0.40158147415984186, + "grad_norm": 5.124125297052814, + "learning_rate": 1.9439839854144463e-05, + "loss": 1.3008, "step": 2844 }, { - "epoch": 0.5965611239253512, - "grad_norm": 5.847140920811413, - "learning_rate": 1.854230120541312e-05, - "loss": 1.8412, + "epoch": 0.40172267720982774, + "grad_norm": 5.2145852159988495, + "learning_rate": 1.9439336688297124e-05, + "loss": 1.1957, "step": 2845 }, { - "epoch": 0.5967708114908786, - "grad_norm": 5.972632852655691, - "learning_rate": 1.854112400404112e-05, - "loss": 2.2266, + "epoch": 0.4018638802598136, + "grad_norm": 4.525556129189602, + "learning_rate": 1.9438833303083677e-05, + "loss": 1.1, "step": 2846 }, { - "epoch": 0.5969804990564059, - "grad_norm": 5.2835076907116845, - "learning_rate": 1.85399463649217e-05, - "loss": 1.6403, + "epoch": 0.4020050833097995, + "grad_norm": 4.116332338388515, + "learning_rate": 1.9438329698515823e-05, + "loss": 1.1821, "step": 2847 }, { - "epoch": 0.5971901866219333, - "grad_norm": 6.784861006432264, - "learning_rate": 1.8538768288115213e-05, - "loss": 1.8356, + "epoch": 0.4021462863597854, + "grad_norm": 4.103742148760553, + "learning_rate": 1.943782587460526e-05, + "loss": 1.2015, "step": 2848 }, { - "epoch": 0.5973998741874607, - "grad_norm": 6.129010218309208, - "learning_rate": 1.8537589773682046e-05, - "loss": 1.6441, + "epoch": 0.40228748940977127, + "grad_norm": 4.341750961649565, + "learning_rate": 1.94373218313637e-05, + "loss": 1.1189, "step": 2849 }, { - "epoch": 0.5976095617529881, - "grad_norm": 6.100929593146235, - "learning_rate": 1.8536410821682594e-05, - "loss": 1.9326, + "epoch": 0.40242869245975715, + "grad_norm": 3.3873804460373695, + "learning_rate": 1.9436817568802854e-05, + "loss": 0.8193, "step": 2850 }, { - "epoch": 0.5978192493185154, - "grad_norm": 5.885765651449141, - "learning_rate": 1.853523143217728e-05, - "loss": 2.0494, + "epoch": 0.40256989550974304, + "grad_norm": 4.016785386950191, + "learning_rate": 1.943631308693445e-05, + "loss": 1.0794, "step": 2851 }, { - "epoch": 0.5980289368840428, - "grad_norm": 5.801941437890225, - "learning_rate": 1.8534051605226554e-05, - "loss": 1.9244, + "epoch": 0.40271109855972886, + "grad_norm": 4.319167640626143, + "learning_rate": 1.94358083857702e-05, + "loss": 1.1174, "step": 2852 }, { - "epoch": 0.5982386244495701, - "grad_norm": 6.280821335752855, - "learning_rate": 1.853287134089088e-05, - "loss": 2.4128, + "epoch": 0.40285230160971475, + "grad_norm": 3.311247592128335, + "learning_rate": 1.943530346532184e-05, + "loss": 0.9365, "step": 2853 }, { - "epoch": 0.5984483120150975, - "grad_norm": 6.000581315497626, - "learning_rate": 1.8531690639230754e-05, - "loss": 1.9193, + "epoch": 0.40299350465970063, + "grad_norm": 3.6004888360968095, + "learning_rate": 1.9434798325601098e-05, + "loss": 0.9965, "step": 2854 }, { - "epoch": 0.5986579995806248, - "grad_norm": 6.420621630717793, - "learning_rate": 1.853050950030668e-05, - "loss": 2.0262, + "epoch": 0.4031347077096865, + "grad_norm": 3.511507114216554, + "learning_rate": 1.943429296661972e-05, + "loss": 0.9672, "step": 2855 }, { - "epoch": 0.5988676871461522, - "grad_norm": 7.9537214163745915, - "learning_rate": 1.8529327924179203e-05, - "loss": 2.1248, + "epoch": 0.4032759107596724, + "grad_norm": 3.989606697578757, + "learning_rate": 1.9433787388389453e-05, + "loss": 1.2496, "step": 2856 }, { - "epoch": 0.5990773747116795, - "grad_norm": 7.0640750717408975, - "learning_rate": 1.8528145910908874e-05, - "loss": 1.679, + "epoch": 0.4034171138096583, + "grad_norm": 4.270706904458701, + "learning_rate": 1.9433281590922036e-05, + "loss": 1.2814, "step": 2857 }, { - "epoch": 0.599287062277207, - "grad_norm": 5.370730147105455, - "learning_rate": 1.852696346055628e-05, - "loss": 1.8568, + "epoch": 0.40355831685964416, + "grad_norm": 4.289290271299601, + "learning_rate": 1.943277557422923e-05, + "loss": 1.0291, "step": 2858 }, { - "epoch": 0.5994967498427344, - "grad_norm": 5.685459794465621, - "learning_rate": 1.852578057318201e-05, - "loss": 2.1262, + "epoch": 0.40369951990963004, + "grad_norm": 3.922499527858311, + "learning_rate": 1.9432269338322793e-05, + "loss": 1.1003, "step": 2859 }, { - "epoch": 0.5997064374082617, - "grad_norm": 6.3106112522630236, - "learning_rate": 1.8524597248846707e-05, - "loss": 2.0098, + "epoch": 0.4038407229596159, + "grad_norm": 3.8491093208243687, + "learning_rate": 1.943176288321449e-05, + "loss": 1.1666, "step": 2860 }, { - "epoch": 0.5999161249737891, - "grad_norm": 6.074111782677629, - "learning_rate": 1.8523413487611005e-05, - "loss": 2.224, + "epoch": 0.4039819260096018, + "grad_norm": 3.825980070495842, + "learning_rate": 1.943125620891609e-05, + "loss": 1.1526, "step": 2861 }, { - "epoch": 0.6001258125393164, - "grad_norm": 6.143776159094985, - "learning_rate": 1.8522229289535583e-05, - "loss": 1.6457, + "epoch": 0.4041231290595877, + "grad_norm": 3.8334076999297, + "learning_rate": 1.943074931543937e-05, + "loss": 1.1317, "step": 2862 }, { - "epoch": 0.6003355001048438, - "grad_norm": 7.949057583030364, - "learning_rate": 1.8521044654681124e-05, - "loss": 1.8428, + "epoch": 0.4042643321095736, + "grad_norm": 4.337298530672957, + "learning_rate": 1.9430242202796107e-05, + "loss": 1.1099, "step": 2863 }, { - "epoch": 0.6005451876703711, - "grad_norm": 7.86701714819926, - "learning_rate": 1.8519859583108353e-05, - "loss": 2.2839, + "epoch": 0.40440553515955946, + "grad_norm": 3.4343828981332836, + "learning_rate": 1.942973487099809e-05, + "loss": 0.8236, "step": 2864 }, { - "epoch": 0.6007548752358985, - "grad_norm": 6.391031902563666, - "learning_rate": 1.8518674074877995e-05, - "loss": 1.9156, + "epoch": 0.40454673820954534, + "grad_norm": 3.786103422509321, + "learning_rate": 1.9429227320057106e-05, + "loss": 1.1006, "step": 2865 }, { - "epoch": 0.6009645628014259, - "grad_norm": 6.683752131355188, - "learning_rate": 1.8517488130050824e-05, - "loss": 1.7305, + "epoch": 0.4046879412595312, + "grad_norm": 4.487701523468622, + "learning_rate": 1.9428719549984955e-05, + "loss": 1.1669, "step": 2866 }, { - "epoch": 0.6011742503669533, - "grad_norm": 6.644589719481444, - "learning_rate": 1.8516301748687608e-05, - "loss": 2.104, + "epoch": 0.4048291443095171, + "grad_norm": 3.4445239823811558, + "learning_rate": 1.9428211560793428e-05, + "loss": 0.8673, "step": 2867 }, { - "epoch": 0.6013839379324806, - "grad_norm": 5.623284751753787, - "learning_rate": 1.8515114930849158e-05, - "loss": 1.7125, + "epoch": 0.404970347359503, + "grad_norm": 3.702158668768986, + "learning_rate": 1.9427703352494335e-05, + "loss": 0.9227, "step": 2868 }, { - "epoch": 0.601593625498008, - "grad_norm": 6.229321658809429, - "learning_rate": 1.8513927676596294e-05, - "loss": 2.1369, + "epoch": 0.4051115504094888, + "grad_norm": 5.46220658963223, + "learning_rate": 1.9427194925099494e-05, + "loss": 1.2239, "step": 2869 }, { - "epoch": 0.6018033130635353, - "grad_norm": 6.357073562502818, - "learning_rate": 1.8512739985989874e-05, - "loss": 1.9917, + "epoch": 0.4052527534594747, + "grad_norm": 3.6225140965531155, + "learning_rate": 1.942668627862071e-05, + "loss": 0.9566, "step": 2870 }, { - "epoch": 0.6020130006290627, - "grad_norm": 6.526375644843321, - "learning_rate": 1.8511551859090766e-05, - "loss": 2.0576, + "epoch": 0.4053939565094606, + "grad_norm": 4.337107348316009, + "learning_rate": 1.942617741306981e-05, + "loss": 1.1838, "step": 2871 }, { - "epoch": 0.60222268819459, - "grad_norm": 6.229562658983859, - "learning_rate": 1.8510363295959865e-05, - "loss": 2.0787, + "epoch": 0.40553515955944647, + "grad_norm": 4.565524519449637, + "learning_rate": 1.9425668328458616e-05, + "loss": 1.3793, "step": 2872 }, { - "epoch": 0.6024323757601174, - "grad_norm": 6.152929085028943, - "learning_rate": 1.8509174296658078e-05, - "loss": 1.9562, + "epoch": 0.40567636260943235, + "grad_norm": 3.86252950189058, + "learning_rate": 1.942515902479896e-05, + "loss": 1.1304, "step": 2873 }, { - "epoch": 0.6026420633256447, - "grad_norm": 5.188991159961233, - "learning_rate": 1.8507984861246356e-05, - "loss": 2.0261, + "epoch": 0.40581756565941823, + "grad_norm": 4.108341347455858, + "learning_rate": 1.942464950210268e-05, + "loss": 1.0401, "step": 2874 }, { - "epoch": 0.6028517508911722, - "grad_norm": 6.406112501076073, - "learning_rate": 1.850679498978565e-05, - "loss": 2.1274, + "epoch": 0.4059587687094041, + "grad_norm": 4.762919246132147, + "learning_rate": 1.942413976038162e-05, + "loss": 1.3263, "step": 2875 }, { - "epoch": 0.6030614384566996, - "grad_norm": 7.39611509009168, - "learning_rate": 1.8505604682336947e-05, - "loss": 1.88, + "epoch": 0.40609997175939, + "grad_norm": 4.137159423555265, + "learning_rate": 1.9423629799647618e-05, + "loss": 1.0961, "step": 2876 }, { - "epoch": 0.6032711260222269, - "grad_norm": 5.798044942484015, - "learning_rate": 1.850441393896125e-05, - "loss": 1.9593, + "epoch": 0.4062411748093759, + "grad_norm": 3.6896545280011157, + "learning_rate": 1.9423119619912527e-05, + "loss": 1.0562, "step": 2877 }, { - "epoch": 0.6034808135877543, - "grad_norm": 6.173909061279093, - "learning_rate": 1.8503222759719595e-05, - "loss": 1.9724, + "epoch": 0.40638237785936177, + "grad_norm": 5.367575830697878, + "learning_rate": 1.9422609221188208e-05, + "loss": 1.3097, "step": 2878 }, { - "epoch": 0.6036905011532816, - "grad_norm": 6.292733087099212, - "learning_rate": 1.850203114467302e-05, - "loss": 2.2413, + "epoch": 0.40652358090934765, + "grad_norm": 3.845062530908352, + "learning_rate": 1.9422098603486515e-05, + "loss": 1.1083, "step": 2879 }, { - "epoch": 0.603900188718809, - "grad_norm": 6.825306134314194, - "learning_rate": 1.8500839093882605e-05, - "loss": 2.3622, + "epoch": 0.40666478395933353, + "grad_norm": 3.676070379789297, + "learning_rate": 1.942158776681933e-05, + "loss": 0.8073, "step": 2880 }, { - "epoch": 0.6041098762843363, - "grad_norm": 6.064936351106037, - "learning_rate": 1.849964660740944e-05, - "loss": 2.0736, + "epoch": 0.4068059870093194, + "grad_norm": 4.434320226597917, + "learning_rate": 1.9421076711198506e-05, + "loss": 1.2405, "step": 2881 }, { - "epoch": 0.6043195638498637, - "grad_norm": 5.821697498833978, - "learning_rate": 1.849845368531465e-05, - "loss": 1.7887, + "epoch": 0.4069471900593053, + "grad_norm": 4.32857986896634, + "learning_rate": 1.942056543663593e-05, + "loss": 1.2995, "step": 2882 }, { - "epoch": 0.604529251415391, - "grad_norm": 5.6173388166632225, - "learning_rate": 1.8497260327659365e-05, - "loss": 2.0661, + "epoch": 0.4070883931092912, + "grad_norm": 4.413173199569783, + "learning_rate": 1.942005394314348e-05, + "loss": 1.1787, "step": 2883 }, { - "epoch": 0.6047389389809185, - "grad_norm": 5.878711660941405, - "learning_rate": 1.849606653450475e-05, - "loss": 1.6939, + "epoch": 0.40722959615927706, + "grad_norm": 4.857200222535491, + "learning_rate": 1.941954223073305e-05, + "loss": 1.2984, "step": 2884 }, { - "epoch": 0.6049486265464458, - "grad_norm": 5.9113748114240945, - "learning_rate": 1.8494872305911985e-05, - "loss": 1.9963, + "epoch": 0.40737079920926295, + "grad_norm": 3.517980566567667, + "learning_rate": 1.941903029941652e-05, + "loss": 1.0472, "step": 2885 }, { - "epoch": 0.6051583141119732, - "grad_norm": 6.994643935741634, - "learning_rate": 1.8493677641942285e-05, - "loss": 1.9344, + "epoch": 0.4075120022592488, + "grad_norm": 4.235916496034987, + "learning_rate": 1.94185181492058e-05, + "loss": 1.1288, "step": 2886 }, { - "epoch": 0.6053680016775005, - "grad_norm": 6.691061982530917, - "learning_rate": 1.8492482542656876e-05, - "loss": 1.8457, + "epoch": 0.40765320530923466, + "grad_norm": 4.057923225045238, + "learning_rate": 1.9418005780112777e-05, + "loss": 1.1307, "step": 2887 }, { - "epoch": 0.6055776892430279, - "grad_norm": 7.030763620402425, - "learning_rate": 1.8491287008117006e-05, - "loss": 2.0861, + "epoch": 0.40779440835922054, + "grad_norm": 4.214396380290513, + "learning_rate": 1.9417493192149376e-05, + "loss": 1.2268, "step": 2888 }, { - "epoch": 0.6057873768085552, - "grad_norm": 7.005274911508568, - "learning_rate": 1.849009103838395e-05, - "loss": 2.2697, + "epoch": 0.4079356114092064, + "grad_norm": 4.038566942763581, + "learning_rate": 1.9416980385327498e-05, + "loss": 1.0289, "step": 2889 }, { - "epoch": 0.6059970643740826, - "grad_norm": 6.241309158168311, - "learning_rate": 1.8488894633519003e-05, - "loss": 2.1214, + "epoch": 0.4080768144591923, + "grad_norm": 3.91936775596543, + "learning_rate": 1.941646735965906e-05, + "loss": 1.0895, "step": 2890 }, { - "epoch": 0.6062067519396099, - "grad_norm": 5.830015435991998, - "learning_rate": 1.8487697793583482e-05, - "loss": 1.8558, + "epoch": 0.4082180175091782, + "grad_norm": 3.6719061902776464, + "learning_rate": 1.941595411515599e-05, + "loss": 0.9377, "step": 2891 }, { - "epoch": 0.6064164395051374, - "grad_norm": 7.224704941738584, - "learning_rate": 1.8486500518638728e-05, - "loss": 1.9305, + "epoch": 0.40835922055916407, + "grad_norm": 3.4112088096717845, + "learning_rate": 1.941544065183021e-05, + "loss": 0.9002, "step": 2892 }, { - "epoch": 0.6066261270706647, - "grad_norm": 6.260624604496244, - "learning_rate": 1.8485302808746105e-05, - "loss": 2.2322, + "epoch": 0.40850042360914995, + "grad_norm": 4.095534536524793, + "learning_rate": 1.9414926969693656e-05, + "loss": 0.9946, "step": 2893 }, { - "epoch": 0.6068358146361921, - "grad_norm": 6.8111114191392, - "learning_rate": 1.8484104663966995e-05, - "loss": 2.18, + "epoch": 0.40864162665913584, + "grad_norm": 4.0611089448990985, + "learning_rate": 1.9414413068758266e-05, + "loss": 1.0269, "step": 2894 }, { - "epoch": 0.6070455022017195, - "grad_norm": 6.657392986031021, - "learning_rate": 1.8482906084362803e-05, - "loss": 1.7316, + "epoch": 0.4087828297091217, + "grad_norm": 3.8096730364427076, + "learning_rate": 1.9413898949035984e-05, + "loss": 1.1793, "step": 2895 }, { - "epoch": 0.6072551897672468, - "grad_norm": 6.201499580975748, - "learning_rate": 1.8481707069994966e-05, - "loss": 1.9609, + "epoch": 0.4089240327591076, + "grad_norm": 4.492082158315833, + "learning_rate": 1.9413384610538752e-05, + "loss": 1.0685, "step": 2896 }, { - "epoch": 0.6074648773327742, - "grad_norm": 5.470043526155994, - "learning_rate": 1.8480507620924928e-05, - "loss": 2.0646, + "epoch": 0.4090652358090935, + "grad_norm": 5.487680047657643, + "learning_rate": 1.941287005327853e-05, + "loss": 1.1822, "step": 2897 }, { - "epoch": 0.6076745648983015, - "grad_norm": 6.796724121156567, - "learning_rate": 1.8479307737214167e-05, - "loss": 2.095, + "epoch": 0.40920643885907937, + "grad_norm": 3.795545089931177, + "learning_rate": 1.941235527726727e-05, + "loss": 0.8969, "step": 2898 }, { - "epoch": 0.6078842524638289, - "grad_norm": 6.560112781251169, - "learning_rate": 1.8478107418924177e-05, - "loss": 2.0357, + "epoch": 0.40934764190906525, + "grad_norm": 4.03234949224482, + "learning_rate": 1.9411840282516942e-05, + "loss": 1.1299, "step": 2899 }, { - "epoch": 0.6080939400293562, - "grad_norm": 5.4305111553655925, - "learning_rate": 1.8476906666116477e-05, - "loss": 1.8864, + "epoch": 0.40948884495905113, + "grad_norm": 3.7970327476469086, + "learning_rate": 1.941132506903951e-05, + "loss": 1.0857, "step": 2900 }, { - "epoch": 0.6083036275948837, - "grad_norm": 5.465648062777567, - "learning_rate": 1.847570547885261e-05, - "loss": 1.869, + "epoch": 0.409630048009037, + "grad_norm": 4.625579191005492, + "learning_rate": 1.9410809636846944e-05, + "loss": 1.0396, "step": 2901 }, { - "epoch": 0.608513315160411, - "grad_norm": 5.57374208627059, - "learning_rate": 1.8474503857194133e-05, - "loss": 1.8677, + "epoch": 0.4097712510590229, + "grad_norm": 4.480743071899812, + "learning_rate": 1.9410293985951233e-05, + "loss": 1.1204, "step": 2902 }, { - "epoch": 0.6087230027259384, - "grad_norm": 5.6505789539138025, - "learning_rate": 1.8473301801202638e-05, - "loss": 1.9986, + "epoch": 0.40991245410900873, + "grad_norm": 3.605369476357036, + "learning_rate": 1.9409778116364348e-05, + "loss": 0.9553, "step": 2903 }, { - "epoch": 0.6089326902914657, - "grad_norm": 6.01513487906172, - "learning_rate": 1.847209931093973e-05, - "loss": 2.021, + "epoch": 0.4100536571589946, + "grad_norm": 3.7506194647636253, + "learning_rate": 1.9409262028098285e-05, + "loss": 0.9916, "step": 2904 }, { - "epoch": 0.6091423778569931, - "grad_norm": 5.466153132829219, - "learning_rate": 1.8470896386467038e-05, - "loss": 1.7392, + "epoch": 0.4101948602089805, + "grad_norm": 4.040127056152281, + "learning_rate": 1.9408745721165036e-05, + "loss": 1.1525, "step": 2905 }, { - "epoch": 0.6093520654225204, - "grad_norm": 5.883750662777273, - "learning_rate": 1.8469693027846213e-05, - "loss": 2.1267, + "epoch": 0.4103360632589664, + "grad_norm": 4.279249903489633, + "learning_rate": 1.94082291955766e-05, + "loss": 1.1097, "step": 2906 }, { - "epoch": 0.6095617529880478, - "grad_norm": 5.2252626889403935, - "learning_rate": 1.846848923513893e-05, - "loss": 2.1191, + "epoch": 0.41047726630895226, + "grad_norm": 4.112230179182728, + "learning_rate": 1.940771245134498e-05, + "loss": 0.9315, "step": 2907 }, { - "epoch": 0.6097714405535751, - "grad_norm": 6.119880745658791, - "learning_rate": 1.8467285008406884e-05, - "loss": 2.03, + "epoch": 0.41061846935893814, + "grad_norm": 4.589461858156398, + "learning_rate": 1.9407195488482185e-05, + "loss": 1.1118, "step": 2908 }, { - "epoch": 0.6099811281191025, - "grad_norm": 5.6369860322041, - "learning_rate": 1.84660803477118e-05, - "loss": 1.9044, + "epoch": 0.410759672408924, + "grad_norm": 4.845963577400727, + "learning_rate": 1.9406678307000232e-05, + "loss": 1.1199, "step": 2909 }, { - "epoch": 0.6101908156846299, - "grad_norm": 4.9913954117609975, - "learning_rate": 1.846487525311541e-05, - "loss": 1.7837, + "epoch": 0.4109008754589099, + "grad_norm": 3.2962672664968475, + "learning_rate": 1.9406160906911137e-05, + "loss": 0.8427, "step": 2910 }, { - "epoch": 0.6104005032501573, - "grad_norm": 6.466961569951358, - "learning_rate": 1.8463669724679488e-05, - "loss": 2.0469, + "epoch": 0.4110420785088958, + "grad_norm": 3.8762616371397773, + "learning_rate": 1.940564328822692e-05, + "loss": 1.0769, "step": 2911 }, { - "epoch": 0.6106101908156847, - "grad_norm": 5.69077776061508, - "learning_rate": 1.8462463762465808e-05, - "loss": 1.6832, + "epoch": 0.4111832815588817, + "grad_norm": 3.6760579977394547, + "learning_rate": 1.9405125450959623e-05, + "loss": 0.947, "step": 2912 }, { - "epoch": 0.610819878381212, - "grad_norm": 5.577767689681743, - "learning_rate": 1.8461257366536186e-05, - "loss": 1.7935, + "epoch": 0.41132448460886756, + "grad_norm": 4.205177159808833, + "learning_rate": 1.9404607395121266e-05, + "loss": 0.9743, "step": 2913 }, { - "epoch": 0.6110295659467394, - "grad_norm": 5.793788948469559, - "learning_rate": 1.8460050536952443e-05, - "loss": 1.9819, + "epoch": 0.41146568765885344, + "grad_norm": 3.9307308571303965, + "learning_rate": 1.94040891207239e-05, + "loss": 1.0163, "step": 2914 }, { - "epoch": 0.6112392535122667, - "grad_norm": 6.475328337335335, - "learning_rate": 1.8458843273776442e-05, - "loss": 2.1086, + "epoch": 0.4116068907088393, + "grad_norm": 4.162641929604948, + "learning_rate": 1.940357062777956e-05, + "loss": 0.988, "step": 2915 }, { - "epoch": 0.6114489410777941, - "grad_norm": 6.185528942463991, - "learning_rate": 1.845763557707005e-05, - "loss": 2.3214, + "epoch": 0.4117480937588252, + "grad_norm": 5.304032545216394, + "learning_rate": 1.9403051916300296e-05, + "loss": 1.1511, "step": 2916 }, { - "epoch": 0.6116586286433214, - "grad_norm": 6.6081351299207345, - "learning_rate": 1.8456427446895168e-05, - "loss": 2.0832, + "epoch": 0.4118892968088111, + "grad_norm": 5.967033691460604, + "learning_rate": 1.940253298629817e-05, + "loss": 1.1816, "step": 2917 }, { - "epoch": 0.6118683162088489, - "grad_norm": 5.908546059484214, - "learning_rate": 1.845521888331371e-05, - "loss": 2.1168, + "epoch": 0.412030499858797, + "grad_norm": 3.7326198418494068, + "learning_rate": 1.9402013837785242e-05, + "loss": 0.9557, "step": 2918 }, { - "epoch": 0.6120780037743762, - "grad_norm": 6.0208292537308825, - "learning_rate": 1.8454009886387617e-05, - "loss": 2.0028, + "epoch": 0.41217170290878286, + "grad_norm": 3.7592119236457977, + "learning_rate": 1.940149447077357e-05, + "loss": 0.9411, "step": 2919 }, { - "epoch": 0.6122876913399036, - "grad_norm": 6.273060215600239, - "learning_rate": 1.845280045617886e-05, - "loss": 2.4412, + "epoch": 0.4123129059587687, + "grad_norm": 4.62032473246964, + "learning_rate": 1.9400974885275226e-05, + "loss": 0.9074, "step": 2920 }, { - "epoch": 0.6124973789054309, - "grad_norm": 5.8018969512882945, - "learning_rate": 1.845159059274942e-05, - "loss": 1.9969, + "epoch": 0.41245410900875457, + "grad_norm": 4.440049454858506, + "learning_rate": 1.9400455081302287e-05, + "loss": 1.162, "step": 2921 }, { - "epoch": 0.6127070664709583, - "grad_norm": 6.174908830481501, - "learning_rate": 1.84503802961613e-05, - "loss": 2.0131, + "epoch": 0.41259531205874045, + "grad_norm": 3.7492538552338694, + "learning_rate": 1.939993505886683e-05, + "loss": 0.974, "step": 2922 }, { - "epoch": 0.6129167540364856, - "grad_norm": 7.121011420175259, - "learning_rate": 1.844916956647653e-05, - "loss": 2.2599, + "epoch": 0.41273651510872633, + "grad_norm": 3.8826384689302667, + "learning_rate": 1.9399414817980945e-05, + "loss": 1.0394, "step": 2923 }, { - "epoch": 0.613126441602013, - "grad_norm": 5.676948897530075, - "learning_rate": 1.8447958403757172e-05, - "loss": 1.6529, + "epoch": 0.4128777181587122, + "grad_norm": 4.3725984902259105, + "learning_rate": 1.9398894358656713e-05, + "loss": 1.0699, "step": 2924 }, { - "epoch": 0.6133361291675403, - "grad_norm": 6.242963608964129, - "learning_rate": 1.844674680806529e-05, - "loss": 1.6443, + "epoch": 0.4130189212086981, + "grad_norm": 3.793479845934427, + "learning_rate": 1.9398373680906242e-05, + "loss": 0.9866, "step": 2925 }, { - "epoch": 0.6135458167330677, - "grad_norm": 6.116517228764057, - "learning_rate": 1.8445534779462982e-05, - "loss": 2.2042, + "epoch": 0.413160124258684, + "grad_norm": 4.1472125644515625, + "learning_rate": 1.939785278474162e-05, + "loss": 1.1752, "step": 2926 }, { - "epoch": 0.613755504298595, - "grad_norm": 6.036515936567746, - "learning_rate": 1.8444322318012373e-05, - "loss": 2.0309, + "epoch": 0.41330132730866986, + "grad_norm": 4.519306232436554, + "learning_rate": 1.9397331670174958e-05, + "loss": 1.2904, "step": 2927 }, { - "epoch": 0.6139651918641225, - "grad_norm": 6.007903017768495, - "learning_rate": 1.84431094237756e-05, - "loss": 1.963, + "epoch": 0.41344253035865575, + "grad_norm": 4.253264277881394, + "learning_rate": 1.9396810337218373e-05, + "loss": 1.134, "step": 2928 }, { - "epoch": 0.6141748794296498, - "grad_norm": 5.136990563195864, - "learning_rate": 1.844189609681482e-05, - "loss": 1.8762, + "epoch": 0.41358373340864163, + "grad_norm": 4.070477433674152, + "learning_rate": 1.9396288785883968e-05, + "loss": 1.1479, "step": 2929 }, { - "epoch": 0.6143845669951772, - "grad_norm": 6.140688288583342, - "learning_rate": 1.8440682337192224e-05, - "loss": 2.1942, + "epoch": 0.4137249364586275, + "grad_norm": 3.938217226718001, + "learning_rate": 1.939576701618387e-05, + "loss": 1.1257, "step": 2930 }, { - "epoch": 0.6145942545607046, - "grad_norm": 5.538283354234424, - "learning_rate": 1.8439468144970022e-05, - "loss": 1.882, + "epoch": 0.4138661395086134, + "grad_norm": 3.982918332204933, + "learning_rate": 1.9395245028130205e-05, + "loss": 1.1336, "step": 2931 }, { - "epoch": 0.6148039421262319, - "grad_norm": 5.2441375597442255, - "learning_rate": 1.843825352021044e-05, - "loss": 2.1779, + "epoch": 0.4140073425585993, + "grad_norm": 3.9098492968515135, + "learning_rate": 1.9394722821735105e-05, + "loss": 1.1502, "step": 2932 }, { - "epoch": 0.6150136296917593, - "grad_norm": 5.956895783713266, - "learning_rate": 1.8437038462975728e-05, - "loss": 1.7479, + "epoch": 0.41414854560858516, + "grad_norm": 3.731447089150116, + "learning_rate": 1.93942003970107e-05, + "loss": 0.9934, "step": 2933 }, { - "epoch": 0.6152233172572866, - "grad_norm": 6.392289428765457, - "learning_rate": 1.843582297332816e-05, - "loss": 1.8556, + "epoch": 0.41428974865857104, + "grad_norm": 3.630236914862187, + "learning_rate": 1.9393677753969137e-05, + "loss": 0.976, "step": 2934 }, { - "epoch": 0.615433004822814, - "grad_norm": 6.78811861493254, - "learning_rate": 1.8434607051330034e-05, - "loss": 1.9108, + "epoch": 0.4144309517085569, + "grad_norm": 3.9682240211213013, + "learning_rate": 1.939315489262256e-05, + "loss": 1.0208, "step": 2935 }, { - "epoch": 0.6156426923883414, - "grad_norm": 6.404002829575354, - "learning_rate": 1.8433390697043668e-05, - "loss": 1.9437, + "epoch": 0.4145721547585428, + "grad_norm": 4.4969947139892055, + "learning_rate": 1.939263181298312e-05, + "loss": 1.0882, "step": 2936 }, { - "epoch": 0.6158523799538688, - "grad_norm": 6.311719832625062, - "learning_rate": 1.8432173910531405e-05, - "loss": 2.08, + "epoch": 0.41471335780852864, + "grad_norm": 4.644199723029871, + "learning_rate": 1.9392108515062973e-05, + "loss": 1.1258, "step": 2937 }, { - "epoch": 0.6160620675193961, - "grad_norm": 5.801134861337478, - "learning_rate": 1.84309566918556e-05, - "loss": 1.8332, + "epoch": 0.4148545608585145, + "grad_norm": 3.5982982846682816, + "learning_rate": 1.939158499887428e-05, + "loss": 1.0078, "step": 2938 }, { - "epoch": 0.6162717550849235, - "grad_norm": 5.843811089819831, - "learning_rate": 1.8429739041078646e-05, - "loss": 2.1143, + "epoch": 0.4149957639085004, + "grad_norm": 3.8268172344085145, + "learning_rate": 1.9391061264429207e-05, + "loss": 0.936, "step": 2939 }, { - "epoch": 0.6164814426504508, - "grad_norm": 5.484120365084043, - "learning_rate": 1.8428520958262942e-05, - "loss": 1.8145, + "epoch": 0.4151369669584863, + "grad_norm": 4.660959823668172, + "learning_rate": 1.9390537311739927e-05, + "loss": 1.4282, "step": 2940 }, { - "epoch": 0.6166911302159782, - "grad_norm": 6.407809558323341, - "learning_rate": 1.842730244347092e-05, - "loss": 1.8818, + "epoch": 0.41527817000847217, + "grad_norm": 4.14310241056, + "learning_rate": 1.9390013140818612e-05, + "loss": 1.268, "step": 2941 }, { - "epoch": 0.6169008177815055, - "grad_norm": 6.272648428434445, - "learning_rate": 1.8426083496765033e-05, - "loss": 2.0181, + "epoch": 0.41541937305845805, + "grad_norm": 5.6555251825391295, + "learning_rate": 1.938948875167745e-05, + "loss": 1.2468, "step": 2942 }, { - "epoch": 0.6171105053470329, - "grad_norm": 6.0755994469483285, - "learning_rate": 1.842486411820775e-05, - "loss": 2.0487, + "epoch": 0.41556057610844394, + "grad_norm": 3.7502422429615314, + "learning_rate": 1.9388964144328626e-05, + "loss": 1.0252, "step": 2943 }, { - "epoch": 0.6173201929125602, - "grad_norm": 5.792865816084123, - "learning_rate": 1.8423644307861573e-05, - "loss": 2.2358, + "epoch": 0.4157017791584298, + "grad_norm": 3.98705285787874, + "learning_rate": 1.9388439318784328e-05, + "loss": 1.1134, "step": 2944 }, { - "epoch": 0.6175298804780877, - "grad_norm": 5.925759022180038, - "learning_rate": 1.8422424065789012e-05, - "loss": 1.8529, + "epoch": 0.4158429822084157, + "grad_norm": 3.6240863322419505, + "learning_rate": 1.9387914275056754e-05, + "loss": 0.9367, "step": 2945 }, { - "epoch": 0.617739568043615, - "grad_norm": 6.70922830796199, - "learning_rate": 1.8421203392052612e-05, - "loss": 2.1349, + "epoch": 0.4159841852584016, + "grad_norm": 3.553166782745452, + "learning_rate": 1.9387389013158108e-05, + "loss": 0.9547, "step": 2946 }, { - "epoch": 0.6179492556091424, - "grad_norm": 4.754658103579967, - "learning_rate": 1.841998228671493e-05, - "loss": 1.9735, + "epoch": 0.41612538830838747, + "grad_norm": 3.981078488280875, + "learning_rate": 1.9386863533100597e-05, + "loss": 1.1791, "step": 2947 }, { - "epoch": 0.6181589431746698, - "grad_norm": 5.870961551358106, - "learning_rate": 1.8418760749838554e-05, - "loss": 2.1093, + "epoch": 0.41626659135837335, + "grad_norm": 3.376660750677664, + "learning_rate": 1.9386337834896428e-05, + "loss": 0.8798, "step": 2948 }, { - "epoch": 0.6183686307401971, - "grad_norm": 6.554197846675178, - "learning_rate": 1.8417538781486087e-05, - "loss": 2.0117, + "epoch": 0.41640779440835923, + "grad_norm": 4.26818289019022, + "learning_rate": 1.9385811918557822e-05, + "loss": 0.993, "step": 2949 }, { - "epoch": 0.6185783183057245, - "grad_norm": 5.816861469325661, - "learning_rate": 1.8416316381720157e-05, - "loss": 1.8815, + "epoch": 0.4165489974583451, + "grad_norm": 4.536825714719432, + "learning_rate": 1.9385285784097e-05, + "loss": 1.1402, "step": 2950 }, { - "epoch": 0.6187880058712518, - "grad_norm": 5.599796922541357, - "learning_rate": 1.8415093550603417e-05, - "loss": 1.8963, + "epoch": 0.416690200508331, + "grad_norm": 3.6106692925528727, + "learning_rate": 1.9384759431526192e-05, + "loss": 0.7852, "step": 2951 }, { - "epoch": 0.6189976934367792, - "grad_norm": 7.049869665461245, - "learning_rate": 1.8413870288198537e-05, - "loss": 2.0832, + "epoch": 0.4168314035583169, + "grad_norm": 4.147406182716625, + "learning_rate": 1.9384232860857627e-05, + "loss": 1.1421, "step": 2952 }, { - "epoch": 0.6192073810023065, - "grad_norm": 6.1240343175288645, - "learning_rate": 1.841264659456821e-05, - "loss": 1.8794, + "epoch": 0.41697260660830276, + "grad_norm": 4.885322473280678, + "learning_rate": 1.938370607210354e-05, + "loss": 1.3687, "step": 2953 }, { - "epoch": 0.619417068567834, - "grad_norm": 6.781324922899264, - "learning_rate": 1.8411422469775157e-05, - "loss": 1.9303, + "epoch": 0.4171138096582886, + "grad_norm": 4.159471897154181, + "learning_rate": 1.938317906527618e-05, + "loss": 0.9791, "step": 2954 }, { - "epoch": 0.6196267561333613, - "grad_norm": 6.103287801229008, - "learning_rate": 1.841019791388211e-05, - "loss": 1.6427, + "epoch": 0.4172550127082745, + "grad_norm": 4.1498407242052595, + "learning_rate": 1.938265184038779e-05, + "loss": 1.0724, "step": 2955 }, { - "epoch": 0.6198364436988887, - "grad_norm": 7.014494920410526, - "learning_rate": 1.8408972926951833e-05, - "loss": 2.0703, + "epoch": 0.41739621575826036, + "grad_norm": 4.5948087712234065, + "learning_rate": 1.938212439745062e-05, + "loss": 1.1297, "step": 2956 }, { - "epoch": 0.620046131264416, - "grad_norm": 6.659788083223147, - "learning_rate": 1.840774750904711e-05, - "loss": 2.0225, + "epoch": 0.41753741880824624, + "grad_norm": 4.893705377001965, + "learning_rate": 1.9381596736476936e-05, + "loss": 1.0395, "step": 2957 }, { - "epoch": 0.6202558188299434, - "grad_norm": 6.841402058639845, - "learning_rate": 1.840652166023075e-05, - "loss": 2.2322, + "epoch": 0.4176786218582321, + "grad_norm": 4.1482513911701595, + "learning_rate": 1.9381068857478994e-05, + "loss": 1.1519, "step": 2958 }, { - "epoch": 0.6204655063954707, - "grad_norm": 5.490412568410373, - "learning_rate": 1.840529538056557e-05, - "loss": 1.8545, + "epoch": 0.417819824908218, + "grad_norm": 4.757438876955242, + "learning_rate": 1.938054076046906e-05, + "loss": 1.3424, "step": 2959 }, { - "epoch": 0.6206751939609981, - "grad_norm": 5.641754294915338, - "learning_rate": 1.840406867011442e-05, - "loss": 1.9678, + "epoch": 0.4179610279582039, + "grad_norm": 4.995694320864768, + "learning_rate": 1.938001244545941e-05, + "loss": 1.2273, "step": 2960 }, { - "epoch": 0.6208848815265254, - "grad_norm": 6.423084300056922, - "learning_rate": 1.8402841528940178e-05, - "loss": 1.9471, + "epoch": 0.4181022310081898, + "grad_norm": 3.786883865536849, + "learning_rate": 1.9379483912462326e-05, + "loss": 1.0798, "step": 2961 }, { - "epoch": 0.6210945690920529, - "grad_norm": 6.243026150690286, - "learning_rate": 1.8401613957105735e-05, - "loss": 1.8033, + "epoch": 0.41824343405817566, + "grad_norm": 3.5285318571271076, + "learning_rate": 1.9378955161490086e-05, + "loss": 1.0306, "step": 2962 }, { - "epoch": 0.6213042566575802, - "grad_norm": 5.355698002207967, - "learning_rate": 1.8400385954674002e-05, - "loss": 1.8766, + "epoch": 0.41838463710816154, + "grad_norm": 4.084789505881134, + "learning_rate": 1.9378426192554975e-05, + "loss": 1.0592, "step": 2963 }, { - "epoch": 0.6215139442231076, - "grad_norm": 7.491708880422434, - "learning_rate": 1.8399157521707918e-05, - "loss": 1.963, + "epoch": 0.4185258401581474, + "grad_norm": 3.6749399432449517, + "learning_rate": 1.937789700566929e-05, + "loss": 0.8817, "step": 2964 }, { - "epoch": 0.621723631788635, - "grad_norm": 6.001839891011696, - "learning_rate": 1.839792865827045e-05, - "loss": 1.9395, + "epoch": 0.4186670432081333, + "grad_norm": 4.001618647336214, + "learning_rate": 1.9377367600845333e-05, + "loss": 0.9592, "step": 2965 }, { - "epoch": 0.6219333193541623, - "grad_norm": 7.076759551407105, - "learning_rate": 1.839669936442457e-05, - "loss": 1.9208, + "epoch": 0.4188082462581192, + "grad_norm": 3.716854461116892, + "learning_rate": 1.93768379780954e-05, + "loss": 1.1373, "step": 2966 }, { - "epoch": 0.6221430069196897, - "grad_norm": 6.499574473097217, - "learning_rate": 1.8395469640233277e-05, - "loss": 2.0457, + "epoch": 0.41894944930810507, + "grad_norm": 5.1639864738998575, + "learning_rate": 1.9376308137431802e-05, + "loss": 1.3133, "step": 2967 }, { - "epoch": 0.622352694485217, - "grad_norm": 6.242789360030616, - "learning_rate": 1.8394239485759613e-05, - "loss": 1.8231, + "epoch": 0.41909065235809095, + "grad_norm": 3.092885078183632, + "learning_rate": 1.937577807886685e-05, + "loss": 0.7712, "step": 2968 }, { - "epoch": 0.6225623820507444, - "grad_norm": 6.082853195416612, - "learning_rate": 1.8393008901066612e-05, - "loss": 1.9834, + "epoch": 0.41923185540807684, + "grad_norm": 5.78927822030806, + "learning_rate": 1.9375247802412867e-05, + "loss": 1.385, "step": 2969 }, { - "epoch": 0.6227720696162717, - "grad_norm": 6.362345890634872, - "learning_rate": 1.839177788621735e-05, - "loss": 2.1581, + "epoch": 0.4193730584580627, + "grad_norm": 3.7088723792850042, + "learning_rate": 1.9374717308082172e-05, + "loss": 1.0394, "step": 2970 }, { - "epoch": 0.6229817571817992, - "grad_norm": 5.440557014113412, - "learning_rate": 1.8390546441274914e-05, - "loss": 1.9935, + "epoch": 0.41951426150804855, + "grad_norm": 4.356329954290291, + "learning_rate": 1.9374186595887096e-05, + "loss": 1.1356, "step": 2971 }, { - "epoch": 0.6231914447473265, - "grad_norm": 5.413578860244006, - "learning_rate": 1.8389314566302422e-05, - "loss": 2.0807, + "epoch": 0.41965546455803443, + "grad_norm": 3.8486158356130757, + "learning_rate": 1.9373655665839973e-05, + "loss": 1.1013, "step": 2972 }, { - "epoch": 0.6234011323128539, - "grad_norm": 5.253535948966556, - "learning_rate": 1.838808226136301e-05, - "loss": 1.8844, + "epoch": 0.4197966676080203, + "grad_norm": 3.6979239248404605, + "learning_rate": 1.937312451795314e-05, + "loss": 1.0834, "step": 2973 }, { - "epoch": 0.6236108198783812, - "grad_norm": 6.026476148171401, - "learning_rate": 1.838684952651983e-05, - "loss": 1.7923, + "epoch": 0.4199378706580062, + "grad_norm": 4.2480244243582765, + "learning_rate": 1.937259315223894e-05, + "loss": 1.2392, "step": 2974 }, { - "epoch": 0.6238205074439086, - "grad_norm": 6.031666843340524, - "learning_rate": 1.8385616361836066e-05, - "loss": 2.072, + "epoch": 0.4200790737079921, + "grad_norm": 3.821583404521679, + "learning_rate": 1.937206156870973e-05, + "loss": 0.868, "step": 2975 }, { - "epoch": 0.6240301950094359, - "grad_norm": 5.831357455822318, - "learning_rate": 1.8384382767374923e-05, - "loss": 1.6445, + "epoch": 0.42022027675797796, + "grad_norm": 3.4317261341639234, + "learning_rate": 1.937152976737785e-05, + "loss": 0.8321, "step": 2976 }, { - "epoch": 0.6242398825749633, - "grad_norm": 5.873151157900796, - "learning_rate": 1.8383148743199615e-05, - "loss": 2.2271, + "epoch": 0.42036147980796384, + "grad_norm": 4.4689300338247, + "learning_rate": 1.9370997748255665e-05, + "loss": 1.3988, "step": 2977 }, { - "epoch": 0.6244495701404906, - "grad_norm": 6.399140507553066, - "learning_rate": 1.8381914289373396e-05, - "loss": 1.931, + "epoch": 0.42050268285794973, + "grad_norm": 4.343532653965976, + "learning_rate": 1.937046551135554e-05, + "loss": 1.1517, "step": 2978 }, { - "epoch": 0.624659257706018, - "grad_norm": 5.939862297556723, - "learning_rate": 1.8380679405959535e-05, - "loss": 1.4384, + "epoch": 0.4206438859079356, + "grad_norm": 4.110860584375025, + "learning_rate": 1.936993305668984e-05, + "loss": 0.9158, "step": 2979 }, { - "epoch": 0.6248689452715454, - "grad_norm": 7.985760661391244, - "learning_rate": 1.8379444093021317e-05, - "loss": 2.1693, + "epoch": 0.4207850889579215, + "grad_norm": 3.8065787826575934, + "learning_rate": 1.9369400384270948e-05, + "loss": 1.0486, "step": 2980 }, { - "epoch": 0.6250786328370728, - "grad_norm": 6.134582472468553, - "learning_rate": 1.8378208350622056e-05, - "loss": 2.2313, + "epoch": 0.4209262920079074, + "grad_norm": 3.541050117612057, + "learning_rate": 1.936886749411124e-05, + "loss": 0.9584, "step": 2981 }, { - "epoch": 0.6252883204026001, - "grad_norm": 7.330600879974096, - "learning_rate": 1.8376972178825085e-05, - "loss": 2.4532, + "epoch": 0.42106749505789326, + "grad_norm": 4.376819041779908, + "learning_rate": 1.9368334386223092e-05, + "loss": 1.3686, "step": 2982 }, { - "epoch": 0.6254980079681275, - "grad_norm": 7.01349886967936, - "learning_rate": 1.8375735577693763e-05, - "loss": 2.2038, + "epoch": 0.42120869810787914, + "grad_norm": 4.022992484473648, + "learning_rate": 1.93678010606189e-05, + "loss": 1.2296, "step": 2983 }, { - "epoch": 0.6257076955336549, - "grad_norm": 6.566217796768941, - "learning_rate": 1.8374498547291467e-05, - "loss": 1.8165, + "epoch": 0.421349901157865, + "grad_norm": 3.587734342463129, + "learning_rate": 1.9367267517311057e-05, + "loss": 0.9388, "step": 2984 }, { - "epoch": 0.6259173830991822, - "grad_norm": 6.2582402601021885, - "learning_rate": 1.837326108768159e-05, - "loss": 1.7524, + "epoch": 0.4214911042078509, + "grad_norm": 4.75393160614606, + "learning_rate": 1.936673375631196e-05, + "loss": 1.2229, "step": 2985 }, { - "epoch": 0.6261270706647096, - "grad_norm": 6.265951401991545, - "learning_rate": 1.837202319892756e-05, - "loss": 1.9066, + "epoch": 0.4216323072578368, + "grad_norm": 3.5073132722201636, + "learning_rate": 1.9366199777634018e-05, + "loss": 1.048, "step": 2986 }, { - "epoch": 0.6263367582302369, - "grad_norm": 6.368107366229069, - "learning_rate": 1.8370784881092823e-05, - "loss": 2.1225, + "epoch": 0.4217735103078227, + "grad_norm": 3.910440585809235, + "learning_rate": 1.936566558128964e-05, + "loss": 0.978, "step": 2987 }, { - "epoch": 0.6265464457957644, - "grad_norm": 5.564079796347212, - "learning_rate": 1.8369546134240844e-05, - "loss": 2.1955, + "epoch": 0.4219147133578085, + "grad_norm": 4.666741046696743, + "learning_rate": 1.9365131167291237e-05, + "loss": 1.1699, "step": 2988 }, { - "epoch": 0.6267561333612917, - "grad_norm": 6.5435462682551595, - "learning_rate": 1.8368306958435108e-05, - "loss": 1.8207, + "epoch": 0.4220559164077944, + "grad_norm": 4.5289714222021535, + "learning_rate": 1.936459653565123e-05, + "loss": 1.2689, "step": 2989 }, { - "epoch": 0.6269658209268191, - "grad_norm": 6.430563097358877, - "learning_rate": 1.8367067353739125e-05, - "loss": 2.0898, + "epoch": 0.42219711945778027, + "grad_norm": 4.762993725661353, + "learning_rate": 1.9364061686382042e-05, + "loss": 1.2597, "step": 2990 }, { - "epoch": 0.6271755084923464, - "grad_norm": 6.154208578228744, - "learning_rate": 1.836582732021643e-05, - "loss": 1.6919, + "epoch": 0.42233832250776615, + "grad_norm": 4.069427959080039, + "learning_rate": 1.9363526619496106e-05, + "loss": 0.8957, "step": 2991 }, { - "epoch": 0.6273851960578738, - "grad_norm": 5.764625633442794, - "learning_rate": 1.8364586857930574e-05, - "loss": 1.7333, + "epoch": 0.42247952555775203, + "grad_norm": 3.3439384686161384, + "learning_rate": 1.9362991335005853e-05, + "loss": 0.9248, "step": 2992 }, { - "epoch": 0.6275948836234011, - "grad_norm": 5.602025141125753, - "learning_rate": 1.8363345966945136e-05, - "loss": 1.7396, + "epoch": 0.4226207286077379, + "grad_norm": 4.225208176149682, + "learning_rate": 1.9362455832923726e-05, + "loss": 1.1362, "step": 2993 }, { - "epoch": 0.6278045711889285, - "grad_norm": 6.205866619625006, - "learning_rate": 1.836210464732371e-05, - "loss": 2.002, + "epoch": 0.4227619316577238, + "grad_norm": 3.639864023243686, + "learning_rate": 1.9361920113262172e-05, + "loss": 1.0212, "step": 2994 }, { - "epoch": 0.6280142587544558, - "grad_norm": 5.489230757671925, - "learning_rate": 1.8360862899129917e-05, - "loss": 1.9006, + "epoch": 0.4229031347077097, + "grad_norm": 4.369128058571234, + "learning_rate": 1.9361384176033637e-05, + "loss": 1.3446, "step": 2995 }, { - "epoch": 0.6282239463199832, - "grad_norm": 6.075765108712726, - "learning_rate": 1.83596207224274e-05, - "loss": 2.0385, + "epoch": 0.42304433775769557, + "grad_norm": 4.327988460038719, + "learning_rate": 1.9360848021250573e-05, + "loss": 1.0235, "step": 2996 }, { - "epoch": 0.6284336338855105, - "grad_norm": 5.7565748011513165, - "learning_rate": 1.835837811727982e-05, - "loss": 2.3448, + "epoch": 0.42318554080768145, + "grad_norm": 4.366791415942107, + "learning_rate": 1.9360311648925448e-05, + "loss": 1.197, "step": 2997 }, { - "epoch": 0.628643321451038, - "grad_norm": 5.5024067343373435, - "learning_rate": 1.8357135083750867e-05, - "loss": 1.973, + "epoch": 0.42332674385766733, + "grad_norm": 4.050730950424255, + "learning_rate": 1.935977505907072e-05, + "loss": 1.1757, "step": 2998 }, { - "epoch": 0.6288530090165653, - "grad_norm": 5.86692485272245, - "learning_rate": 1.8355891621904245e-05, - "loss": 1.9835, + "epoch": 0.4234679469076532, + "grad_norm": 4.0717476681005085, + "learning_rate": 1.9359238251698862e-05, + "loss": 1.0177, "step": 2999 }, { - "epoch": 0.6290626965820927, - "grad_norm": 5.805337766616843, - "learning_rate": 1.8354647731803687e-05, - "loss": 2.1475, + "epoch": 0.4236091499576391, + "grad_norm": 5.236325766803049, + "learning_rate": 1.935870122682235e-05, + "loss": 1.0793, "step": 3000 }, { - "epoch": 0.6292723841476201, - "grad_norm": 6.881706936242785, - "learning_rate": 1.835340341351294e-05, - "loss": 2.1763, + "epoch": 0.423750353007625, + "grad_norm": 3.4543363743118687, + "learning_rate": 1.935816398445366e-05, + "loss": 0.935, "step": 3001 }, { - "epoch": 0.6294820717131474, - "grad_norm": 6.445833120415932, - "learning_rate": 1.835215866709578e-05, - "loss": 1.9523, + "epoch": 0.42389155605761086, + "grad_norm": 4.594292267265184, + "learning_rate": 1.9357626524605286e-05, + "loss": 1.426, "step": 3002 }, { - "epoch": 0.6296917592786748, - "grad_norm": 7.641125533660694, - "learning_rate": 1.8350913492616e-05, - "loss": 2.1747, + "epoch": 0.42403275910759675, + "grad_norm": 3.2856845464798066, + "learning_rate": 1.9357088847289705e-05, + "loss": 0.9806, "step": 3003 }, { - "epoch": 0.6299014468442021, - "grad_norm": 6.585319856334905, - "learning_rate": 1.8349667890137425e-05, - "loss": 2.3402, + "epoch": 0.42417396215758263, + "grad_norm": 4.356371456307121, + "learning_rate": 1.935655095251943e-05, + "loss": 1.0419, "step": 3004 }, { - "epoch": 0.6301111344097295, - "grad_norm": 5.86932865879389, - "learning_rate": 1.8348421859723886e-05, - "loss": 1.8128, + "epoch": 0.42431516520756846, + "grad_norm": 3.2771613212080495, + "learning_rate": 1.9356012840306945e-05, + "loss": 0.8717, "step": 3005 }, { - "epoch": 0.6303208219752569, - "grad_norm": 6.8128799455015585, - "learning_rate": 1.8347175401439245e-05, - "loss": 2.2549, + "epoch": 0.42445636825755434, + "grad_norm": 4.722649611407031, + "learning_rate": 1.9355474510664763e-05, + "loss": 1.0947, "step": 3006 }, { - "epoch": 0.6305305095407843, - "grad_norm": 6.235016165116718, - "learning_rate": 1.834592851534739e-05, - "loss": 2.0222, + "epoch": 0.4245975713075402, + "grad_norm": 3.985001307257452, + "learning_rate": 1.9354935963605395e-05, + "loss": 1.0849, "step": 3007 }, { - "epoch": 0.6307401971063116, - "grad_norm": 5.670669593581825, - "learning_rate": 1.8344681201512223e-05, - "loss": 1.9152, + "epoch": 0.4247387743575261, + "grad_norm": 4.034451347047195, + "learning_rate": 1.9354397199141356e-05, + "loss": 0.9316, "step": 3008 }, { - "epoch": 0.630949884671839, - "grad_norm": 6.0532093687989414, - "learning_rate": 1.834343345999767e-05, - "loss": 2.2976, + "epoch": 0.424879977407512, + "grad_norm": 3.9485174388475808, + "learning_rate": 1.935385821728516e-05, + "loss": 1.0944, "step": 3009 }, { - "epoch": 0.6311595722373663, - "grad_norm": 5.627434316039015, - "learning_rate": 1.8342185290867683e-05, - "loss": 1.6983, + "epoch": 0.42502118045749787, + "grad_norm": 3.7002010891142385, + "learning_rate": 1.9353319018049346e-05, + "loss": 0.9603, "step": 3010 }, { - "epoch": 0.6313692598028937, - "grad_norm": 5.4178916979648415, - "learning_rate": 1.8340936694186232e-05, - "loss": 2.02, + "epoch": 0.42516238350748375, + "grad_norm": 4.910970112165826, + "learning_rate": 1.9352779601446435e-05, + "loss": 1.1746, "step": 3011 }, { - "epoch": 0.631578947368421, - "grad_norm": 5.8437081006531875, - "learning_rate": 1.8339687670017306e-05, - "loss": 2.0354, + "epoch": 0.42530358655746964, + "grad_norm": 3.7963204765837193, + "learning_rate": 1.9352239967488965e-05, + "loss": 1.0846, "step": 3012 }, { - "epoch": 0.6317886349339484, - "grad_norm": 5.661663000980645, - "learning_rate": 1.8338438218424923e-05, - "loss": 2.3399, + "epoch": 0.4254447896074555, + "grad_norm": 4.570854899293397, + "learning_rate": 1.9351700116189474e-05, + "loss": 1.1241, "step": 3013 }, { - "epoch": 0.6319983224994757, - "grad_norm": 6.353765604564318, - "learning_rate": 1.833718833947312e-05, - "loss": 2.0716, + "epoch": 0.4255859926574414, + "grad_norm": 4.915726820841134, + "learning_rate": 1.9351160047560516e-05, + "loss": 1.2272, "step": 3014 }, { - "epoch": 0.6322080100650032, - "grad_norm": 5.75083910948988, - "learning_rate": 1.8335938033225954e-05, - "loss": 1.99, + "epoch": 0.4257271957074273, + "grad_norm": 4.134927119536651, + "learning_rate": 1.9350619761614634e-05, + "loss": 1.0089, "step": 3015 }, { - "epoch": 0.6324176976305305, - "grad_norm": 5.747361601818475, - "learning_rate": 1.8334687299747505e-05, - "loss": 1.9127, + "epoch": 0.42586839875741317, + "grad_norm": 3.6027544377206686, + "learning_rate": 1.935007925836439e-05, + "loss": 0.7604, "step": 3016 }, { - "epoch": 0.6326273851960579, - "grad_norm": 6.457989111053694, - "learning_rate": 1.8333436139101876e-05, - "loss": 1.9246, + "epoch": 0.42600960180739905, + "grad_norm": 3.935618371132091, + "learning_rate": 1.9349538537822342e-05, + "loss": 1.0387, "step": 3017 }, { - "epoch": 0.6328370727615852, - "grad_norm": 6.231392402406366, - "learning_rate": 1.8332184551353193e-05, - "loss": 2.2198, + "epoch": 0.42615080485738493, + "grad_norm": 4.136742954892019, + "learning_rate": 1.9348997600001052e-05, + "loss": 1.2162, "step": 3018 }, { - "epoch": 0.6330467603271126, - "grad_norm": 6.310874894429705, - "learning_rate": 1.83309325365656e-05, - "loss": 1.7845, + "epoch": 0.4262920079073708, + "grad_norm": 3.907553560672167, + "learning_rate": 1.9348456444913098e-05, + "loss": 1.0481, "step": 3019 }, { - "epoch": 0.63325644789264, - "grad_norm": 5.795178709519663, - "learning_rate": 1.8329680094803265e-05, - "loss": 1.7572, + "epoch": 0.4264332109573567, + "grad_norm": 3.41435332853642, + "learning_rate": 1.934791507257105e-05, + "loss": 0.8571, "step": 3020 }, { - "epoch": 0.6334661354581673, - "grad_norm": 6.35618531789427, - "learning_rate": 1.832842722613038e-05, - "loss": 2.0481, + "epoch": 0.4265744140073426, + "grad_norm": 3.12967297619145, + "learning_rate": 1.9347373482987497e-05, + "loss": 0.7906, "step": 3021 }, { - "epoch": 0.6336758230236947, - "grad_norm": 6.0666129248367655, - "learning_rate": 1.832717393061115e-05, - "loss": 2.2085, + "epoch": 0.4267156170573284, + "grad_norm": 3.463511263098065, + "learning_rate": 1.934683167617502e-05, + "loss": 0.853, "step": 3022 }, { - "epoch": 0.633885510589222, - "grad_norm": 6.085193114885912, - "learning_rate": 1.8325920208309815e-05, - "loss": 1.9534, + "epoch": 0.4268568201073143, + "grad_norm": 3.8317183693774783, + "learning_rate": 1.9346289652146212e-05, + "loss": 0.9446, "step": 3023 }, { - "epoch": 0.6340951981547495, - "grad_norm": 6.6428480631854425, - "learning_rate": 1.832466605929063e-05, - "loss": 2.0879, + "epoch": 0.4269980231573002, + "grad_norm": 3.5875320589148276, + "learning_rate": 1.9345747410913666e-05, + "loss": 0.9279, "step": 3024 }, { - "epoch": 0.6343048857202768, - "grad_norm": 5.973788170683085, - "learning_rate": 1.832341148361787e-05, - "loss": 2.2544, + "epoch": 0.42713922620728606, + "grad_norm": 4.345646340930474, + "learning_rate": 1.934520495248999e-05, + "loss": 1.1124, "step": 3025 }, { - "epoch": 0.6345145732858042, - "grad_norm": 5.8470876802096745, - "learning_rate": 1.8322156481355836e-05, - "loss": 1.5665, + "epoch": 0.42728042925727194, + "grad_norm": 4.747252224652471, + "learning_rate": 1.9344662276887787e-05, + "loss": 1.1809, "step": 3026 }, { - "epoch": 0.6347242608513315, - "grad_norm": 5.952277485838748, - "learning_rate": 1.832090105256885e-05, - "loss": 2.2062, + "epoch": 0.4274216323072578, + "grad_norm": 3.4596524485931672, + "learning_rate": 1.9344119384119665e-05, + "loss": 0.8293, "step": 3027 }, { - "epoch": 0.6349339484168589, - "grad_norm": 6.991806731698505, - "learning_rate": 1.831964519732125e-05, - "loss": 1.8965, + "epoch": 0.4275628353572437, + "grad_norm": 5.000673678340825, + "learning_rate": 1.9343576274198246e-05, + "loss": 1.3002, "step": 3028 }, { - "epoch": 0.6351436359823862, - "grad_norm": 5.56202844364899, - "learning_rate": 1.8318388915677406e-05, - "loss": 1.7468, + "epoch": 0.4277040384072296, + "grad_norm": 4.259529042781315, + "learning_rate": 1.934303294713615e-05, + "loss": 0.8908, "step": 3029 }, { - "epoch": 0.6353533235479136, - "grad_norm": 6.136952608218808, - "learning_rate": 1.8317132207701703e-05, - "loss": 2.3373, + "epoch": 0.4278452414572155, + "grad_norm": 3.7582437419678008, + "learning_rate": 1.9342489402945997e-05, + "loss": 1.0599, "step": 3030 }, { - "epoch": 0.6355630111134409, - "grad_norm": 6.120249658991906, - "learning_rate": 1.8315875073458546e-05, - "loss": 1.7408, + "epoch": 0.42798644450720136, + "grad_norm": 3.883050328807419, + "learning_rate": 1.9341945641640432e-05, + "loss": 1.1235, "step": 3031 }, { - "epoch": 0.6357726986789684, - "grad_norm": 5.20310201475147, - "learning_rate": 1.831461751301237e-05, - "loss": 1.8269, + "epoch": 0.42812764755718724, + "grad_norm": 4.255648148703228, + "learning_rate": 1.9341401663232083e-05, + "loss": 1.2607, "step": 3032 }, { - "epoch": 0.6359823862444957, - "grad_norm": 5.952074442396482, - "learning_rate": 1.8313359526427627e-05, - "loss": 1.5961, + "epoch": 0.4282688506071731, + "grad_norm": 3.9189560493364577, + "learning_rate": 1.9340857467733595e-05, + "loss": 0.9827, "step": 3033 }, { - "epoch": 0.6361920738100231, - "grad_norm": 5.316538412482053, - "learning_rate": 1.8312101113768786e-05, - "loss": 1.9048, + "epoch": 0.428410053657159, + "grad_norm": 3.9164510829482806, + "learning_rate": 1.934031305515761e-05, + "loss": 1.0906, "step": 3034 }, { - "epoch": 0.6364017613755504, - "grad_norm": 6.7088514617458594, - "learning_rate": 1.8310842275100352e-05, - "loss": 2.0127, + "epoch": 0.4285512567071449, + "grad_norm": 3.45283831547893, + "learning_rate": 1.9339768425516786e-05, + "loss": 0.9213, "step": 3035 }, { - "epoch": 0.6366114489410778, - "grad_norm": 6.0326708649622764, - "learning_rate": 1.8309583010486832e-05, - "loss": 2.2911, + "epoch": 0.4286924597571308, + "grad_norm": 3.997385854545787, + "learning_rate": 1.933922357882378e-05, + "loss": 1.1353, "step": 3036 }, { - "epoch": 0.6368211365066052, - "grad_norm": 6.820922445631784, - "learning_rate": 1.8308323319992773e-05, - "loss": 2.1033, + "epoch": 0.42883366280711666, + "grad_norm": 3.937757684435129, + "learning_rate": 1.9338678515091243e-05, + "loss": 1.1365, "step": 3037 }, { - "epoch": 0.6370308240721325, - "grad_norm": 6.996388408391846, - "learning_rate": 1.830706320368273e-05, - "loss": 1.8596, + "epoch": 0.42897486585710254, + "grad_norm": 3.2627494341419583, + "learning_rate": 1.933813323433186e-05, + "loss": 0.8759, "step": 3038 }, { - "epoch": 0.6372405116376599, - "grad_norm": 5.5482173129981245, - "learning_rate": 1.8305802661621293e-05, - "loss": 1.947, + "epoch": 0.42911606890708837, + "grad_norm": 3.8664787103024207, + "learning_rate": 1.9337587736558286e-05, + "loss": 1.0441, "step": 3039 }, { - "epoch": 0.6374501992031872, - "grad_norm": 5.6801980060072745, - "learning_rate": 1.8304541693873066e-05, - "loss": 2.1226, + "epoch": 0.42925727195707425, + "grad_norm": 4.549203146952161, + "learning_rate": 1.933704202178321e-05, + "loss": 1.2241, "step": 3040 }, { - "epoch": 0.6376598867687147, - "grad_norm": 5.497440890326025, - "learning_rate": 1.830328030050267e-05, - "loss": 1.744, + "epoch": 0.42939847500706013, + "grad_norm": 3.670039295681777, + "learning_rate": 1.9336496090019307e-05, + "loss": 1.0967, "step": 3041 }, { - "epoch": 0.637869574334242, - "grad_norm": 6.902921978402993, - "learning_rate": 1.8302018481574756e-05, - "loss": 2.0559, + "epoch": 0.429539678057046, + "grad_norm": 4.163687334596823, + "learning_rate": 1.9335949941279267e-05, + "loss": 1.1813, "step": 3042 }, { - "epoch": 0.6380792618997694, - "grad_norm": 6.088228193402139, - "learning_rate": 1.8300756237154e-05, - "loss": 1.9644, + "epoch": 0.4296808811070319, + "grad_norm": 4.433612787254471, + "learning_rate": 1.9335403575575787e-05, + "loss": 1.3643, "step": 3043 }, { - "epoch": 0.6382889494652967, - "grad_norm": 6.510881861310974, - "learning_rate": 1.8299493567305088e-05, - "loss": 2.0613, + "epoch": 0.4298220841570178, + "grad_norm": 4.825446832411553, + "learning_rate": 1.9334856992921555e-05, + "loss": 1.1961, "step": 3044 }, { - "epoch": 0.6384986370308241, - "grad_norm": 5.9988705028592815, - "learning_rate": 1.8298230472092738e-05, - "loss": 1.8969, + "epoch": 0.42996328720700366, + "grad_norm": 3.7880358755130525, + "learning_rate": 1.9334310193329276e-05, + "loss": 0.9831, "step": 3045 }, { - "epoch": 0.6387083245963514, - "grad_norm": 6.041217302938949, - "learning_rate": 1.829696695158168e-05, - "loss": 1.7567, + "epoch": 0.43010449025698955, + "grad_norm": 3.86781106719311, + "learning_rate": 1.9333763176811663e-05, + "loss": 1.1971, "step": 3046 }, { - "epoch": 0.6389180121618788, - "grad_norm": 7.05934025622722, - "learning_rate": 1.8295703005836677e-05, - "loss": 1.9822, + "epoch": 0.43024569330697543, + "grad_norm": 3.801751242889334, + "learning_rate": 1.9333215943381425e-05, + "loss": 0.978, "step": 3047 }, { - "epoch": 0.6391276997274061, - "grad_norm": 6.24086378561901, - "learning_rate": 1.8294438634922507e-05, - "loss": 1.8842, + "epoch": 0.4303868963569613, + "grad_norm": 3.825655518979746, + "learning_rate": 1.933266849305128e-05, + "loss": 1.0388, "step": 3048 }, { - "epoch": 0.6393373872929335, - "grad_norm": 4.860105322229299, - "learning_rate": 1.829317383890397e-05, - "loss": 1.7116, + "epoch": 0.4305280994069472, + "grad_norm": 3.671328006227849, + "learning_rate": 1.9332120825833948e-05, + "loss": 0.9056, "step": 3049 }, { - "epoch": 0.6395470748584609, - "grad_norm": 5.947512241161165, - "learning_rate": 1.829190861784589e-05, - "loss": 1.863, + "epoch": 0.4306693024569331, + "grad_norm": 3.6053679729218024, + "learning_rate": 1.9331572941742157e-05, + "loss": 1.0565, "step": 3050 }, { - "epoch": 0.6397567624239883, - "grad_norm": 6.027982894227071, - "learning_rate": 1.8290642971813113e-05, - "loss": 2.1154, + "epoch": 0.43081050550691896, + "grad_norm": 3.747455967972404, + "learning_rate": 1.933102484078864e-05, + "loss": 1.0793, "step": 3051 }, { - "epoch": 0.6399664499895156, - "grad_norm": 6.550770136585487, - "learning_rate": 1.8289376900870506e-05, - "loss": 1.801, + "epoch": 0.43095170855690484, + "grad_norm": 3.172952655342485, + "learning_rate": 1.9330476522986136e-05, + "loss": 0.8129, "step": 3052 }, { - "epoch": 0.640176137555043, - "grad_norm": 6.877263008135769, - "learning_rate": 1.8288110405082956e-05, - "loss": 1.7381, + "epoch": 0.4310929116068907, + "grad_norm": 4.242937112399398, + "learning_rate": 1.932992798834739e-05, + "loss": 1.1198, "step": 3053 }, { - "epoch": 0.6403858251205703, - "grad_norm": 5.87457880262923, - "learning_rate": 1.828684348451537e-05, - "loss": 2.0457, + "epoch": 0.4312341146568766, + "grad_norm": 3.7048135181453277, + "learning_rate": 1.9329379236885145e-05, + "loss": 0.9356, "step": 3054 }, { - "epoch": 0.6405955126860977, - "grad_norm": 6.61273343295273, - "learning_rate": 1.828557613923268e-05, - "loss": 2.2301, + "epoch": 0.4313753177068625, + "grad_norm": 4.173652898442035, + "learning_rate": 1.9328830268612155e-05, + "loss": 1.0386, "step": 3055 }, { - "epoch": 0.6408052002516251, - "grad_norm": 6.761473456770218, - "learning_rate": 1.8284308369299845e-05, - "loss": 1.7939, + "epoch": 0.4315165207568483, + "grad_norm": 4.644994445099839, + "learning_rate": 1.932828108354118e-05, + "loss": 1.244, "step": 3056 }, { - "epoch": 0.6410148878171524, - "grad_norm": 5.568846549803721, - "learning_rate": 1.828304017478184e-05, - "loss": 1.9008, + "epoch": 0.4316577238068342, + "grad_norm": 3.4156460497909555, + "learning_rate": 1.932773168168498e-05, + "loss": 1.0108, "step": 3057 }, { - "epoch": 0.6412245753826799, - "grad_norm": 7.1327175395719555, - "learning_rate": 1.8281771555743655e-05, - "loss": 1.7472, + "epoch": 0.4317989268568201, + "grad_norm": 3.4952582420354483, + "learning_rate": 1.9327182063056325e-05, + "loss": 0.9767, "step": 3058 }, { - "epoch": 0.6414342629482072, - "grad_norm": 6.028220563181006, - "learning_rate": 1.828050251225032e-05, - "loss": 2.1141, + "epoch": 0.43194012990680597, + "grad_norm": 3.5005816237212275, + "learning_rate": 1.932663222766799e-05, + "loss": 0.8376, "step": 3059 }, { - "epoch": 0.6416439505137346, - "grad_norm": 7.268321161030788, - "learning_rate": 1.8279233044366864e-05, - "loss": 2.0093, + "epoch": 0.43208133295679185, + "grad_norm": 3.970449591273115, + "learning_rate": 1.9326082175532744e-05, + "loss": 1.1309, "step": 3060 }, { - "epoch": 0.6418536380792619, - "grad_norm": 6.604429447192501, - "learning_rate": 1.827796315215836e-05, - "loss": 2.2255, + "epoch": 0.43222253600677774, + "grad_norm": 3.9419487217418596, + "learning_rate": 1.9325531906663377e-05, + "loss": 1.2564, "step": 3061 }, { - "epoch": 0.6420633256447893, - "grad_norm": 6.111561761445589, - "learning_rate": 1.827669283568988e-05, - "loss": 1.9586, + "epoch": 0.4323637390567636, + "grad_norm": 4.119349468406248, + "learning_rate": 1.932498142107268e-05, + "loss": 1.2772, "step": 3062 }, { - "epoch": 0.6422730132103166, - "grad_norm": 6.961044945941901, - "learning_rate": 1.8275422095026544e-05, - "loss": 2.1318, + "epoch": 0.4325049421067495, + "grad_norm": 4.359452465240006, + "learning_rate": 1.9324430718773436e-05, + "loss": 1.0907, "step": 3063 }, { - "epoch": 0.642482700775844, - "grad_norm": 6.753699687823845, - "learning_rate": 1.827415093023347e-05, - "loss": 2.2353, + "epoch": 0.4326461451567354, + "grad_norm": 3.846599677665212, + "learning_rate": 1.9323879799778452e-05, + "loss": 0.9929, "step": 3064 }, { - "epoch": 0.6426923883413713, - "grad_norm": 6.806147524542361, - "learning_rate": 1.8272879341375808e-05, - "loss": 2.1543, + "epoch": 0.43278734820672127, + "grad_norm": 4.157127194315688, + "learning_rate": 1.9323328664100527e-05, + "loss": 1.1051, "step": 3065 }, { - "epoch": 0.6429020759068987, - "grad_norm": 5.336034839169556, - "learning_rate": 1.8271607328518732e-05, - "loss": 1.8104, + "epoch": 0.43292855125670715, + "grad_norm": 3.82641484847605, + "learning_rate": 1.9322777311752473e-05, + "loss": 1.1926, "step": 3066 }, { - "epoch": 0.643111763472426, - "grad_norm": 6.742453529827101, - "learning_rate": 1.8270334891727433e-05, - "loss": 1.9633, + "epoch": 0.43306975430669303, + "grad_norm": 3.6778994874121422, + "learning_rate": 1.93222257427471e-05, + "loss": 0.9989, "step": 3067 }, { - "epoch": 0.6433214510379535, - "grad_norm": 6.12031899943188, - "learning_rate": 1.826906203106713e-05, - "loss": 1.8327, + "epoch": 0.4332109573566789, + "grad_norm": 4.2429486436424035, + "learning_rate": 1.9321673957097226e-05, + "loss": 0.9301, "step": 3068 }, { - "epoch": 0.6435311386034808, - "grad_norm": 6.136631624615883, - "learning_rate": 1.8267788746603055e-05, - "loss": 1.734, + "epoch": 0.4333521604066648, + "grad_norm": 3.645462968627209, + "learning_rate": 1.9321121954815675e-05, + "loss": 0.9476, "step": 3069 }, { - "epoch": 0.6437408261690082, - "grad_norm": 7.398613128319719, - "learning_rate": 1.826651503840047e-05, - "loss": 2.2307, + "epoch": 0.4334933634566507, + "grad_norm": 3.989111425830674, + "learning_rate": 1.9320569735915273e-05, + "loss": 1.0978, "step": 3070 }, { - "epoch": 0.6439505137345355, - "grad_norm": 6.067959128754426, - "learning_rate": 1.8265240906524644e-05, - "loss": 1.9187, + "epoch": 0.43363456650663657, + "grad_norm": 4.784009524915928, + "learning_rate": 1.932001730040886e-05, + "loss": 1.1225, "step": 3071 }, { - "epoch": 0.6441602013000629, - "grad_norm": 5.438652440044395, - "learning_rate": 1.826396635104089e-05, - "loss": 2.0244, + "epoch": 0.43377576955662245, + "grad_norm": 4.604163182369936, + "learning_rate": 1.9319464648309265e-05, + "loss": 0.9683, "step": 3072 }, { - "epoch": 0.6443698888655903, - "grad_norm": 5.793018233781782, - "learning_rate": 1.826269137201453e-05, - "loss": 1.9017, + "epoch": 0.4339169726066083, + "grad_norm": 3.9060449651149365, + "learning_rate": 1.9318911779629337e-05, + "loss": 0.8944, "step": 3073 }, { - "epoch": 0.6445795764311176, - "grad_norm": 6.288640526570729, - "learning_rate": 1.82614159695109e-05, - "loss": 2.2585, + "epoch": 0.43405817565659416, + "grad_norm": 4.20274958217526, + "learning_rate": 1.9318358694381926e-05, + "loss": 1.0341, "step": 3074 }, { - "epoch": 0.644789263996645, - "grad_norm": 5.862238751583479, - "learning_rate": 1.826014014359538e-05, - "loss": 1.8447, + "epoch": 0.43419937870658004, + "grad_norm": 4.065063222993628, + "learning_rate": 1.9317805392579886e-05, + "loss": 1.1294, "step": 3075 }, { - "epoch": 0.6449989515621724, - "grad_norm": 5.642957180478201, - "learning_rate": 1.825886389433335e-05, - "loss": 1.861, + "epoch": 0.4343405817565659, + "grad_norm": 3.7964382943949677, + "learning_rate": 1.9317251874236066e-05, + "loss": 1.1115, "step": 3076 }, { - "epoch": 0.6452086391276998, - "grad_norm": 6.10056544438305, - "learning_rate": 1.8257587221790218e-05, - "loss": 1.928, + "epoch": 0.4344817848065518, + "grad_norm": 4.222133006828667, + "learning_rate": 1.931669813936334e-05, + "loss": 1.0976, "step": 3077 }, { - "epoch": 0.6454183266932271, - "grad_norm": 5.919663961991319, - "learning_rate": 1.825631012603142e-05, - "loss": 1.6966, + "epoch": 0.4346229878565377, + "grad_norm": 3.442718841787935, + "learning_rate": 1.931614418797457e-05, + "loss": 0.9777, "step": 3078 }, { - "epoch": 0.6456280142587545, - "grad_norm": 5.9090245248108, - "learning_rate": 1.8255032607122406e-05, - "loss": 1.8555, + "epoch": 0.4347641909065236, + "grad_norm": 4.399964640953782, + "learning_rate": 1.9315590020082637e-05, + "loss": 1.0787, "step": 3079 }, { - "epoch": 0.6458377018242818, - "grad_norm": 6.233306103273052, - "learning_rate": 1.8253754665128656e-05, - "loss": 1.6421, + "epoch": 0.43490539395650946, + "grad_norm": 3.4758442010782633, + "learning_rate": 1.9315035635700412e-05, + "loss": 0.9132, "step": 3080 }, { - "epoch": 0.6460473893898092, - "grad_norm": 5.937586057974155, - "learning_rate": 1.825247630011566e-05, - "loss": 2.4088, + "epoch": 0.43504659700649534, + "grad_norm": 4.157572424347774, + "learning_rate": 1.9314481034840783e-05, + "loss": 1.1864, "step": 3081 }, { - "epoch": 0.6462570769553365, - "grad_norm": 5.363858077334569, - "learning_rate": 1.825119751214895e-05, - "loss": 2.2506, + "epoch": 0.4351878000564812, + "grad_norm": 3.458396471551333, + "learning_rate": 1.9313926217516637e-05, + "loss": 0.8976, "step": 3082 }, { - "epoch": 0.6464667645208639, - "grad_norm": 5.544056082084755, - "learning_rate": 1.8249918301294046e-05, - "loss": 1.6543, + "epoch": 0.4353290031064671, + "grad_norm": 3.5817117213904512, + "learning_rate": 1.9313371183740868e-05, + "loss": 0.8821, "step": 3083 }, { - "epoch": 0.6466764520863912, - "grad_norm": 5.871512424746385, - "learning_rate": 1.8248638667616526e-05, - "loss": 2.0845, + "epoch": 0.435470206156453, + "grad_norm": 3.805290334510746, + "learning_rate": 1.9312815933526375e-05, + "loss": 0.998, "step": 3084 }, { - "epoch": 0.6468861396519187, - "grad_norm": 6.684895913363462, - "learning_rate": 1.8247358611181965e-05, - "loss": 2.2184, + "epoch": 0.43561140920643887, + "grad_norm": 3.896199253742902, + "learning_rate": 1.931226046688606e-05, + "loss": 1.0963, "step": 3085 }, { - "epoch": 0.647095827217446, - "grad_norm": 5.45504528562288, - "learning_rate": 1.8246078132055975e-05, - "loss": 2.1095, + "epoch": 0.43575261225642475, + "grad_norm": 4.434914638021939, + "learning_rate": 1.9311704783832835e-05, + "loss": 1.3411, "step": 3086 }, { - "epoch": 0.6473055147829734, - "grad_norm": 8.272426907997717, - "learning_rate": 1.8244797230304176e-05, - "loss": 1.7937, + "epoch": 0.43589381530641064, + "grad_norm": 3.79571125628213, + "learning_rate": 1.9311148884379616e-05, + "loss": 1.0854, "step": 3087 }, { - "epoch": 0.6475152023485007, - "grad_norm": 5.8539053275208035, - "learning_rate": 1.8243515905992217e-05, - "loss": 2.13, + "epoch": 0.4360350183563965, + "grad_norm": 3.682735872139414, + "learning_rate": 1.9310592768539315e-05, + "loss": 0.9987, "step": 3088 }, { - "epoch": 0.6477248899140281, - "grad_norm": 7.058377458417762, - "learning_rate": 1.8242234159185774e-05, - "loss": 1.6912, + "epoch": 0.4361762214063824, + "grad_norm": 4.081997895492289, + "learning_rate": 1.9310036436324857e-05, + "loss": 0.9855, "step": 3089 }, { - "epoch": 0.6479345774795554, - "grad_norm": 6.326104161111226, - "learning_rate": 1.8240951989950532e-05, - "loss": 1.988, + "epoch": 0.43631742445636823, + "grad_norm": 3.6621109158471827, + "learning_rate": 1.9309479887749175e-05, + "loss": 0.9773, "step": 3090 }, { - "epoch": 0.6481442650450828, - "grad_norm": 5.299640883892419, - "learning_rate": 1.823966939835221e-05, - "loss": 1.6287, + "epoch": 0.4364586275063541, + "grad_norm": 3.887623603044863, + "learning_rate": 1.93089231228252e-05, + "loss": 1.1032, "step": 3091 }, { - "epoch": 0.6483539526106102, - "grad_norm": 6.780734969667526, - "learning_rate": 1.823838638445654e-05, - "loss": 1.8447, + "epoch": 0.43659983055634, + "grad_norm": 3.3838150639669, + "learning_rate": 1.930836614156587e-05, + "loss": 0.8962, "step": 3092 }, { - "epoch": 0.6485636401761375, - "grad_norm": 6.071306870888296, - "learning_rate": 1.8237102948329276e-05, - "loss": 2.0756, + "epoch": 0.4367410336063259, + "grad_norm": 3.2860702019130885, + "learning_rate": 1.9307808943984132e-05, + "loss": 0.8299, "step": 3093 }, { - "epoch": 0.648773327741665, - "grad_norm": 6.553205057201114, - "learning_rate": 1.82358190900362e-05, - "loss": 1.8641, + "epoch": 0.43688223665631176, + "grad_norm": 3.190590588823567, + "learning_rate": 1.9307251530092937e-05, + "loss": 0.8649, "step": 3094 }, { - "epoch": 0.6489830153071923, - "grad_norm": 6.238687972865546, - "learning_rate": 1.8234534809643113e-05, - "loss": 1.7182, + "epoch": 0.43702343970629765, + "grad_norm": 4.188406345952829, + "learning_rate": 1.9306693899905232e-05, + "loss": 0.9387, "step": 3095 }, { - "epoch": 0.6491927028727197, - "grad_norm": 5.996039594722565, - "learning_rate": 1.8233250107215835e-05, - "loss": 1.8194, + "epoch": 0.43716464275628353, + "grad_norm": 3.0633340570693552, + "learning_rate": 1.930613605343398e-05, + "loss": 0.8007, "step": 3096 }, { - "epoch": 0.649402390438247, - "grad_norm": 5.756966442994074, - "learning_rate": 1.823196498282021e-05, - "loss": 1.6929, + "epoch": 0.4373058458062694, + "grad_norm": 4.238478260569049, + "learning_rate": 1.9305577990692148e-05, + "loss": 1.2214, "step": 3097 }, { - "epoch": 0.6496120780037744, - "grad_norm": 7.032618136284973, - "learning_rate": 1.82306794365221e-05, - "loss": 2.0718, + "epoch": 0.4374470488562553, + "grad_norm": 3.6520101148347135, + "learning_rate": 1.93050197116927e-05, + "loss": 1.0285, "step": 3098 }, { - "epoch": 0.6498217655693017, - "grad_norm": 6.879859607243169, - "learning_rate": 1.822939346838739e-05, - "loss": 1.7797, + "epoch": 0.4375882519062412, + "grad_norm": 3.431828767800515, + "learning_rate": 1.9304461216448612e-05, + "loss": 0.9831, "step": 3099 }, { - "epoch": 0.6500314531348291, - "grad_norm": 7.483046624586785, - "learning_rate": 1.8228107078482e-05, - "loss": 2.0442, + "epoch": 0.43772945495622706, + "grad_norm": 3.4110434426749006, + "learning_rate": 1.9303902504972866e-05, + "loss": 0.8503, "step": 3100 }, { - "epoch": 0.6502411407003564, - "grad_norm": 6.199752119143285, - "learning_rate": 1.8226820266871846e-05, - "loss": 1.9179, + "epoch": 0.43787065800621294, + "grad_norm": 3.9110158690603223, + "learning_rate": 1.9303343577278442e-05, + "loss": 1.0785, "step": 3101 }, { - "epoch": 0.6504508282658839, - "grad_norm": 6.330111137169876, - "learning_rate": 1.8225533033622885e-05, - "loss": 2.1492, + "epoch": 0.4380118610561988, + "grad_norm": 4.232109840186461, + "learning_rate": 1.9302784433378333e-05, + "loss": 1.2743, "step": 3102 }, { - "epoch": 0.6506605158314112, - "grad_norm": 6.355188291172677, - "learning_rate": 1.822424537880109e-05, - "loss": 2.0496, + "epoch": 0.4381530641061847, + "grad_norm": 3.986420497115517, + "learning_rate": 1.930222507328553e-05, + "loss": 0.9571, "step": 3103 }, { - "epoch": 0.6508702033969386, - "grad_norm": 6.083462761673524, - "learning_rate": 1.8222957302472454e-05, - "loss": 1.7815, + "epoch": 0.4382942671561706, + "grad_norm": 3.7563424477687417, + "learning_rate": 1.9301665497013034e-05, + "loss": 1.057, "step": 3104 }, { - "epoch": 0.6510798909624659, - "grad_norm": 6.823633269025465, - "learning_rate": 1.8221668804702995e-05, - "loss": 1.8783, + "epoch": 0.4384354702061565, + "grad_norm": 5.4073795662568545, + "learning_rate": 1.930110570457385e-05, + "loss": 1.052, "step": 3105 }, { - "epoch": 0.6512895785279933, - "grad_norm": 7.418728386520889, - "learning_rate": 1.822037988555875e-05, - "loss": 1.7743, + "epoch": 0.43857667325614236, + "grad_norm": 4.2123607959689835, + "learning_rate": 1.9300545695980985e-05, + "loss": 1.1207, "step": 3106 }, { - "epoch": 0.6514992660935206, - "grad_norm": 6.2550496740962105, - "learning_rate": 1.821909054510578e-05, - "loss": 2.0167, + "epoch": 0.4387178763061282, + "grad_norm": 5.140158271535641, + "learning_rate": 1.929998547124745e-05, + "loss": 1.4517, "step": 3107 }, { - "epoch": 0.651708953659048, - "grad_norm": 6.050550669136302, - "learning_rate": 1.821780078341016e-05, - "loss": 1.8372, + "epoch": 0.43885907935611407, + "grad_norm": 4.623403812100402, + "learning_rate": 1.929942503038628e-05, + "loss": 1.2624, "step": 3108 }, { - "epoch": 0.6519186412245754, - "grad_norm": 5.559842794261759, - "learning_rate": 1.8216510600538006e-05, - "loss": 1.8968, + "epoch": 0.43900028240609995, + "grad_norm": 4.211892215808612, + "learning_rate": 1.9298864373410477e-05, + "loss": 1.1706, "step": 3109 }, { - "epoch": 0.6521283287901027, - "grad_norm": 6.758661495195963, - "learning_rate": 1.821521999655542e-05, - "loss": 1.9693, + "epoch": 0.43914148545608583, + "grad_norm": 3.717323900919158, + "learning_rate": 1.9298303500333088e-05, + "loss": 0.9268, "step": 3110 }, { - "epoch": 0.6523380163556302, - "grad_norm": 6.1762410322381225, - "learning_rate": 1.821392897152857e-05, - "loss": 1.9382, + "epoch": 0.4392826885060717, + "grad_norm": 3.7191573374826246, + "learning_rate": 1.929774241116714e-05, + "loss": 0.9278, "step": 3111 }, { - "epoch": 0.6525477039211575, - "grad_norm": 5.5700317873183645, - "learning_rate": 1.821263752552361e-05, - "loss": 1.9557, + "epoch": 0.4394238915560576, + "grad_norm": 3.6876162709137055, + "learning_rate": 1.9297181105925675e-05, + "loss": 1.029, "step": 3112 }, { - "epoch": 0.6527573914866849, - "grad_norm": 6.422690278154433, - "learning_rate": 1.8211345658606737e-05, - "loss": 2.1507, + "epoch": 0.4395650946060435, + "grad_norm": 3.9312474403043147, + "learning_rate": 1.9296619584621737e-05, + "loss": 1.2042, "step": 3113 }, { - "epoch": 0.6529670790522122, - "grad_norm": 5.913869206930769, - "learning_rate": 1.8210053370844152e-05, - "loss": 2.03, + "epoch": 0.43970629765602937, + "grad_norm": 3.852238466815394, + "learning_rate": 1.929605784726837e-05, + "loss": 1.0399, "step": 3114 }, { - "epoch": 0.6531767666177396, - "grad_norm": 8.213758666560123, - "learning_rate": 1.8208760662302097e-05, - "loss": 1.5969, + "epoch": 0.43984750070601525, + "grad_norm": 3.4566880499485877, + "learning_rate": 1.9295495893878638e-05, + "loss": 0.9795, "step": 3115 }, { - "epoch": 0.6533864541832669, - "grad_norm": 5.581285935631441, - "learning_rate": 1.8207467533046817e-05, - "loss": 2.1011, + "epoch": 0.43998870375600113, + "grad_norm": 4.113201618084296, + "learning_rate": 1.9294933724465593e-05, + "loss": 1.3562, "step": 3116 }, { - "epoch": 0.6535961417487943, - "grad_norm": 6.218754340953845, - "learning_rate": 1.820617398314459e-05, - "loss": 2.1834, + "epoch": 0.440129906805987, + "grad_norm": 3.8468804061149426, + "learning_rate": 1.9294371339042305e-05, + "loss": 1.1798, "step": 3117 }, { - "epoch": 0.6538058293143216, - "grad_norm": 5.9414474657047425, - "learning_rate": 1.8204880012661717e-05, - "loss": 1.9557, + "epoch": 0.4402711098559729, + "grad_norm": 4.35408255915857, + "learning_rate": 1.9293808737621837e-05, + "loss": 1.0999, "step": 3118 }, { - "epoch": 0.654015516879849, - "grad_norm": 5.312276878650203, - "learning_rate": 1.820358562166451e-05, - "loss": 2.1391, + "epoch": 0.4404123129059588, + "grad_norm": 4.129514889494459, + "learning_rate": 1.929324592021727e-05, + "loss": 1.0395, "step": 3119 }, { - "epoch": 0.6542252044453764, - "grad_norm": 5.73033774277521, - "learning_rate": 1.8202290810219313e-05, - "loss": 1.9547, + "epoch": 0.44055351595594466, + "grad_norm": 3.8047420571519686, + "learning_rate": 1.9292682886841683e-05, + "loss": 1.0139, "step": 3120 }, { - "epoch": 0.6544348920109038, - "grad_norm": 5.908047660799942, - "learning_rate": 1.8200995578392484e-05, - "loss": 2.2818, + "epoch": 0.44069471900593055, + "grad_norm": 3.9663320981329457, + "learning_rate": 1.9292119637508157e-05, + "loss": 0.9917, "step": 3121 }, { - "epoch": 0.6546445795764311, - "grad_norm": 5.761058241598259, - "learning_rate": 1.8199699926250408e-05, - "loss": 1.9812, + "epoch": 0.44083592205591643, + "grad_norm": 3.586973470481621, + "learning_rate": 1.9291556172229784e-05, + "loss": 0.9907, "step": 3122 }, { - "epoch": 0.6548542671419585, - "grad_norm": 5.0810986758352845, - "learning_rate": 1.819840385385949e-05, - "loss": 1.9485, + "epoch": 0.4409771251059023, + "grad_norm": 4.442665727267057, + "learning_rate": 1.9290992491019657e-05, + "loss": 1.0727, "step": 3123 }, { - "epoch": 0.6550639547074858, - "grad_norm": 6.67215763714508, - "learning_rate": 1.8197107361286157e-05, - "loss": 1.9369, + "epoch": 0.44111832815588814, + "grad_norm": 4.094689875073246, + "learning_rate": 1.929042859389088e-05, + "loss": 1.0971, "step": 3124 }, { - "epoch": 0.6552736422730132, - "grad_norm": 6.30665138181098, - "learning_rate": 1.8195810448596852e-05, - "loss": 2.0806, + "epoch": 0.441259531205874, + "grad_norm": 3.6442186111302775, + "learning_rate": 1.928986448085655e-05, + "loss": 1.0681, "step": 3125 }, { - "epoch": 0.6554833298385406, - "grad_norm": 6.313203246063494, - "learning_rate": 1.8194513115858046e-05, - "loss": 2.2184, + "epoch": 0.4414007342558599, + "grad_norm": 3.4861332755296544, + "learning_rate": 1.9289300151929784e-05, + "loss": 0.9432, "step": 3126 }, { - "epoch": 0.6556930174040679, - "grad_norm": 5.971525330938641, - "learning_rate": 1.8193215363136233e-05, - "loss": 2.0386, + "epoch": 0.4415419373058458, + "grad_norm": 4.248028679586546, + "learning_rate": 1.9288735607123695e-05, + "loss": 0.9943, "step": 3127 }, { - "epoch": 0.6559027049695954, - "grad_norm": 6.704912562677806, - "learning_rate": 1.8191917190497917e-05, - "loss": 1.9626, + "epoch": 0.44168314035583167, + "grad_norm": 3.705672576117425, + "learning_rate": 1.9288170846451402e-05, + "loss": 0.9565, "step": 3128 }, { - "epoch": 0.6561123925351227, - "grad_norm": 7.055745721364275, - "learning_rate": 1.8190618598009642e-05, - "loss": 2.0895, + "epoch": 0.44182434340581755, + "grad_norm": 4.355428299227021, + "learning_rate": 1.928760586992603e-05, + "loss": 1.1321, "step": 3129 }, { - "epoch": 0.6563220801006501, - "grad_norm": 5.705712211192097, - "learning_rate": 1.8189319585737958e-05, - "loss": 1.6538, + "epoch": 0.44196554645580344, + "grad_norm": 3.9657743822931932, + "learning_rate": 1.928704067756071e-05, + "loss": 1.1105, "step": 3130 }, { - "epoch": 0.6565317676661774, - "grad_norm": 6.206017737800097, - "learning_rate": 1.818802015374944e-05, - "loss": 2.0474, + "epoch": 0.4421067495057893, + "grad_norm": 3.894343404317645, + "learning_rate": 1.9286475269368574e-05, + "loss": 1.0944, "step": 3131 }, { - "epoch": 0.6567414552317048, - "grad_norm": 5.416482164873523, - "learning_rate": 1.8186720302110687e-05, - "loss": 1.6146, + "epoch": 0.4422479525557752, + "grad_norm": 3.9497216808509483, + "learning_rate": 1.928590964536276e-05, + "loss": 1.0607, "step": 3132 }, { - "epoch": 0.6569511427972321, - "grad_norm": 5.663122953888279, - "learning_rate": 1.8185420030888322e-05, - "loss": 1.9269, + "epoch": 0.4423891556057611, + "grad_norm": 3.454483643893276, + "learning_rate": 1.9285343805556418e-05, + "loss": 0.8495, "step": 3133 }, { - "epoch": 0.6571608303627595, - "grad_norm": 6.132219005526236, - "learning_rate": 1.8184119340148983e-05, - "loss": 1.8909, + "epoch": 0.44253035865574697, + "grad_norm": 3.8400205721348146, + "learning_rate": 1.9284777749962696e-05, + "loss": 1.1792, "step": 3134 }, { - "epoch": 0.6573705179282868, - "grad_norm": 5.323081498737843, - "learning_rate": 1.8182818229959332e-05, - "loss": 1.6997, + "epoch": 0.44267156170573285, + "grad_norm": 3.8687367168419704, + "learning_rate": 1.928421147859475e-05, + "loss": 1.1146, "step": 3135 }, { - "epoch": 0.6575802054938142, - "grad_norm": 6.3772133343287765, - "learning_rate": 1.818151670038605e-05, - "loss": 2.0824, + "epoch": 0.44281276475571874, + "grad_norm": 4.504090587302897, + "learning_rate": 1.928364499146574e-05, + "loss": 1.0815, "step": 3136 }, { - "epoch": 0.6577898930593415, - "grad_norm": 6.099044204103873, - "learning_rate": 1.818021475149585e-05, - "loss": 1.9255, + "epoch": 0.4429539678057046, + "grad_norm": 3.9472332764295297, + "learning_rate": 1.9283078288588826e-05, + "loss": 0.8455, "step": 3137 }, { - "epoch": 0.657999580624869, - "grad_norm": 6.425465340660839, - "learning_rate": 1.817891238335546e-05, - "loss": 1.8174, + "epoch": 0.4430951708556905, + "grad_norm": 4.642613878242063, + "learning_rate": 1.9282511369977185e-05, + "loss": 1.2216, "step": 3138 }, { - "epoch": 0.6582092681903963, - "grad_norm": 6.248321496759999, - "learning_rate": 1.817760959603162e-05, - "loss": 2.1289, + "epoch": 0.4432363739056764, + "grad_norm": 3.710934033699281, + "learning_rate": 1.9281944235643986e-05, + "loss": 0.92, "step": 3139 }, { - "epoch": 0.6584189557559237, - "grad_norm": 6.09749499931928, - "learning_rate": 1.817630638959111e-05, - "loss": 1.7929, + "epoch": 0.44337757695566227, + "grad_norm": 3.5441624949324897, + "learning_rate": 1.9281376885602412e-05, + "loss": 0.9521, "step": 3140 }, { - "epoch": 0.658628643321451, - "grad_norm": 6.431517971119232, - "learning_rate": 1.8175002764100714e-05, - "loss": 2.0492, + "epoch": 0.4435187800056481, + "grad_norm": 4.0213182031788905, + "learning_rate": 1.928080931986565e-05, + "loss": 1.1498, "step": 3141 }, { - "epoch": 0.6588383308869784, - "grad_norm": 5.302483425033261, - "learning_rate": 1.8173698719627245e-05, - "loss": 1.7843, + "epoch": 0.443659983055634, + "grad_norm": 3.5433944095505976, + "learning_rate": 1.9280241538446885e-05, + "loss": 1.0138, "step": 3142 }, { - "epoch": 0.6590480184525057, - "grad_norm": 6.6870306880329515, - "learning_rate": 1.8172394256237542e-05, - "loss": 1.8519, + "epoch": 0.44380118610561986, + "grad_norm": 3.738954619398003, + "learning_rate": 1.9279673541359313e-05, + "loss": 0.9401, "step": 3143 }, { - "epoch": 0.6592577060180331, - "grad_norm": 5.73647608556456, - "learning_rate": 1.817108937399846e-05, - "loss": 1.5772, + "epoch": 0.44394238915560574, + "grad_norm": 4.276428807787635, + "learning_rate": 1.927910532861614e-05, + "loss": 1.194, "step": 3144 }, { - "epoch": 0.6594673935835605, - "grad_norm": 6.183413960499327, - "learning_rate": 1.8169784072976875e-05, - "loss": 1.9048, + "epoch": 0.4440835922055916, + "grad_norm": 4.546884445078243, + "learning_rate": 1.9278536900230564e-05, + "loss": 1.0508, "step": 3145 }, { - "epoch": 0.6596770811490879, - "grad_norm": 6.016127980556942, - "learning_rate": 1.8168478353239685e-05, - "loss": 1.699, + "epoch": 0.4442247952555775, + "grad_norm": 3.7622834397976104, + "learning_rate": 1.9277968256215794e-05, + "loss": 1.0597, "step": 3146 }, { - "epoch": 0.6598867687146153, - "grad_norm": 5.651412614976321, - "learning_rate": 1.816717221485381e-05, - "loss": 1.9678, + "epoch": 0.4443659983055634, + "grad_norm": 4.1101496165031595, + "learning_rate": 1.9277399396585054e-05, + "loss": 0.9759, "step": 3147 }, { - "epoch": 0.6600964562801426, - "grad_norm": 5.941819015814283, - "learning_rate": 1.81658656578862e-05, - "loss": 1.769, + "epoch": 0.4445072013555493, + "grad_norm": 3.896317784091958, + "learning_rate": 1.9276830321351558e-05, + "loss": 1.2466, "step": 3148 }, { - "epoch": 0.66030614384567, - "grad_norm": 7.040237613396364, - "learning_rate": 1.8164558682403807e-05, - "loss": 1.7248, + "epoch": 0.44464840440553516, + "grad_norm": 3.8148628675246576, + "learning_rate": 1.927626103052853e-05, + "loss": 1.2526, "step": 3149 }, { - "epoch": 0.6605158314111973, - "grad_norm": 7.07162331659186, - "learning_rate": 1.8163251288473623e-05, - "loss": 1.9441, + "epoch": 0.44478960745552104, + "grad_norm": 4.048905476139718, + "learning_rate": 1.9275691524129203e-05, + "loss": 1.0715, "step": 3150 }, { - "epoch": 0.6607255189767247, - "grad_norm": 6.6492587134559065, - "learning_rate": 1.8161943476162652e-05, - "loss": 2.1139, + "epoch": 0.4449308105055069, + "grad_norm": 3.4410239215511442, + "learning_rate": 1.927512180216681e-05, + "loss": 1.0097, "step": 3151 }, { - "epoch": 0.660935206542252, - "grad_norm": 6.119506534146399, - "learning_rate": 1.816063524553792e-05, - "loss": 1.8542, + "epoch": 0.4450720135554928, + "grad_norm": 3.4607608419963447, + "learning_rate": 1.9274551864654593e-05, + "loss": 1.0933, "step": 3152 }, { - "epoch": 0.6611448941077794, - "grad_norm": 5.76246901298754, - "learning_rate": 1.815932659666648e-05, - "loss": 1.6826, + "epoch": 0.4452132166054787, + "grad_norm": 4.46369376071989, + "learning_rate": 1.9273981711605793e-05, + "loss": 1.1699, "step": 3153 }, { - "epoch": 0.6613545816733067, - "grad_norm": 5.955827511190346, - "learning_rate": 1.8158017529615398e-05, - "loss": 2.3246, + "epoch": 0.4453544196554646, + "grad_norm": 4.210818690772342, + "learning_rate": 1.9273411343033667e-05, + "loss": 1.2356, "step": 3154 }, { - "epoch": 0.6615642692388342, - "grad_norm": 6.276874995340356, - "learning_rate": 1.815670804445177e-05, - "loss": 1.9315, + "epoch": 0.44549562270545046, + "grad_norm": 4.389419686466223, + "learning_rate": 1.9272840758951464e-05, + "loss": 0.9513, "step": 3155 }, { - "epoch": 0.6617739568043615, - "grad_norm": 6.094621171179124, - "learning_rate": 1.8155398141242707e-05, - "loss": 2.1074, + "epoch": 0.44563682575543634, + "grad_norm": 3.891153503625258, + "learning_rate": 1.9272269959372444e-05, + "loss": 1.043, "step": 3156 }, { - "epoch": 0.6619836443698889, - "grad_norm": 5.979021126257763, - "learning_rate": 1.8154087820055345e-05, - "loss": 1.9156, + "epoch": 0.4457780288054222, + "grad_norm": 4.208935098734498, + "learning_rate": 1.927169894430988e-05, + "loss": 1.1196, "step": 3157 }, { - "epoch": 0.6621933319354162, - "grad_norm": 6.205656242935968, - "learning_rate": 1.815277708095684e-05, - "loss": 1.8972, + "epoch": 0.44591923185540805, + "grad_norm": 3.620125914607161, + "learning_rate": 1.9271127713777033e-05, + "loss": 1.0904, "step": 3158 }, { - "epoch": 0.6624030195009436, - "grad_norm": 6.610765105239977, - "learning_rate": 1.815146592401437e-05, - "loss": 1.9385, + "epoch": 0.44606043490539393, + "grad_norm": 5.048032495465287, + "learning_rate": 1.9270556267787184e-05, + "loss": 1.5335, "step": 3159 }, { - "epoch": 0.6626127070664709, - "grad_norm": 6.963971902854117, - "learning_rate": 1.815015434929513e-05, - "loss": 2.191, + "epoch": 0.4462016379553798, + "grad_norm": 4.419334627418588, + "learning_rate": 1.926998460635361e-05, + "loss": 1.0454, "step": 3160 }, { - "epoch": 0.6628223946319983, - "grad_norm": 6.085067625033411, - "learning_rate": 1.8148842356866344e-05, - "loss": 2.1619, + "epoch": 0.4463428410053657, + "grad_norm": 3.872187238600624, + "learning_rate": 1.9269412729489597e-05, + "loss": 1.1314, "step": 3161 }, { - "epoch": 0.6630320821975257, - "grad_norm": 6.375680670326424, - "learning_rate": 1.8147529946795256e-05, - "loss": 2.0022, + "epoch": 0.4464840440553516, + "grad_norm": 3.336098507424238, + "learning_rate": 1.9268840637208436e-05, + "loss": 1.0939, "step": 3162 }, { - "epoch": 0.663241769763053, - "grad_norm": 5.889061394899102, - "learning_rate": 1.814621711914913e-05, - "loss": 1.8135, + "epoch": 0.44662524710533746, + "grad_norm": 4.1961448539736885, + "learning_rate": 1.9268268329523422e-05, + "loss": 1.0461, "step": 3163 }, { - "epoch": 0.6634514573285805, - "grad_norm": 6.977891226784837, - "learning_rate": 1.8144903873995244e-05, - "loss": 1.9745, + "epoch": 0.44676645015532335, + "grad_norm": 3.999357295496932, + "learning_rate": 1.926769580644785e-05, + "loss": 1.1098, "step": 3164 }, { - "epoch": 0.6636611448941078, - "grad_norm": 5.614293695853713, - "learning_rate": 1.8143590211400907e-05, - "loss": 2.1332, + "epoch": 0.44690765320530923, + "grad_norm": 3.336350848220422, + "learning_rate": 1.9267123067995035e-05, + "loss": 0.9214, "step": 3165 }, { - "epoch": 0.6638708324596352, - "grad_norm": 6.494376532493229, - "learning_rate": 1.814227613143345e-05, - "loss": 1.9431, + "epoch": 0.4470488562552951, + "grad_norm": 3.812111391485075, + "learning_rate": 1.926655011417828e-05, + "loss": 1.1797, "step": 3166 }, { - "epoch": 0.6640805200251625, - "grad_norm": 5.978828161545406, - "learning_rate": 1.8140961634160216e-05, - "loss": 1.7568, + "epoch": 0.447190059305281, + "grad_norm": 4.202307440587516, + "learning_rate": 1.92659769450109e-05, + "loss": 1.0052, "step": 3167 }, { - "epoch": 0.6642902075906899, - "grad_norm": 6.363451461464999, - "learning_rate": 1.813964671964858e-05, - "loss": 1.9357, + "epoch": 0.4473312623552669, + "grad_norm": 3.878800013574339, + "learning_rate": 1.9265403560506223e-05, + "loss": 0.9326, "step": 3168 }, { - "epoch": 0.6644998951562172, - "grad_norm": 6.428008802473638, - "learning_rate": 1.8138331387965932e-05, - "loss": 1.7691, + "epoch": 0.44747246540525276, + "grad_norm": 3.83858370828189, + "learning_rate": 1.9264829960677564e-05, + "loss": 1.0843, "step": 3169 }, { - "epoch": 0.6647095827217446, - "grad_norm": 7.025479013111277, - "learning_rate": 1.813701563917969e-05, - "loss": 2.3041, + "epoch": 0.44761366845523864, + "grad_norm": 4.029127778150835, + "learning_rate": 1.9264256145538262e-05, + "loss": 1.1457, "step": 3170 }, { - "epoch": 0.6649192702872719, - "grad_norm": 5.905271174198737, - "learning_rate": 1.8135699473357275e-05, - "loss": 1.9377, + "epoch": 0.4477548715052245, + "grad_norm": 4.195250814615195, + "learning_rate": 1.9263682115101644e-05, + "loss": 1.0376, "step": 3171 }, { - "epoch": 0.6651289578527994, - "grad_norm": 5.912755324416049, - "learning_rate": 1.8134382890566156e-05, - "loss": 1.9143, + "epoch": 0.4478960745552104, + "grad_norm": 3.555874022346718, + "learning_rate": 1.926310786938106e-05, + "loss": 1.0378, "step": 3172 }, { - "epoch": 0.6653386454183267, - "grad_norm": 6.108354556888889, - "learning_rate": 1.8133065890873806e-05, - "loss": 1.7986, + "epoch": 0.4480372776051963, + "grad_norm": 4.780502820428922, + "learning_rate": 1.9262533408389842e-05, + "loss": 1.1452, "step": 3173 }, { - "epoch": 0.6655483329838541, - "grad_norm": 6.954863788148386, - "learning_rate": 1.813174847434772e-05, - "loss": 1.999, + "epoch": 0.4481784806551822, + "grad_norm": 4.249107807085745, + "learning_rate": 1.9261958732141352e-05, + "loss": 1.0874, "step": 3174 }, { - "epoch": 0.6657580205493814, - "grad_norm": 7.120107211779747, - "learning_rate": 1.8130430641055423e-05, - "loss": 2.0092, + "epoch": 0.448319683705168, + "grad_norm": 3.966767663947841, + "learning_rate": 1.9261383840648943e-05, + "loss": 1.0838, "step": 3175 }, { - "epoch": 0.6659677081149088, - "grad_norm": 6.256929362352591, - "learning_rate": 1.8129112391064454e-05, - "loss": 1.9962, + "epoch": 0.4484608867551539, + "grad_norm": 3.273933104918183, + "learning_rate": 1.926080873392597e-05, + "loss": 0.9047, "step": 3176 }, { - "epoch": 0.6661773956804361, - "grad_norm": 5.832168941411809, - "learning_rate": 1.8127793724442374e-05, - "loss": 1.7261, + "epoch": 0.44860208980513977, + "grad_norm": 4.252825690251586, + "learning_rate": 1.92602334119858e-05, + "loss": 1.3435, "step": 3177 }, { - "epoch": 0.6663870832459635, - "grad_norm": 6.043609628309598, - "learning_rate": 1.812647464125677e-05, - "loss": 1.8731, + "epoch": 0.44874329285512565, + "grad_norm": 4.014431225672729, + "learning_rate": 1.925965787484181e-05, + "loss": 0.9438, "step": 3178 }, { - "epoch": 0.6665967708114908, - "grad_norm": 6.106789644038843, - "learning_rate": 1.8125155141575246e-05, - "loss": 2.1362, + "epoch": 0.44888449590511154, + "grad_norm": 4.702349910312314, + "learning_rate": 1.9259082122507365e-05, + "loss": 1.1884, "step": 3179 }, { - "epoch": 0.6668064583770182, - "grad_norm": 5.632182617457892, - "learning_rate": 1.8123835225465432e-05, - "loss": 1.7333, + "epoch": 0.4490256989550974, + "grad_norm": 3.6141768518181725, + "learning_rate": 1.9258506154995854e-05, + "loss": 0.8488, "step": 3180 }, { - "epoch": 0.6670161459425457, - "grad_norm": 5.444439523677967, - "learning_rate": 1.812251489299497e-05, - "loss": 1.6666, + "epoch": 0.4491669020050833, + "grad_norm": 4.015523625208543, + "learning_rate": 1.9257929972320653e-05, + "loss": 1.0322, "step": 3181 }, { - "epoch": 0.667225833508073, - "grad_norm": 6.804852434386522, - "learning_rate": 1.812119414423153e-05, - "loss": 1.627, + "epoch": 0.4493081050550692, + "grad_norm": 3.4938026643625673, + "learning_rate": 1.9257353574495164e-05, + "loss": 1.0651, "step": 3182 }, { - "epoch": 0.6674355210736004, - "grad_norm": 5.893428175041616, - "learning_rate": 1.811987297924281e-05, - "loss": 2.4804, + "epoch": 0.44944930810505507, + "grad_norm": 3.4102269969836176, + "learning_rate": 1.9256776961532773e-05, + "loss": 0.8418, "step": 3183 }, { - "epoch": 0.6676452086391277, - "grad_norm": 5.722632087161769, - "learning_rate": 1.811855139809651e-05, - "loss": 1.7029, + "epoch": 0.44959051115504095, + "grad_norm": 5.2259054057796686, + "learning_rate": 1.925620013344688e-05, + "loss": 1.4204, "step": 3184 }, { - "epoch": 0.6678548962046551, - "grad_norm": 5.863186777028515, - "learning_rate": 1.811722940086037e-05, - "loss": 2.3825, + "epoch": 0.44973171420502683, + "grad_norm": 4.621444602251797, + "learning_rate": 1.92556230902509e-05, + "loss": 1.1346, "step": 3185 }, { - "epoch": 0.6680645837701824, - "grad_norm": 6.259101407820408, - "learning_rate": 1.811590698760215e-05, - "loss": 2.1024, + "epoch": 0.4498729172550127, + "grad_norm": 4.677880473354801, + "learning_rate": 1.925504583195823e-05, + "loss": 1.2539, "step": 3186 }, { - "epoch": 0.6682742713357098, - "grad_norm": 5.515913406165411, - "learning_rate": 1.8114584158389615e-05, - "loss": 1.8011, + "epoch": 0.4500141203049986, + "grad_norm": 4.905825545203969, + "learning_rate": 1.9254468358582293e-05, + "loss": 1.2135, "step": 3187 }, { - "epoch": 0.6684839589012371, - "grad_norm": 5.745546846859845, - "learning_rate": 1.811326091329057e-05, - "loss": 1.6167, + "epoch": 0.4501553233549845, + "grad_norm": 3.5688093651142174, + "learning_rate": 1.925389067013651e-05, + "loss": 0.987, "step": 3188 }, { - "epoch": 0.6686936464667645, - "grad_norm": 6.530695489850643, - "learning_rate": 1.8111937252372833e-05, - "loss": 1.674, + "epoch": 0.45029652640497037, + "grad_norm": 3.473850155130419, + "learning_rate": 1.9253312766634308e-05, + "loss": 1.0975, "step": 3189 }, { - "epoch": 0.6689033340322919, - "grad_norm": 6.235472766979741, - "learning_rate": 1.8110613175704243e-05, - "loss": 1.7064, + "epoch": 0.45043772945495625, + "grad_norm": 4.176480677601507, + "learning_rate": 1.925273464808911e-05, + "loss": 0.9384, "step": 3190 }, { - "epoch": 0.6691130215978193, - "grad_norm": 5.609018975139266, - "learning_rate": 1.8109288683352656e-05, - "loss": 1.9706, + "epoch": 0.45057893250494213, + "grad_norm": 4.151353910240842, + "learning_rate": 1.9252156314514353e-05, + "loss": 1.1771, "step": 3191 }, { - "epoch": 0.6693227091633466, - "grad_norm": 6.415608593325858, - "learning_rate": 1.8107963775385963e-05, - "loss": 1.869, + "epoch": 0.450720135554928, + "grad_norm": 4.212822469303504, + "learning_rate": 1.925157776592348e-05, + "loss": 1.0241, "step": 3192 }, { - "epoch": 0.669532396728874, - "grad_norm": 5.981825121751691, - "learning_rate": 1.8106638451872065e-05, - "loss": 1.8882, + "epoch": 0.45086133860491384, + "grad_norm": 4.138883514757742, + "learning_rate": 1.9250999002329937e-05, + "loss": 1.2045, "step": 3193 }, { - "epoch": 0.6697420842944013, - "grad_norm": 5.759014511741097, - "learning_rate": 1.8105312712878884e-05, - "loss": 1.7248, + "epoch": 0.4510025416548997, + "grad_norm": 4.450799634740918, + "learning_rate": 1.925042002374717e-05, + "loss": 1.2044, "step": 3194 }, { - "epoch": 0.6699517718599287, - "grad_norm": 6.165726035914848, - "learning_rate": 1.810398655847437e-05, - "loss": 1.8585, + "epoch": 0.4511437447048856, + "grad_norm": 3.8158990033837963, + "learning_rate": 1.9249840830188636e-05, + "loss": 1.0626, "step": 3195 }, { - "epoch": 0.670161459425456, - "grad_norm": 5.623744311347049, - "learning_rate": 1.8102659988726485e-05, - "loss": 1.7314, + "epoch": 0.4512849477548715, + "grad_norm": 3.5693973545212283, + "learning_rate": 1.9249261421667796e-05, + "loss": 0.9432, "step": 3196 }, { - "epoch": 0.6703711469909834, - "grad_norm": 6.7093902158353655, - "learning_rate": 1.8101333003703224e-05, - "loss": 1.8175, + "epoch": 0.4514261508048574, + "grad_norm": 3.776950976413844, + "learning_rate": 1.9248681798198115e-05, + "loss": 1.0015, "step": 3197 }, { - "epoch": 0.6705808345565109, - "grad_norm": 6.684429856044499, - "learning_rate": 1.8100005603472597e-05, - "loss": 1.9869, + "epoch": 0.45156735385484326, + "grad_norm": 3.6542122613715495, + "learning_rate": 1.9248101959793066e-05, + "loss": 1.111, "step": 3198 }, { - "epoch": 0.6707905221220382, - "grad_norm": 6.364278707254858, - "learning_rate": 1.8098677788102636e-05, - "loss": 1.9899, + "epoch": 0.45170855690482914, + "grad_norm": 4.157102983928154, + "learning_rate": 1.924752190646612e-05, + "loss": 1.0073, "step": 3199 }, { - "epoch": 0.6710002096875656, - "grad_norm": 6.140847072729251, - "learning_rate": 1.809734955766139e-05, - "loss": 1.7042, + "epoch": 0.451849759954815, + "grad_norm": 4.324627321850429, + "learning_rate": 1.924694163823076e-05, + "loss": 1.1751, "step": 3200 }, { - "epoch": 0.6712098972530929, - "grad_norm": 6.21746854063012, - "learning_rate": 1.8096020912216932e-05, - "loss": 1.8085, + "epoch": 0.4519909630048009, + "grad_norm": 4.329472017987981, + "learning_rate": 1.9246361155100466e-05, + "loss": 1.2319, "step": 3201 }, { - "epoch": 0.6714195848186203, - "grad_norm": 6.033990021401269, - "learning_rate": 1.8094691851837366e-05, - "loss": 2.097, + "epoch": 0.4521321660547868, + "grad_norm": 3.8949344853269987, + "learning_rate": 1.9245780457088736e-05, + "loss": 0.8497, "step": 3202 }, { - "epoch": 0.6716292723841476, - "grad_norm": 5.946716949760104, - "learning_rate": 1.80933623765908e-05, - "loss": 1.9133, + "epoch": 0.45227336910477267, + "grad_norm": 4.076618400045525, + "learning_rate": 1.924519954420906e-05, + "loss": 1.1222, "step": 3203 }, { - "epoch": 0.671838959949675, - "grad_norm": 6.340688326596011, - "learning_rate": 1.8092032486545374e-05, - "loss": 1.8363, + "epoch": 0.45241457215475855, + "grad_norm": 3.422944268458539, + "learning_rate": 1.9244618416474938e-05, + "loss": 0.8355, "step": 3204 }, { - "epoch": 0.6720486475152023, - "grad_norm": 7.012621078767644, - "learning_rate": 1.8090702181769247e-05, - "loss": 2.3349, + "epoch": 0.45255577520474444, + "grad_norm": 3.6937382444021916, + "learning_rate": 1.9244037073899876e-05, + "loss": 1.2263, "step": 3205 }, { - "epoch": 0.6722583350807297, - "grad_norm": 8.638722903648386, - "learning_rate": 1.8089371462330607e-05, - "loss": 1.5909, + "epoch": 0.4526969782547303, + "grad_norm": 3.540297081990537, + "learning_rate": 1.9243455516497388e-05, + "loss": 0.9721, "step": 3206 }, { - "epoch": 0.672468022646257, - "grad_norm": 6.362569720148795, - "learning_rate": 1.8088040328297644e-05, - "loss": 2.1324, + "epoch": 0.4528381813047162, + "grad_norm": 3.2691056635944826, + "learning_rate": 1.924287374428098e-05, + "loss": 0.9816, "step": 3207 }, { - "epoch": 0.6726777102117845, - "grad_norm": 5.389985791607408, - "learning_rate": 1.8086708779738588e-05, - "loss": 1.7115, + "epoch": 0.4529793843547021, + "grad_norm": 4.34274123705014, + "learning_rate": 1.924229175726418e-05, + "loss": 1.1461, "step": 3208 }, { - "epoch": 0.6728873977773118, - "grad_norm": 7.0497891618394135, - "learning_rate": 1.808537681672168e-05, - "loss": 1.7978, + "epoch": 0.45312058740468797, + "grad_norm": 5.786347674953957, + "learning_rate": 1.9241709555460514e-05, + "loss": 0.8701, "step": 3209 }, { - "epoch": 0.6730970853428392, - "grad_norm": 5.81800661749597, - "learning_rate": 1.808404443931519e-05, - "loss": 1.9682, + "epoch": 0.4532617904546738, + "grad_norm": 3.8476425170383246, + "learning_rate": 1.9241127138883508e-05, + "loss": 1.0792, "step": 3210 }, { - "epoch": 0.6733067729083665, - "grad_norm": 7.809124529242358, - "learning_rate": 1.8082711647587397e-05, - "loss": 1.9835, + "epoch": 0.4534029935046597, + "grad_norm": 3.4908081564891686, + "learning_rate": 1.9240544507546696e-05, + "loss": 1.0428, "step": 3211 }, { - "epoch": 0.6735164604738939, - "grad_norm": 6.127695232328836, - "learning_rate": 1.8081378441606613e-05, - "loss": 2.1506, + "epoch": 0.45354419655464556, + "grad_norm": 4.68394375366079, + "learning_rate": 1.9239961661463623e-05, + "loss": 1.3825, "step": 3212 }, { - "epoch": 0.6737261480394212, - "grad_norm": 6.012062530311532, - "learning_rate": 1.808004482144117e-05, - "loss": 1.7999, + "epoch": 0.45368539960463145, + "grad_norm": 4.0428243504724435, + "learning_rate": 1.923937860064783e-05, + "loss": 1.0483, "step": 3213 }, { - "epoch": 0.6739358356049486, - "grad_norm": 5.805935547418881, - "learning_rate": 1.8078710787159415e-05, - "loss": 1.8612, + "epoch": 0.45382660265461733, + "grad_norm": 3.411877559676967, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8916, "step": 3214 }, { - "epoch": 0.6741455231704759, - "grad_norm": 6.755489030972783, - "learning_rate": 1.807737633882972e-05, - "loss": 1.9856, + "epoch": 0.4539678057046032, + "grad_norm": 4.400073803298403, + "learning_rate": 1.9238211834872293e-05, + "loss": 0.9855, "step": 3215 }, { - "epoch": 0.6743552107360034, - "grad_norm": 7.412176658113902, - "learning_rate": 1.8076041476520482e-05, - "loss": 2.2692, + "epoch": 0.4541090087545891, + "grad_norm": 3.8278615298839997, + "learning_rate": 1.9237628129939665e-05, + "loss": 1.1385, "step": 3216 }, { - "epoch": 0.6745648983015308, - "grad_norm": 6.040010209966778, - "learning_rate": 1.807470620030011e-05, - "loss": 1.8721, + "epoch": 0.454250211804575, + "grad_norm": 3.828211544190985, + "learning_rate": 1.923704421032855e-05, + "loss": 1.2312, "step": 3217 }, { - "epoch": 0.6747745858670581, - "grad_norm": 5.886847429444838, - "learning_rate": 1.8073370510237035e-05, - "loss": 2.2316, + "epoch": 0.45439141485456086, + "grad_norm": 3.7602848217758735, + "learning_rate": 1.9236460076052515e-05, + "loss": 0.9298, "step": 3218 }, { - "epoch": 0.6749842734325855, - "grad_norm": 5.880141646145295, - "learning_rate": 1.8072034406399722e-05, - "loss": 1.9596, + "epoch": 0.45453261790454674, + "grad_norm": 3.391953023647811, + "learning_rate": 1.923587572712514e-05, + "loss": 0.9322, "step": 3219 }, { - "epoch": 0.6751939609981128, - "grad_norm": 6.2079691587909664, - "learning_rate": 1.8070697888856647e-05, - "loss": 1.8824, + "epoch": 0.4546738209545326, + "grad_norm": 3.914184702358827, + "learning_rate": 1.9235291163559996e-05, + "loss": 1.0262, "step": 3220 }, { - "epoch": 0.6754036485636402, - "grad_norm": 6.046700516667364, - "learning_rate": 1.8069360957676305e-05, - "loss": 2.1636, + "epoch": 0.4548150240045185, + "grad_norm": 3.6848019564135193, + "learning_rate": 1.9234706385370677e-05, + "loss": 1.0193, "step": 3221 }, { - "epoch": 0.6756133361291675, - "grad_norm": 5.892219682228525, - "learning_rate": 1.806802361292722e-05, - "loss": 1.9598, + "epoch": 0.4549562270545044, + "grad_norm": 3.977238516663836, + "learning_rate": 1.923412139257077e-05, + "loss": 0.9245, "step": 3222 }, { - "epoch": 0.6758230236946949, - "grad_norm": 6.7077331847729385, - "learning_rate": 1.806668585467793e-05, - "loss": 1.8905, + "epoch": 0.4550974301044903, + "grad_norm": 3.3967267589522443, + "learning_rate": 1.923353618517387e-05, + "loss": 0.8006, "step": 3223 }, { - "epoch": 0.6760327112602222, - "grad_norm": 5.604560933553084, - "learning_rate": 1.8065347682997e-05, - "loss": 1.7328, + "epoch": 0.45523863315447616, + "grad_norm": 4.487494452625829, + "learning_rate": 1.9232950763193576e-05, + "loss": 1.1628, "step": 3224 }, { - "epoch": 0.6762423988257497, - "grad_norm": 5.858242686694441, - "learning_rate": 1.8064009097953014e-05, - "loss": 2.0333, + "epoch": 0.45537983620446204, + "grad_norm": 3.4242316200166485, + "learning_rate": 1.9232365126643494e-05, + "loss": 1.0291, "step": 3225 }, { - "epoch": 0.676452086391277, - "grad_norm": 5.285037726619053, - "learning_rate": 1.806267009961457e-05, - "loss": 1.8122, + "epoch": 0.4555210392544479, + "grad_norm": 3.6325689754040855, + "learning_rate": 1.9231779275537233e-05, + "loss": 0.9864, "step": 3226 }, { - "epoch": 0.6766617739568044, - "grad_norm": 6.393781963263367, - "learning_rate": 1.8061330688050306e-05, - "loss": 1.9621, + "epoch": 0.45566224230443375, + "grad_norm": 5.153986691590836, + "learning_rate": 1.923119320988841e-05, + "loss": 0.9568, "step": 3227 }, { - "epoch": 0.6768714615223317, - "grad_norm": 6.007219534272445, - "learning_rate": 1.8059990863328857e-05, - "loss": 2.2242, + "epoch": 0.45580344535441963, + "grad_norm": 3.5827889421150916, + "learning_rate": 1.923060692971064e-05, + "loss": 1.0245, "step": 3228 }, { - "epoch": 0.6770811490878591, - "grad_norm": 5.996645683955281, - "learning_rate": 1.8058650625518904e-05, - "loss": 1.7635, + "epoch": 0.4559446484044055, + "grad_norm": 4.048807565714518, + "learning_rate": 1.9230020435017553e-05, + "loss": 1.0826, "step": 3229 }, { - "epoch": 0.6772908366533864, - "grad_norm": 6.45365201126418, - "learning_rate": 1.8057309974689122e-05, - "loss": 2.1099, + "epoch": 0.4560858514543914, + "grad_norm": 3.8985654337209708, + "learning_rate": 1.9229433725822776e-05, + "loss": 1.2116, "step": 3230 }, { - "epoch": 0.6775005242189138, - "grad_norm": 7.635443419482907, - "learning_rate": 1.8055968910908234e-05, - "loss": 1.7415, + "epoch": 0.4562270545043773, + "grad_norm": 3.494945232905836, + "learning_rate": 1.9228846802139947e-05, + "loss": 0.8652, "step": 3231 }, { - "epoch": 0.6777102117844411, - "grad_norm": 5.724378847655993, - "learning_rate": 1.805462743424496e-05, - "loss": 1.9009, + "epoch": 0.45636825755436317, + "grad_norm": 3.694960939011435, + "learning_rate": 1.9228259663982705e-05, + "loss": 1.0792, "step": 3232 }, { - "epoch": 0.6779198993499685, - "grad_norm": 6.324330869650399, - "learning_rate": 1.8053285544768065e-05, - "loss": 1.9163, + "epoch": 0.45650946060434905, + "grad_norm": 3.6436933758651238, + "learning_rate": 1.9227672311364692e-05, + "loss": 0.9096, "step": 3233 }, { - "epoch": 0.678129586915496, - "grad_norm": 5.699776196850578, - "learning_rate": 1.8051943242546323e-05, - "loss": 1.7713, + "epoch": 0.45665066365433493, + "grad_norm": 4.147361020507568, + "learning_rate": 1.922708474429956e-05, + "loss": 1.3041, "step": 3234 }, { - "epoch": 0.6783392744810233, - "grad_norm": 5.710073861730657, - "learning_rate": 1.805060052764852e-05, - "loss": 2.0643, + "epoch": 0.4567918667043208, + "grad_norm": 3.6368159879778172, + "learning_rate": 1.9226496962800967e-05, + "loss": 0.9192, "step": 3235 }, { - "epoch": 0.6785489620465507, - "grad_norm": 5.462464488146465, - "learning_rate": 1.8049257400143476e-05, - "loss": 2.0281, + "epoch": 0.4569330697543067, + "grad_norm": 3.761061322999015, + "learning_rate": 1.9225908966882563e-05, + "loss": 0.906, "step": 3236 }, { - "epoch": 0.678758649612078, - "grad_norm": 6.461800779497199, - "learning_rate": 1.804791386010003e-05, - "loss": 1.9406, + "epoch": 0.4570742728042926, + "grad_norm": 3.5783071665991844, + "learning_rate": 1.9225320756558023e-05, + "loss": 0.8649, "step": 3237 }, { - "epoch": 0.6789683371776054, - "grad_norm": 5.9276722915766795, - "learning_rate": 1.804656990758704e-05, - "loss": 2.043, + "epoch": 0.45721547585427846, + "grad_norm": 3.4016674278946257, + "learning_rate": 1.922473233184101e-05, + "loss": 0.9599, "step": 3238 }, { - "epoch": 0.6791780247431327, - "grad_norm": 5.386339130503794, - "learning_rate": 1.8045225542673385e-05, - "loss": 1.8506, + "epoch": 0.45735667890426435, + "grad_norm": 3.4131824281945202, + "learning_rate": 1.9224143692745207e-05, + "loss": 1.072, "step": 3239 }, { - "epoch": 0.6793877123086601, - "grad_norm": 5.333098945509606, - "learning_rate": 1.804388076542797e-05, - "loss": 1.7762, + "epoch": 0.45749788195425023, + "grad_norm": 3.2038719797443207, + "learning_rate": 1.922355483928428e-05, + "loss": 0.8694, "step": 3240 }, { - "epoch": 0.6795973998741874, - "grad_norm": 5.939505899239394, - "learning_rate": 1.804253557591971e-05, - "loss": 2.145, + "epoch": 0.4576390850042361, + "grad_norm": 6.298507128308925, + "learning_rate": 1.9222965771471926e-05, + "loss": 1.1447, "step": 3241 }, { - "epoch": 0.6798070874397149, - "grad_norm": 5.453731557953743, - "learning_rate": 1.804118997421756e-05, - "loss": 1.7836, + "epoch": 0.457780288054222, + "grad_norm": 3.8624347210468044, + "learning_rate": 1.922237648932183e-05, + "loss": 1.0609, "step": 3242 }, { - "epoch": 0.6800167750052422, - "grad_norm": 6.029157950361201, - "learning_rate": 1.803984396039047e-05, - "loss": 2.0945, + "epoch": 0.4579214911042079, + "grad_norm": 3.762926437835544, + "learning_rate": 1.922178699284769e-05, + "loss": 1.1322, "step": 3243 }, { - "epoch": 0.6802264625707696, - "grad_norm": 6.497347529231123, - "learning_rate": 1.8038497534507438e-05, - "loss": 1.9885, + "epoch": 0.4580626941541937, + "grad_norm": 4.122545857131325, + "learning_rate": 1.92211972820632e-05, + "loss": 0.9244, "step": 3244 }, { - "epoch": 0.6804361501362969, - "grad_norm": 6.014855966395795, - "learning_rate": 1.803715069663746e-05, - "loss": 1.9769, + "epoch": 0.4582038972041796, + "grad_norm": 3.7140080514467635, + "learning_rate": 1.9220607356982072e-05, + "loss": 1.1503, "step": 3245 }, { - "epoch": 0.6806458377018243, - "grad_norm": 5.2524134894148045, - "learning_rate": 1.8035803446849574e-05, - "loss": 1.5727, + "epoch": 0.45834510025416547, + "grad_norm": 3.870346059549802, + "learning_rate": 1.9220017217618006e-05, + "loss": 1.0078, "step": 3246 }, { - "epoch": 0.6808555252673516, - "grad_norm": 6.957197559763476, - "learning_rate": 1.8034455785212822e-05, - "loss": 1.8986, + "epoch": 0.45848630330415135, + "grad_norm": 3.837957507562975, + "learning_rate": 1.921942686398472e-05, + "loss": 0.9192, "step": 3247 }, { - "epoch": 0.681065212832879, - "grad_norm": 5.697668530796028, - "learning_rate": 1.8033107711796274e-05, - "loss": 1.8886, + "epoch": 0.45862750635413724, + "grad_norm": 3.0234868647361375, + "learning_rate": 1.921883629609594e-05, + "loss": 0.8122, "step": 3248 }, { - "epoch": 0.6812749003984063, - "grad_norm": 5.716232562746799, - "learning_rate": 1.8031759226669026e-05, - "loss": 1.8254, + "epoch": 0.4587687094041231, + "grad_norm": 3.60289038908179, + "learning_rate": 1.9218245513965384e-05, + "loss": 1.1266, "step": 3249 }, { - "epoch": 0.6814845879639337, - "grad_norm": 6.935459690598786, - "learning_rate": 1.8030410329900183e-05, - "loss": 2.2387, + "epoch": 0.458909912454109, + "grad_norm": 4.022192950712532, + "learning_rate": 1.9217654517606786e-05, + "loss": 1.0814, "step": 3250 }, { - "epoch": 0.6816942755294612, - "grad_norm": 6.100784979379584, - "learning_rate": 1.802906102155889e-05, - "loss": 1.7785, + "epoch": 0.4590511155040949, + "grad_norm": 3.8035203402327795, + "learning_rate": 1.9217063307033873e-05, + "loss": 1.1504, "step": 3251 }, { - "epoch": 0.6819039630949885, - "grad_norm": 6.244975202197563, - "learning_rate": 1.8027711301714288e-05, - "loss": 1.9904, + "epoch": 0.45919231855408077, + "grad_norm": 2.873008313082648, + "learning_rate": 1.921647188226039e-05, + "loss": 0.5885, "step": 3252 }, { - "epoch": 0.6821136506605159, - "grad_norm": 6.529715717151556, - "learning_rate": 1.802636117043556e-05, - "loss": 2.036, + "epoch": 0.45933352160406665, + "grad_norm": 3.645328013036918, + "learning_rate": 1.9215880243300082e-05, + "loss": 1.0346, "step": 3253 }, { - "epoch": 0.6823233382260432, - "grad_norm": 6.07735667683381, - "learning_rate": 1.8025010627791898e-05, - "loss": 1.9323, + "epoch": 0.45947472465405254, + "grad_norm": 4.140143924945763, + "learning_rate": 1.92152883901667e-05, + "loss": 0.98, "step": 3254 }, { - "epoch": 0.6825330257915706, - "grad_norm": 5.38011765413242, - "learning_rate": 1.8023659673852527e-05, - "loss": 2.1259, + "epoch": 0.4596159277040384, + "grad_norm": 4.649721394491685, + "learning_rate": 1.921469632287399e-05, + "loss": 1.1418, "step": 3255 }, { - "epoch": 0.6827427133570979, - "grad_norm": 7.150730400588157, - "learning_rate": 1.802230830868668e-05, - "loss": 2.0635, + "epoch": 0.4597571307540243, + "grad_norm": 3.73452511273414, + "learning_rate": 1.921410404143572e-05, + "loss": 1.1916, "step": 3256 }, { - "epoch": 0.6829524009226253, - "grad_norm": 6.081813390930069, - "learning_rate": 1.8020956532363618e-05, - "loss": 1.9236, + "epoch": 0.4598983338040102, + "grad_norm": 3.053882117152354, + "learning_rate": 1.921351154586565e-05, + "loss": 0.9649, "step": 3257 }, { - "epoch": 0.6831620884881526, - "grad_norm": 6.672804195158333, - "learning_rate": 1.8019604344952623e-05, - "loss": 1.914, + "epoch": 0.46003953685399607, + "grad_norm": 3.7891000619228676, + "learning_rate": 1.9212918836177555e-05, + "loss": 1.2185, "step": 3258 }, { - "epoch": 0.68337177605368, - "grad_norm": 6.065829739684739, - "learning_rate": 1.8018251746522994e-05, - "loss": 1.9232, + "epoch": 0.46018073990398195, + "grad_norm": 3.5687267693323674, + "learning_rate": 1.9212325912385202e-05, + "loss": 0.8575, "step": 3259 }, { - "epoch": 0.6835814636192074, - "grad_norm": 6.6451259402415985, - "learning_rate": 1.801689873714406e-05, - "loss": 2.0083, + "epoch": 0.46032194295396783, + "grad_norm": 3.4753484635705605, + "learning_rate": 1.9211732774502372e-05, + "loss": 0.8893, "step": 3260 }, { - "epoch": 0.6837911511847348, - "grad_norm": 5.730980029079904, - "learning_rate": 1.8015545316885156e-05, - "loss": 1.7769, + "epoch": 0.46046314600395366, + "grad_norm": 3.4974104775920845, + "learning_rate": 1.9211139422542853e-05, + "loss": 0.9122, "step": 3261 }, { - "epoch": 0.6840008387502621, - "grad_norm": 7.169657659611324, - "learning_rate": 1.8014191485815655e-05, - "loss": 2.0141, + "epoch": 0.46060434905393954, + "grad_norm": 3.855428089650259, + "learning_rate": 1.921054585652043e-05, + "loss": 1.0834, "step": 3262 }, { - "epoch": 0.6842105263157895, - "grad_norm": 6.441308945278863, - "learning_rate": 1.8012837244004943e-05, - "loss": 1.9117, + "epoch": 0.4607455521039254, + "grad_norm": 3.9315202051609317, + "learning_rate": 1.92099520764489e-05, + "loss": 0.9906, "step": 3263 }, { - "epoch": 0.6844202138813168, - "grad_norm": 6.518712670344159, - "learning_rate": 1.801148259152242e-05, - "loss": 1.7167, + "epoch": 0.4608867551539113, + "grad_norm": 4.95147550440065, + "learning_rate": 1.920935808234206e-05, + "loss": 1.0549, "step": 3264 }, { - "epoch": 0.6846299014468442, - "grad_norm": 6.976697898546187, - "learning_rate": 1.801012752843752e-05, - "loss": 2.0057, + "epoch": 0.4610279582038972, + "grad_norm": 3.3676060998763693, + "learning_rate": 1.920876387421372e-05, + "loss": 0.9737, "step": 3265 }, { - "epoch": 0.6848395890123715, - "grad_norm": 7.201288513312941, - "learning_rate": 1.800877205481969e-05, - "loss": 1.597, + "epoch": 0.4611691612538831, + "grad_norm": 3.17282493529078, + "learning_rate": 1.9208169452077678e-05, + "loss": 0.8634, "step": 3266 }, { - "epoch": 0.6850492765778989, - "grad_norm": 7.798083684687535, - "learning_rate": 1.8007416170738407e-05, - "loss": 2.0361, + "epoch": 0.46131036430386896, + "grad_norm": 3.485659920294613, + "learning_rate": 1.9207574815947757e-05, + "loss": 0.9477, "step": 3267 }, { - "epoch": 0.6852589641434262, - "grad_norm": 8.54899695963752, - "learning_rate": 1.800605987626315e-05, - "loss": 2.1742, + "epoch": 0.46145156735385484, + "grad_norm": 3.997013210249467, + "learning_rate": 1.9206979965837775e-05, + "loss": 1.0195, "step": 3268 }, { - "epoch": 0.6854686517089537, - "grad_norm": 7.230219947881708, - "learning_rate": 1.8004703171463445e-05, - "loss": 2.1699, + "epoch": 0.4615927704038407, + "grad_norm": 3.2430318444835815, + "learning_rate": 1.920638490176155e-05, + "loss": 0.9415, "step": 3269 }, { - "epoch": 0.6856783392744811, - "grad_norm": 7.296853109874198, - "learning_rate": 1.8003346056408813e-05, - "loss": 2.0253, + "epoch": 0.4617339734538266, + "grad_norm": 3.569377441483567, + "learning_rate": 1.9205789623732923e-05, + "loss": 1.0643, "step": 3270 }, { - "epoch": 0.6858880268400084, - "grad_norm": 6.671660838051233, - "learning_rate": 1.800198853116882e-05, - "loss": 2.1672, + "epoch": 0.4618751765038125, + "grad_norm": 3.565221312060194, + "learning_rate": 1.920519413176572e-05, + "loss": 1.0857, "step": 3271 }, { - "epoch": 0.6860977144055358, - "grad_norm": 7.096620651572791, - "learning_rate": 1.8000630595813035e-05, - "loss": 2.0051, + "epoch": 0.4620163795537984, + "grad_norm": 4.101146110350876, + "learning_rate": 1.9204598425873773e-05, + "loss": 1.2772, "step": 3272 }, { - "epoch": 0.6863074019710631, - "grad_norm": 6.669286440996504, - "learning_rate": 1.7999272250411053e-05, - "loss": 1.9375, + "epoch": 0.46215758260378426, + "grad_norm": 3.805401357939564, + "learning_rate": 1.9204002506070944e-05, + "loss": 1.1069, "step": 3273 }, { - "epoch": 0.6865170895365905, - "grad_norm": 6.084563485225219, - "learning_rate": 1.7997913495032497e-05, - "loss": 2.0118, + "epoch": 0.46229878565377014, + "grad_norm": 4.044722092141315, + "learning_rate": 1.9203406372371065e-05, + "loss": 1.411, "step": 3274 }, { - "epoch": 0.6867267771021178, - "grad_norm": 6.532932599629294, - "learning_rate": 1.7996554329747003e-05, - "loss": 1.777, + "epoch": 0.462439988703756, + "grad_norm": 3.7655287109120072, + "learning_rate": 1.9202810024787998e-05, + "loss": 1.1042, "step": 3275 }, { - "epoch": 0.6869364646676452, - "grad_norm": 5.56489706745577, - "learning_rate": 1.7995194754624225e-05, - "loss": 2.3794, + "epoch": 0.4625811917537419, + "grad_norm": 4.322226676382161, + "learning_rate": 1.92022134633356e-05, + "loss": 1.1562, "step": 3276 }, { - "epoch": 0.6871461522331725, - "grad_norm": 5.824595590753818, - "learning_rate": 1.7993834769733856e-05, - "loss": 2.0992, + "epoch": 0.4627223948037278, + "grad_norm": 3.233857206635366, + "learning_rate": 1.920161668802774e-05, + "loss": 1.0165, "step": 3277 }, { - "epoch": 0.6873558397987, - "grad_norm": 6.207846223883358, - "learning_rate": 1.7992474375145586e-05, - "loss": 1.8199, + "epoch": 0.4628635978537136, + "grad_norm": 3.777777701762446, + "learning_rate": 1.9201019698878272e-05, + "loss": 1.1327, "step": 3278 }, { - "epoch": 0.6875655273642273, - "grad_norm": 5.706951818363814, - "learning_rate": 1.7991113570929145e-05, - "loss": 1.8976, + "epoch": 0.4630048009036995, + "grad_norm": 4.224321182739448, + "learning_rate": 1.920042249590109e-05, + "loss": 0.9552, "step": 3279 }, { - "epoch": 0.6877752149297547, - "grad_norm": 7.082112765803049, - "learning_rate": 1.798975235715427e-05, - "loss": 2.2045, + "epoch": 0.4631460039536854, + "grad_norm": 3.234864432949824, + "learning_rate": 1.919982507911006e-05, + "loss": 1.0041, "step": 3280 }, { - "epoch": 0.687984902495282, - "grad_norm": 6.59657977612307, - "learning_rate": 1.798839073389073e-05, - "loss": 2.2119, + "epoch": 0.46328720700367126, + "grad_norm": 4.439937662265556, + "learning_rate": 1.9199227448519065e-05, + "loss": 1.4331, "step": 3281 }, { - "epoch": 0.6881945900608094, - "grad_norm": 6.075742386494613, - "learning_rate": 1.798702870120831e-05, - "loss": 2.0579, + "epoch": 0.46342841005365715, + "grad_norm": 3.667322510482209, + "learning_rate": 1.9198629604141996e-05, + "loss": 0.9611, "step": 3282 }, { - "epoch": 0.6884042776263367, - "grad_norm": 5.895622096392346, - "learning_rate": 1.7985666259176815e-05, - "loss": 1.8532, + "epoch": 0.46356961310364303, + "grad_norm": 3.304941181394408, + "learning_rate": 1.919803154599275e-05, + "loss": 0.9377, "step": 3283 }, { - "epoch": 0.6886139651918641, - "grad_norm": 6.498667180294317, - "learning_rate": 1.7984303407866078e-05, - "loss": 1.6078, + "epoch": 0.4637108161536289, + "grad_norm": 3.962774236512375, + "learning_rate": 1.9197433274085225e-05, + "loss": 1.1008, "step": 3284 }, { - "epoch": 0.6888236527573914, - "grad_norm": 7.0110623323380565, - "learning_rate": 1.798294014734594e-05, - "loss": 1.9654, + "epoch": 0.4638520192036148, + "grad_norm": 3.603592865761391, + "learning_rate": 1.9196834788433323e-05, + "loss": 0.9999, "step": 3285 }, { - "epoch": 0.6890333403229189, - "grad_norm": 7.400397006988793, - "learning_rate": 1.7981576477686272e-05, - "loss": 1.725, + "epoch": 0.4639932222536007, + "grad_norm": 4.158152705938979, + "learning_rate": 1.919623608905095e-05, + "loss": 1.092, "step": 3286 }, { - "epoch": 0.6892430278884463, - "grad_norm": 6.1842453610868775, - "learning_rate": 1.798021239895697e-05, - "loss": 1.9258, + "epoch": 0.46413442530358656, + "grad_norm": 3.726925852976475, + "learning_rate": 1.919563717595202e-05, + "loss": 0.9415, "step": 3287 }, { - "epoch": 0.6894527154539736, - "grad_norm": 5.813718125604629, - "learning_rate": 1.7978847911227938e-05, - "loss": 2.0203, + "epoch": 0.46427562835357244, + "grad_norm": 5.409670889653258, + "learning_rate": 1.9195038049150455e-05, + "loss": 1.3175, "step": 3288 }, { - "epoch": 0.689662403019501, - "grad_norm": 5.997215289742811, - "learning_rate": 1.7977483014569118e-05, - "loss": 2.0934, + "epoch": 0.46441683140355833, + "grad_norm": 3.906924641959383, + "learning_rate": 1.919443870866018e-05, + "loss": 1.0551, "step": 3289 }, { - "epoch": 0.6898720905850283, - "grad_norm": 6.216867165912658, - "learning_rate": 1.7976117709050452e-05, - "loss": 1.9093, + "epoch": 0.4645580344535442, + "grad_norm": 3.3237005905549597, + "learning_rate": 1.919383915449512e-05, + "loss": 0.9839, "step": 3290 }, { - "epoch": 0.6900817781505557, - "grad_norm": 5.771666782904703, - "learning_rate": 1.7974751994741922e-05, - "loss": 1.7338, + "epoch": 0.4646992375035301, + "grad_norm": 4.38796995181864, + "learning_rate": 1.9193239386669203e-05, + "loss": 1.1641, "step": 3291 }, { - "epoch": 0.690291465716083, - "grad_norm": 6.089641578852998, - "learning_rate": 1.797338587171352e-05, - "loss": 1.7365, + "epoch": 0.464840440553516, + "grad_norm": 4.140249326545954, + "learning_rate": 1.9192639405196377e-05, + "loss": 1.1435, "step": 3292 }, { - "epoch": 0.6905011532816104, - "grad_norm": 6.292746586876868, - "learning_rate": 1.797201934003526e-05, - "loss": 1.7147, + "epoch": 0.46498164360350186, + "grad_norm": 4.202280684250058, + "learning_rate": 1.919203921009058e-05, + "loss": 1.2459, "step": 3293 }, { - "epoch": 0.6907108408471377, - "grad_norm": 6.270441726279288, - "learning_rate": 1.7970652399777185e-05, - "loss": 1.375, + "epoch": 0.46512284665348774, + "grad_norm": 3.775757864882608, + "learning_rate": 1.9191438801365763e-05, + "loss": 1.0507, "step": 3294 }, { - "epoch": 0.6909205284126652, - "grad_norm": 7.530413820118031, - "learning_rate": 1.7969285051009354e-05, - "loss": 1.7615, + "epoch": 0.46526404970347357, + "grad_norm": 3.6886057590269186, + "learning_rate": 1.9190838179035873e-05, + "loss": 1.1869, "step": 3295 }, { - "epoch": 0.6911302159781925, - "grad_norm": 6.462696144853078, - "learning_rate": 1.7967917293801836e-05, - "loss": 1.9219, + "epoch": 0.46540525275345945, + "grad_norm": 3.8683924393917333, + "learning_rate": 1.919023734311488e-05, + "loss": 1.1382, "step": 3296 }, { - "epoch": 0.6913399035437199, - "grad_norm": 6.042842423204209, - "learning_rate": 1.796654912822474e-05, - "loss": 2.1346, + "epoch": 0.46554645580344534, + "grad_norm": 3.3853266904557278, + "learning_rate": 1.9189636293616733e-05, + "loss": 1.074, "step": 3297 }, { - "epoch": 0.6915495911092472, - "grad_norm": 5.807817323679866, - "learning_rate": 1.796518055434818e-05, - "loss": 1.6588, + "epoch": 0.4656876588534312, + "grad_norm": 3.631061322892458, + "learning_rate": 1.918903503055541e-05, + "loss": 1.0372, "step": 3298 }, { - "epoch": 0.6917592786747746, - "grad_norm": 6.759429940389732, - "learning_rate": 1.7963811572242306e-05, - "loss": 1.8292, + "epoch": 0.4658288619034171, + "grad_norm": 3.3759101287018254, + "learning_rate": 1.9188433553944885e-05, + "loss": 0.9557, "step": 3299 }, { - "epoch": 0.6919689662403019, - "grad_norm": 6.467017050341589, - "learning_rate": 1.7962442181977274e-05, - "loss": 1.9037, + "epoch": 0.465970064953403, + "grad_norm": 4.3798646728298385, + "learning_rate": 1.918783186379913e-05, + "loss": 0.9693, "step": 3300 }, { - "epoch": 0.6921786538058293, - "grad_norm": 6.969881479071096, - "learning_rate": 1.796107238362327e-05, - "loss": 2.156, + "epoch": 0.46611126800338887, + "grad_norm": 4.331400534055562, + "learning_rate": 1.9187229960132128e-05, + "loss": 1.5027, "step": 3301 }, { - "epoch": 0.6923883413713566, - "grad_norm": 6.724932934258681, - "learning_rate": 1.7959702177250503e-05, - "loss": 1.8348, + "epoch": 0.46625247105337475, + "grad_norm": 4.720676014817762, + "learning_rate": 1.9186627842957873e-05, + "loss": 1.195, "step": 3302 }, { - "epoch": 0.692598028936884, - "grad_norm": 6.577170506067217, - "learning_rate": 1.7958331562929188e-05, - "loss": 1.926, + "epoch": 0.46639367410336063, + "grad_norm": 3.6493280427964048, + "learning_rate": 1.9186025512290352e-05, + "loss": 0.9998, "step": 3303 }, { - "epoch": 0.6928077165024114, - "grad_norm": 5.9009958327439005, - "learning_rate": 1.7956960540729582e-05, - "loss": 1.6556, + "epoch": 0.4665348771533465, + "grad_norm": 4.510650857367312, + "learning_rate": 1.9185422968143566e-05, + "loss": 1.187, "step": 3304 }, { - "epoch": 0.6930174040679388, - "grad_norm": 7.116648502987047, - "learning_rate": 1.7955589110721946e-05, - "loss": 2.2525, + "epoch": 0.4666760802033324, + "grad_norm": 4.006477593473193, + "learning_rate": 1.9184820210531517e-05, + "loss": 1.0461, "step": 3305 }, { - "epoch": 0.6932270916334662, - "grad_norm": 6.606844015841355, - "learning_rate": 1.795421727297657e-05, - "loss": 2.0384, + "epoch": 0.4668172832533183, + "grad_norm": 4.389180114036907, + "learning_rate": 1.9184217239468213e-05, + "loss": 1.1824, "step": 3306 }, { - "epoch": 0.6934367791989935, - "grad_norm": 5.968572739395609, - "learning_rate": 1.7952845027563758e-05, - "loss": 1.8678, + "epoch": 0.46695848630330417, + "grad_norm": 3.1148121885518014, + "learning_rate": 1.9183614054967666e-05, + "loss": 0.8554, "step": 3307 }, { - "epoch": 0.6936464667645209, - "grad_norm": 7.133876930285767, - "learning_rate": 1.7951472374553852e-05, - "loss": 1.7548, + "epoch": 0.46709968935329005, + "grad_norm": 3.5454517097745555, + "learning_rate": 1.9183010657043894e-05, + "loss": 1.1337, "step": 3308 }, { - "epoch": 0.6938561543300482, - "grad_norm": 6.348168747093362, - "learning_rate": 1.7950099314017194e-05, - "loss": 1.812, + "epoch": 0.46724089240327593, + "grad_norm": 3.6391331391301347, + "learning_rate": 1.9182407045710923e-05, + "loss": 1.0003, "step": 3309 }, { - "epoch": 0.6940658418955756, - "grad_norm": 5.773455863040737, - "learning_rate": 1.7948725846024153e-05, - "loss": 1.8128, + "epoch": 0.4673820954532618, + "grad_norm": 3.909022071053452, + "learning_rate": 1.9181803220982776e-05, + "loss": 1.1596, "step": 3310 }, { - "epoch": 0.6942755294611029, - "grad_norm": 5.904478604261118, - "learning_rate": 1.794735197064513e-05, - "loss": 1.7768, + "epoch": 0.4675232985032477, + "grad_norm": 3.6685462954574475, + "learning_rate": 1.9181199182873488e-05, + "loss": 0.9759, "step": 3311 }, { - "epoch": 0.6944852170266304, - "grad_norm": 6.033866059869585, - "learning_rate": 1.7945977687950535e-05, - "loss": 1.7166, + "epoch": 0.4676645015532335, + "grad_norm": 4.084127123071047, + "learning_rate": 1.9180594931397094e-05, + "loss": 0.9751, "step": 3312 }, { - "epoch": 0.6946949045921577, - "grad_norm": 6.425278205080608, - "learning_rate": 1.79446029980108e-05, - "loss": 1.5628, + "epoch": 0.4678057046032194, + "grad_norm": 3.651758091998305, + "learning_rate": 1.917999046656764e-05, + "loss": 0.8351, "step": 3313 }, { - "epoch": 0.6949045921576851, - "grad_norm": 6.628248806756656, - "learning_rate": 1.794322790089638e-05, - "loss": 2.0108, + "epoch": 0.4679469076532053, + "grad_norm": 3.37359982088584, + "learning_rate": 1.9179385788399176e-05, + "loss": 1.0016, "step": 3314 }, { - "epoch": 0.6951142797232124, - "grad_norm": 6.9408348481054825, - "learning_rate": 1.7941852396677757e-05, - "loss": 1.9478, + "epoch": 0.4680881107031912, + "grad_norm": 3.8193073034472342, + "learning_rate": 1.917878089690574e-05, + "loss": 0.7642, "step": 3315 }, { - "epoch": 0.6953239672887398, - "grad_norm": 6.896784300131047, - "learning_rate": 1.7940476485425424e-05, - "loss": 1.9928, + "epoch": 0.46822931375317706, + "grad_norm": 4.243218305957036, + "learning_rate": 1.917817579210141e-05, + "loss": 1.1405, "step": 3316 }, { - "epoch": 0.6955336548542671, - "grad_norm": 6.506923866164512, - "learning_rate": 1.79391001672099e-05, - "loss": 2.1301, + "epoch": 0.46837051680316294, + "grad_norm": 3.7397283469993203, + "learning_rate": 1.9177570474000236e-05, + "loss": 1.0378, "step": 3317 }, { - "epoch": 0.6957433424197945, - "grad_norm": 6.314360375140578, - "learning_rate": 1.793772344210172e-05, - "loss": 1.9989, + "epoch": 0.4685117198531488, + "grad_norm": 3.8524499008169655, + "learning_rate": 1.9176964942616286e-05, + "loss": 0.9126, "step": 3318 }, { - "epoch": 0.6959530299853218, - "grad_norm": 5.6281101347474385, - "learning_rate": 1.793634631017145e-05, - "loss": 2.0314, + "epoch": 0.4686529229031347, + "grad_norm": 4.358334008192972, + "learning_rate": 1.9176359197963634e-05, + "loss": 0.9826, "step": 3319 }, { - "epoch": 0.6961627175508492, - "grad_norm": 5.279294921431496, - "learning_rate": 1.7934968771489663e-05, - "loss": 1.6397, + "epoch": 0.4687941259531206, + "grad_norm": 3.146033549140663, + "learning_rate": 1.917575324005636e-05, + "loss": 0.8685, "step": 3320 }, { - "epoch": 0.6963724051163765, - "grad_norm": 5.926645901085874, - "learning_rate": 1.7933590826126964e-05, - "loss": 1.5286, + "epoch": 0.46893532900310647, + "grad_norm": 3.5361657760773944, + "learning_rate": 1.9175147068908543e-05, + "loss": 0.9492, "step": 3321 }, { - "epoch": 0.696582092681904, - "grad_norm": 6.326367018370093, - "learning_rate": 1.7932212474153977e-05, - "loss": 2.4343, + "epoch": 0.46907653205309235, + "grad_norm": 4.050771780955828, + "learning_rate": 1.9174540684534267e-05, + "loss": 1.1089, "step": 3322 }, { - "epoch": 0.6967917802474314, - "grad_norm": 6.550728000977204, - "learning_rate": 1.793083371564134e-05, - "loss": 1.9441, + "epoch": 0.46921773510307824, + "grad_norm": 4.031123374446859, + "learning_rate": 1.9173934086947626e-05, + "loss": 1.1549, "step": 3323 }, { - "epoch": 0.6970014678129587, - "grad_norm": 5.863870606832622, - "learning_rate": 1.7929454550659723e-05, - "loss": 1.9915, + "epoch": 0.4693589381530641, + "grad_norm": 3.6808566040096102, + "learning_rate": 1.9173327276162724e-05, + "loss": 0.9391, "step": 3324 }, { - "epoch": 0.6972111553784861, - "grad_norm": 5.236499753861973, - "learning_rate": 1.7928074979279806e-05, - "loss": 1.5367, + "epoch": 0.46950014120305, + "grad_norm": 3.414192561757775, + "learning_rate": 1.9172720252193657e-05, + "loss": 0.9217, "step": 3325 }, { - "epoch": 0.6974208429440134, - "grad_norm": 6.350525551722535, - "learning_rate": 1.7926695001572295e-05, - "loss": 2.2273, + "epoch": 0.4696413442530359, + "grad_norm": 3.534271114506234, + "learning_rate": 1.917211301505453e-05, + "loss": 1.0627, "step": 3326 }, { - "epoch": 0.6976305305095408, - "grad_norm": 7.140496641874133, - "learning_rate": 1.7925314617607918e-05, - "loss": 2.6077, + "epoch": 0.46978254730302177, + "grad_norm": 3.1371871383904257, + "learning_rate": 1.9171505564759463e-05, + "loss": 0.8956, "step": 3327 }, { - "epoch": 0.6978402180750681, - "grad_norm": 6.743028823358207, - "learning_rate": 1.792393382745742e-05, - "loss": 2.1445, + "epoch": 0.46992375035300765, + "grad_norm": 3.1148931676990053, + "learning_rate": 1.9170897901322563e-05, + "loss": 0.9354, "step": 3328 }, { - "epoch": 0.6980499056405955, - "grad_norm": 6.235060124169527, - "learning_rate": 1.7922552631191573e-05, - "loss": 1.9662, + "epoch": 0.4700649534029935, + "grad_norm": 3.7373959335949323, + "learning_rate": 1.9170290024757958e-05, + "loss": 0.9131, "step": 3329 }, { - "epoch": 0.6982595932061229, - "grad_norm": 6.530651720778435, - "learning_rate": 1.7921171028881158e-05, - "loss": 1.7024, + "epoch": 0.47020615645297936, + "grad_norm": 4.379979492777393, + "learning_rate": 1.916968193507977e-05, + "loss": 1.2751, "step": 3330 }, { - "epoch": 0.6984692807716503, - "grad_norm": 5.902371859499319, - "learning_rate": 1.7919789020596993e-05, - "loss": 1.891, + "epoch": 0.47034735950296525, + "grad_norm": 4.096582047031259, + "learning_rate": 1.916907363230214e-05, + "loss": 0.9532, "step": 3331 }, { - "epoch": 0.6986789683371776, - "grad_norm": 6.127509526290477, - "learning_rate": 1.7918406606409905e-05, - "loss": 1.7999, + "epoch": 0.47048856255295113, + "grad_norm": 4.223248714636114, + "learning_rate": 1.9168465116439196e-05, + "loss": 1.3101, "step": 3332 }, { - "epoch": 0.698888655902705, - "grad_norm": 6.2433513930696405, - "learning_rate": 1.7917023786390742e-05, - "loss": 2.0957, + "epoch": 0.470629765602937, + "grad_norm": 3.678626427008912, + "learning_rate": 1.9167856387505077e-05, + "loss": 1.1846, "step": 3333 }, { - "epoch": 0.6990983434682323, - "grad_norm": 5.741783854511772, - "learning_rate": 1.791564056061038e-05, - "loss": 2.03, + "epoch": 0.4707709686529229, + "grad_norm": 3.431736025451667, + "learning_rate": 1.916724744551394e-05, + "loss": 0.8871, "step": 3334 }, { - "epoch": 0.6993080310337597, - "grad_norm": 7.270415587957947, - "learning_rate": 1.7914256929139712e-05, - "loss": 2.281, + "epoch": 0.4709121717029088, + "grad_norm": 4.16266559232517, + "learning_rate": 1.916663829047993e-05, + "loss": 1.145, "step": 3335 }, { - "epoch": 0.699517718599287, - "grad_norm": 5.771215892814484, - "learning_rate": 1.7912872892049648e-05, - "loss": 2.257, + "epoch": 0.47105337475289466, + "grad_norm": 3.7829351922468515, + "learning_rate": 1.9166028922417208e-05, + "loss": 1.0593, "step": 3336 }, { - "epoch": 0.6997274061648144, - "grad_norm": 6.500445579308896, - "learning_rate": 1.791148844941112e-05, - "loss": 1.8889, + "epoch": 0.47119457780288054, + "grad_norm": 4.4505326592376795, + "learning_rate": 1.916541934133993e-05, + "loss": 1.2792, "step": 3337 }, { - "epoch": 0.6999370937303417, - "grad_norm": 5.245848412344272, - "learning_rate": 1.791010360129509e-05, - "loss": 1.5329, + "epoch": 0.4713357808528664, + "grad_norm": 4.345264219268654, + "learning_rate": 1.9164809547262262e-05, + "loss": 1.1158, "step": 3338 }, { - "epoch": 0.7001467812958692, - "grad_norm": 5.709369565352258, - "learning_rate": 1.7908718347772534e-05, - "loss": 2.0267, + "epoch": 0.4714769839028523, + "grad_norm": 4.263010309211308, + "learning_rate": 1.9164199540198382e-05, + "loss": 1.1482, "step": 3339 }, { - "epoch": 0.7003564688613965, - "grad_norm": 5.615565906771907, - "learning_rate": 1.7907332688914442e-05, - "loss": 1.8282, + "epoch": 0.4716181869528382, + "grad_norm": 3.9620576835071293, + "learning_rate": 1.916358932016246e-05, + "loss": 1.1323, "step": 3340 }, { - "epoch": 0.7005661564269239, - "grad_norm": 6.3906253558790675, - "learning_rate": 1.7905946624791837e-05, - "loss": 1.9056, + "epoch": 0.4717593900028241, + "grad_norm": 3.8253566121026914, + "learning_rate": 1.916297888716868e-05, + "loss": 0.9799, "step": 3341 }, { - "epoch": 0.7007758439924513, - "grad_norm": 5.33055691632164, - "learning_rate": 1.7904560155475756e-05, - "loss": 1.7913, + "epoch": 0.47190059305280996, + "grad_norm": 3.868849443730833, + "learning_rate": 1.916236824123123e-05, + "loss": 1.1715, "step": 3342 }, { - "epoch": 0.7009855315579786, - "grad_norm": 6.921507024779086, - "learning_rate": 1.7903173281037256e-05, - "loss": 2.0845, + "epoch": 0.47204179610279584, + "grad_norm": 3.4331809920474283, + "learning_rate": 1.9161757382364295e-05, + "loss": 1.0173, "step": 3343 }, { - "epoch": 0.701195219123506, - "grad_norm": 6.781939569117684, - "learning_rate": 1.7901786001547418e-05, - "loss": 2.1192, + "epoch": 0.4721829991527817, + "grad_norm": 3.3121973968804315, + "learning_rate": 1.916114631058208e-05, + "loss": 0.8776, "step": 3344 }, { - "epoch": 0.7014049066890333, - "grad_norm": 5.337855876491265, - "learning_rate": 1.7900398317077345e-05, - "loss": 1.3577, + "epoch": 0.4723242022027676, + "grad_norm": 3.6196048207023357, + "learning_rate": 1.9160535025898777e-05, + "loss": 1.0114, "step": 3345 }, { - "epoch": 0.7016145942545607, - "grad_norm": 5.3120907022779305, - "learning_rate": 1.789901022769815e-05, - "loss": 1.6773, + "epoch": 0.47246540525275343, + "grad_norm": 3.841139430324358, + "learning_rate": 1.91599235283286e-05, + "loss": 0.976, "step": 3346 }, { - "epoch": 0.701824281820088, - "grad_norm": 6.891698336317076, - "learning_rate": 1.7897621733480988e-05, - "loss": 2.3032, + "epoch": 0.4726066083027393, + "grad_norm": 3.2535983588129977, + "learning_rate": 1.9159311817885756e-05, + "loss": 0.924, "step": 3347 }, { - "epoch": 0.7020339693856155, - "grad_norm": 6.200992405614746, - "learning_rate": 1.789623283449701e-05, - "loss": 2.0126, + "epoch": 0.4727478113527252, + "grad_norm": 4.207795702308853, + "learning_rate": 1.9158699894584456e-05, + "loss": 1.0479, "step": 3348 }, { - "epoch": 0.7022436569511428, - "grad_norm": 6.494642524254688, - "learning_rate": 1.7894843530817402e-05, - "loss": 2.3382, + "epoch": 0.4728890144027111, + "grad_norm": 3.7235157780424117, + "learning_rate": 1.915808775843893e-05, + "loss": 0.9604, "step": 3349 }, { - "epoch": 0.7024533445166702, - "grad_norm": 6.633325064273848, - "learning_rate": 1.7893453822513375e-05, - "loss": 1.7037, + "epoch": 0.47303021745269697, + "grad_norm": 3.3869835210676302, + "learning_rate": 1.9157475409463396e-05, + "loss": 1.0391, "step": 3350 }, { - "epoch": 0.7026630320821975, - "grad_norm": 6.718264609507503, - "learning_rate": 1.7892063709656146e-05, - "loss": 2.0064, + "epoch": 0.47317142050268285, + "grad_norm": 3.771252396405334, + "learning_rate": 1.915686284767209e-05, + "loss": 1.1861, "step": 3351 }, { - "epoch": 0.7028727196477249, - "grad_norm": 6.074534570205052, - "learning_rate": 1.7890673192316965e-05, - "loss": 2.0068, + "epoch": 0.47331262355266873, + "grad_norm": 4.913802258641976, + "learning_rate": 1.915625007307925e-05, + "loss": 0.9709, "step": 3352 }, { - "epoch": 0.7030824072132522, - "grad_norm": 6.501024863482129, - "learning_rate": 1.7889282270567096e-05, - "loss": 1.8064, + "epoch": 0.4734538266026546, + "grad_norm": 4.169855488151496, + "learning_rate": 1.915563708569911e-05, + "loss": 1.2919, "step": 3353 }, { - "epoch": 0.7032920947787796, - "grad_norm": 6.045726403512422, - "learning_rate": 1.788789094447783e-05, - "loss": 1.7423, + "epoch": 0.4735950296526405, + "grad_norm": 4.841798757193291, + "learning_rate": 1.9155023885545914e-05, + "loss": 1.3269, "step": 3354 }, { - "epoch": 0.7035017823443069, - "grad_norm": 6.051485253260123, - "learning_rate": 1.788649921412047e-05, - "loss": 2.0434, + "epoch": 0.4737362327026264, + "grad_norm": 4.344741116668644, + "learning_rate": 1.915441047263392e-05, + "loss": 1.195, "step": 3355 }, { - "epoch": 0.7037114699098344, - "grad_norm": 5.729579933642841, - "learning_rate": 1.788510707956635e-05, - "loss": 1.7226, + "epoch": 0.47387743575261226, + "grad_norm": 5.05795557088238, + "learning_rate": 1.915379684697738e-05, + "loss": 0.9277, "step": 3356 }, { - "epoch": 0.7039211574753617, - "grad_norm": 6.192300906660107, - "learning_rate": 1.7883714540886808e-05, - "loss": 1.6466, + "epoch": 0.47401863880259815, + "grad_norm": 4.0369855445860425, + "learning_rate": 1.9153183008590556e-05, + "loss": 1.1293, "step": 3357 }, { - "epoch": 0.7041308450408891, - "grad_norm": 7.558846996806186, - "learning_rate": 1.7882321598153228e-05, - "loss": 2.1446, + "epoch": 0.47415984185258403, + "grad_norm": 4.10432725311996, + "learning_rate": 1.915256895748771e-05, + "loss": 1.0313, "step": 3358 }, { - "epoch": 0.7043405326064165, - "grad_norm": 7.608434490734879, - "learning_rate": 1.7880928251436994e-05, - "loss": 2.1729, + "epoch": 0.4743010449025699, + "grad_norm": 4.338154144495249, + "learning_rate": 1.9151954693683114e-05, + "loss": 1.3287, "step": 3359 }, { - "epoch": 0.7045502201719438, - "grad_norm": 6.796783426780534, - "learning_rate": 1.787953450080952e-05, - "loss": 1.9216, + "epoch": 0.4744422479525558, + "grad_norm": 4.6749347006831155, + "learning_rate": 1.9151340217191042e-05, + "loss": 1.166, "step": 3360 }, { - "epoch": 0.7047599077374712, - "grad_norm": 5.877450019657079, - "learning_rate": 1.7878140346342238e-05, - "loss": 1.8575, + "epoch": 0.4745834510025417, + "grad_norm": 3.657026285979219, + "learning_rate": 1.915072552802578e-05, + "loss": 1.0037, "step": 3361 }, { - "epoch": 0.7049695953029985, - "grad_norm": 6.210527339023781, - "learning_rate": 1.7876745788106594e-05, - "loss": 1.912, + "epoch": 0.47472465405252756, + "grad_norm": 3.7791820910598624, + "learning_rate": 1.9150110626201604e-05, + "loss": 0.9711, "step": 3362 }, { - "epoch": 0.7051792828685259, - "grad_norm": 6.347573630162063, - "learning_rate": 1.7875350826174068e-05, - "loss": 1.8913, + "epoch": 0.4748658571025134, + "grad_norm": 3.5461768543719345, + "learning_rate": 1.914949551173281e-05, + "loss": 1.0482, "step": 3363 }, { - "epoch": 0.7053889704340532, - "grad_norm": 4.937858577764903, - "learning_rate": 1.7873955460616156e-05, - "loss": 1.4774, + "epoch": 0.47500706015249927, + "grad_norm": 4.8062586768195645, + "learning_rate": 1.9148880184633695e-05, + "loss": 1.3734, "step": 3364 }, { - "epoch": 0.7055986579995807, - "grad_norm": 6.480753068912573, - "learning_rate": 1.787255969150437e-05, - "loss": 2.1709, + "epoch": 0.47514826320248515, + "grad_norm": 4.374910670456668, + "learning_rate": 1.9148264644918552e-05, + "loss": 1.1175, "step": 3365 }, { - "epoch": 0.705808345565108, - "grad_norm": 6.426998139006233, - "learning_rate": 1.787116351891024e-05, - "loss": 2.1211, + "epoch": 0.47528946625247104, + "grad_norm": 4.026065751800876, + "learning_rate": 1.914764889260169e-05, + "loss": 1.1894, "step": 3366 }, { - "epoch": 0.7060180331306354, - "grad_norm": 5.990185495027281, - "learning_rate": 1.7869766942905334e-05, - "loss": 1.705, + "epoch": 0.4754306693024569, + "grad_norm": 3.7920521662779607, + "learning_rate": 1.914703292769742e-05, + "loss": 1.0866, "step": 3367 }, { - "epoch": 0.7062277206961627, - "grad_norm": 6.031116383127443, - "learning_rate": 1.786836996356122e-05, - "loss": 1.9427, + "epoch": 0.4755718723524428, + "grad_norm": 5.109543316912532, + "learning_rate": 1.914641675022005e-05, + "loss": 1.3375, "step": 3368 }, { - "epoch": 0.7064374082616901, - "grad_norm": 5.174960391881702, - "learning_rate": 1.78669725809495e-05, - "loss": 1.8994, + "epoch": 0.4757130754024287, + "grad_norm": 4.096015476329699, + "learning_rate": 1.914580036018391e-05, + "loss": 0.8131, "step": 3369 }, { - "epoch": 0.7066470958272174, - "grad_norm": 5.470374618937298, - "learning_rate": 1.786557479514179e-05, - "loss": 1.9317, + "epoch": 0.47585427845241457, + "grad_norm": 3.739770391142123, + "learning_rate": 1.914518375760332e-05, + "loss": 1.1147, "step": 3370 }, { - "epoch": 0.7068567833927448, - "grad_norm": 6.4224013499481245, - "learning_rate": 1.786417660620973e-05, - "loss": 1.8964, + "epoch": 0.47599548150240045, + "grad_norm": 3.429525721763435, + "learning_rate": 1.9144566942492605e-05, + "loss": 0.97, "step": 3371 }, { - "epoch": 0.7070664709582721, - "grad_norm": 5.973755420207963, - "learning_rate": 1.7862778014224977e-05, - "loss": 2.202, + "epoch": 0.47613668455238634, + "grad_norm": 4.086845182527498, + "learning_rate": 1.9143949914866106e-05, + "loss": 0.8889, "step": 3372 }, { - "epoch": 0.7072761585237995, - "grad_norm": 6.481451465039927, - "learning_rate": 1.7861379019259216e-05, - "loss": 1.8383, + "epoch": 0.4762778876023722, + "grad_norm": 4.007898923481087, + "learning_rate": 1.914333267473816e-05, + "loss": 1.1152, "step": 3373 }, { - "epoch": 0.7074858460893269, - "grad_norm": 5.86573343697531, - "learning_rate": 1.7859979621384144e-05, - "loss": 2.0065, + "epoch": 0.4764190906523581, + "grad_norm": 3.5293601277830806, + "learning_rate": 1.914271522212311e-05, + "loss": 0.8811, "step": 3374 }, { - "epoch": 0.7076955336548543, - "grad_norm": 6.145310765648565, - "learning_rate": 1.785857982067148e-05, - "loss": 2.0286, + "epoch": 0.476560293702344, + "grad_norm": 3.6795014599239586, + "learning_rate": 1.914209755703531e-05, + "loss": 0.9058, "step": 3375 }, { - "epoch": 0.7079052212203817, - "grad_norm": 5.788095836267299, - "learning_rate": 1.7857179617192978e-05, - "loss": 2.0236, + "epoch": 0.47670149675232987, + "grad_norm": 3.1694543516812894, + "learning_rate": 1.9141479679489107e-05, + "loss": 0.9004, "step": 3376 }, { - "epoch": 0.708114908785909, - "grad_norm": 5.39349603457023, - "learning_rate": 1.785577901102039e-05, - "loss": 2.1388, + "epoch": 0.47684269980231575, + "grad_norm": 3.4168555152900706, + "learning_rate": 1.9140861589498866e-05, + "loss": 0.7709, "step": 3377 }, { - "epoch": 0.7083245963514364, - "grad_norm": 5.881033799477456, - "learning_rate": 1.78543780022255e-05, - "loss": 2.013, + "epoch": 0.47698390285230163, + "grad_norm": 3.0760120411511913, + "learning_rate": 1.914024328707895e-05, + "loss": 0.7947, "step": 3378 }, { - "epoch": 0.7085342839169637, - "grad_norm": 5.5549538177314455, - "learning_rate": 1.785297659088012e-05, - "loss": 1.8628, + "epoch": 0.4771251059022875, + "grad_norm": 4.9695802275087715, + "learning_rate": 1.9139624772243724e-05, + "loss": 1.1513, "step": 3379 }, { - "epoch": 0.7087439714824911, - "grad_norm": 5.727954416263593, - "learning_rate": 1.7851574777056065e-05, - "loss": 1.9335, + "epoch": 0.47726630895227334, + "grad_norm": 3.2886750952298045, + "learning_rate": 1.9139006045007567e-05, + "loss": 0.9815, "step": 3380 }, { - "epoch": 0.7089536590480184, - "grad_norm": 5.699244342926899, - "learning_rate": 1.7850172560825184e-05, - "loss": 2.0594, + "epoch": 0.4774075120022592, + "grad_norm": 3.6092828861646953, + "learning_rate": 1.913838710538486e-05, + "loss": 0.8794, "step": 3381 }, { - "epoch": 0.7091633466135459, - "grad_norm": 5.4522084708225105, - "learning_rate": 1.784876994225935e-05, - "loss": 1.7892, + "epoch": 0.4775487150522451, + "grad_norm": 3.9928262874300344, + "learning_rate": 1.913776795338998e-05, + "loss": 1.0127, "step": 3382 }, { - "epoch": 0.7093730341790732, - "grad_norm": 6.225143918071901, - "learning_rate": 1.7847366921430434e-05, - "loss": 1.6638, + "epoch": 0.477689918102231, + "grad_norm": 4.057518777495451, + "learning_rate": 1.9137148589037314e-05, + "loss": 0.9195, "step": 3383 }, { - "epoch": 0.7095827217446006, - "grad_norm": 5.682056341800336, - "learning_rate": 1.7845963498410358e-05, - "loss": 1.8477, + "epoch": 0.4778311211522169, + "grad_norm": 3.7191258122122197, + "learning_rate": 1.9136529012341268e-05, + "loss": 0.8754, "step": 3384 }, { - "epoch": 0.7097924093101279, - "grad_norm": 6.382984546014219, - "learning_rate": 1.7844559673271046e-05, - "loss": 1.8111, + "epoch": 0.47797232420220276, + "grad_norm": 4.162240763158855, + "learning_rate": 1.913590922331623e-05, + "loss": 1.3772, "step": 3385 }, { - "epoch": 0.7100020968756553, - "grad_norm": 6.405746068556613, - "learning_rate": 1.7843155446084442e-05, - "loss": 1.9071, + "epoch": 0.47811352725218864, + "grad_norm": 4.587661154824074, + "learning_rate": 1.9135289221976608e-05, + "loss": 1.2216, "step": 3386 }, { - "epoch": 0.7102117844411826, - "grad_norm": 6.666519173822794, - "learning_rate": 1.784175081692252e-05, - "loss": 2.1207, + "epoch": 0.4782547303021745, + "grad_norm": 3.6660419428148123, + "learning_rate": 1.9134669008336814e-05, + "loss": 0.9977, "step": 3387 }, { - "epoch": 0.71042147200671, - "grad_norm": 7.325758610660215, - "learning_rate": 1.7840345785857266e-05, - "loss": 2.1251, + "epoch": 0.4783959333521604, + "grad_norm": 3.920102964329545, + "learning_rate": 1.9134048582411253e-05, + "loss": 1.1895, "step": 3388 }, { - "epoch": 0.7106311595722373, - "grad_norm": 5.717875962677902, - "learning_rate": 1.7838940352960695e-05, - "loss": 2.2822, + "epoch": 0.4785371364021463, + "grad_norm": 3.5015718995174887, + "learning_rate": 1.9133427944214348e-05, + "loss": 0.9435, "step": 3389 }, { - "epoch": 0.7108408471377647, - "grad_norm": 6.736777192336412, - "learning_rate": 1.7837534518304834e-05, - "loss": 1.768, + "epoch": 0.4786783394521322, + "grad_norm": 3.4630652288161676, + "learning_rate": 1.9132807093760523e-05, + "loss": 0.8616, "step": 3390 }, { - "epoch": 0.711050534703292, - "grad_norm": 5.278442364109976, - "learning_rate": 1.7836128281961737e-05, - "loss": 1.8642, + "epoch": 0.47881954250211806, + "grad_norm": 3.9356124631918106, + "learning_rate": 1.9132186031064203e-05, + "loss": 1.092, "step": 3391 }, { - "epoch": 0.7112602222688195, - "grad_norm": 8.095060226811032, - "learning_rate": 1.783472164400347e-05, - "loss": 1.8471, + "epoch": 0.47896074555210394, + "grad_norm": 3.371554290552784, + "learning_rate": 1.9131564756139824e-05, + "loss": 1.0282, "step": 3392 }, { - "epoch": 0.7114699098343468, - "grad_norm": 7.429160335813844, - "learning_rate": 1.783331460450213e-05, - "loss": 1.7619, + "epoch": 0.4791019486020898, + "grad_norm": 3.3672220502779506, + "learning_rate": 1.9130943269001826e-05, + "loss": 1.0422, "step": 3393 }, { - "epoch": 0.7116795973998742, - "grad_norm": 6.196982050832858, - "learning_rate": 1.7831907163529835e-05, - "loss": 1.6366, + "epoch": 0.4792431516520757, + "grad_norm": 3.4616552002763203, + "learning_rate": 1.9130321569664646e-05, + "loss": 0.9711, "step": 3394 }, { - "epoch": 0.7118892849654016, - "grad_norm": 6.708373573212274, - "learning_rate": 1.7830499321158714e-05, - "loss": 1.9427, + "epoch": 0.4793843547020616, + "grad_norm": 3.2863179515317205, + "learning_rate": 1.9129699658142738e-05, + "loss": 0.9863, "step": 3395 }, { - "epoch": 0.7120989725309289, - "grad_norm": 5.854080512468067, - "learning_rate": 1.782909107746092e-05, - "loss": 2.0476, + "epoch": 0.47952555775204747, + "grad_norm": 3.292545804068655, + "learning_rate": 1.9129077534450556e-05, + "loss": 0.9525, "step": 3396 }, { - "epoch": 0.7123086600964563, - "grad_norm": 5.8416101560721065, - "learning_rate": 1.7827682432508626e-05, - "loss": 2.0414, + "epoch": 0.4796667608020333, + "grad_norm": 4.005660207315239, + "learning_rate": 1.912845519860255e-05, + "loss": 1.2757, "step": 3397 }, { - "epoch": 0.7125183476619836, - "grad_norm": 6.213846082530136, - "learning_rate": 1.7826273386374033e-05, - "loss": 1.8577, + "epoch": 0.4798079638520192, + "grad_norm": 3.480320998315098, + "learning_rate": 1.912783265061319e-05, + "loss": 1.0295, "step": 3398 }, { - "epoch": 0.712728035227511, - "grad_norm": 6.5905807246393655, - "learning_rate": 1.7824863939129357e-05, - "loss": 1.8998, + "epoch": 0.47994916690200506, + "grad_norm": 3.851292362227884, + "learning_rate": 1.9127209890496942e-05, + "loss": 0.9082, "step": 3399 }, { - "epoch": 0.7129377227930384, - "grad_norm": 5.497870526272098, - "learning_rate": 1.782345409084683e-05, - "loss": 1.9242, + "epoch": 0.48009036995199095, + "grad_norm": 3.722702756613766, + "learning_rate": 1.9126586918268275e-05, + "loss": 0.9807, "step": 3400 }, { - "epoch": 0.7131474103585658, - "grad_norm": 6.723813363271084, - "learning_rate": 1.7822043841598713e-05, - "loss": 2.1381, + "epoch": 0.48023157300197683, + "grad_norm": 3.0578769712701255, + "learning_rate": 1.912596373394167e-05, + "loss": 0.9171, "step": 3401 }, { - "epoch": 0.7133570979240931, - "grad_norm": 6.9534229885764836, - "learning_rate": 1.7820633191457283e-05, - "loss": 2.1141, + "epoch": 0.4803727760519627, + "grad_norm": 3.847021197440844, + "learning_rate": 1.9125340337531612e-05, + "loss": 0.902, "step": 3402 }, { - "epoch": 0.7135667854896205, - "grad_norm": 5.605395298320408, - "learning_rate": 1.7819222140494838e-05, - "loss": 1.9769, + "epoch": 0.4805139791019486, + "grad_norm": 4.155291127453842, + "learning_rate": 1.912471672905258e-05, + "loss": 1.2731, "step": 3403 }, { - "epoch": 0.7137764730551478, - "grad_norm": 6.677481636063306, - "learning_rate": 1.7817810688783694e-05, - "loss": 2.0158, + "epoch": 0.4806551821519345, + "grad_norm": 4.1879517239443835, + "learning_rate": 1.912409290851908e-05, + "loss": 1.0017, "step": 3404 }, { - "epoch": 0.7139861606206752, - "grad_norm": 6.128042331587318, - "learning_rate": 1.7816398836396194e-05, - "loss": 1.9847, + "epoch": 0.48079638520192036, + "grad_norm": 3.575517224720626, + "learning_rate": 1.9123468875945594e-05, + "loss": 0.8847, "step": 3405 }, { - "epoch": 0.7141958481862025, - "grad_norm": 6.991839667544735, - "learning_rate": 1.78149865834047e-05, - "loss": 1.6439, + "epoch": 0.48093758825190625, + "grad_norm": 3.6504548416653932, + "learning_rate": 1.9122844631346632e-05, + "loss": 1.0449, "step": 3406 }, { - "epoch": 0.7144055357517299, - "grad_norm": 5.997705132795919, - "learning_rate": 1.7813573929881583e-05, - "loss": 1.5963, + "epoch": 0.48107879130189213, + "grad_norm": 4.349637777887388, + "learning_rate": 1.9122220174736706e-05, + "loss": 1.2529, "step": 3407 }, { - "epoch": 0.7146152233172572, - "grad_norm": 6.755305063871415, - "learning_rate": 1.7812160875899257e-05, - "loss": 1.9644, + "epoch": 0.481219994351878, + "grad_norm": 3.7312052831405973, + "learning_rate": 1.9121595506130317e-05, + "loss": 0.9394, "step": 3408 }, { - "epoch": 0.7148249108827847, - "grad_norm": 6.491557307878263, - "learning_rate": 1.781074742153013e-05, - "loss": 1.7461, + "epoch": 0.4813611974018639, + "grad_norm": 3.5449884862195407, + "learning_rate": 1.912097062554199e-05, + "loss": 1.0042, "step": 3409 }, { - "epoch": 0.715034598448312, - "grad_norm": 5.939749072308881, - "learning_rate": 1.7809333566846655e-05, - "loss": 1.9942, + "epoch": 0.4815024004518498, + "grad_norm": 3.8421657485544785, + "learning_rate": 1.9120345532986243e-05, + "loss": 1.1366, "step": 3410 }, { - "epoch": 0.7152442860138394, - "grad_norm": 6.709428110954093, - "learning_rate": 1.780791931192129e-05, - "loss": 1.9898, + "epoch": 0.48164360350183566, + "grad_norm": 3.902178560780491, + "learning_rate": 1.9119720228477607e-05, + "loss": 1.0216, "step": 3411 }, { - "epoch": 0.7154539735793668, - "grad_norm": 6.380867611272872, - "learning_rate": 1.780650465682652e-05, - "loss": 1.7385, + "epoch": 0.48178480655182154, + "grad_norm": 3.680504901300528, + "learning_rate": 1.911909471203061e-05, + "loss": 1.0282, "step": 3412 }, { - "epoch": 0.7156636611448941, - "grad_norm": 8.219747417994995, - "learning_rate": 1.780508960163484e-05, - "loss": 1.8948, + "epoch": 0.4819260096018074, + "grad_norm": 4.018592863261293, + "learning_rate": 1.911846898365979e-05, + "loss": 1.202, "step": 3413 }, { - "epoch": 0.7158733487104215, - "grad_norm": 7.883292899507356, - "learning_rate": 1.780367414641879e-05, - "loss": 2.2643, + "epoch": 0.48206721265179325, + "grad_norm": 3.034031123555134, + "learning_rate": 1.911784304337969e-05, + "loss": 0.7451, "step": 3414 }, { - "epoch": 0.7160830362759488, - "grad_norm": 6.66438142305715, - "learning_rate": 1.78022582912509e-05, - "loss": 1.5956, + "epoch": 0.48220841570177914, + "grad_norm": 3.778542591206886, + "learning_rate": 1.9117216891204856e-05, + "loss": 1.0307, "step": 3415 }, { - "epoch": 0.7162927238414762, - "grad_norm": 7.205451132554181, - "learning_rate": 1.7800842036203735e-05, - "loss": 2.2804, + "epoch": 0.482349618751765, + "grad_norm": 3.8424665883673006, + "learning_rate": 1.911659052714984e-05, + "loss": 1.1489, "step": 3416 }, { - "epoch": 0.7165024114070035, - "grad_norm": 6.1311294261339455, - "learning_rate": 1.7799425381349892e-05, - "loss": 1.8552, + "epoch": 0.4824908218017509, + "grad_norm": 3.831411086354194, + "learning_rate": 1.9115963951229194e-05, + "loss": 0.9748, "step": 3417 }, { - "epoch": 0.716712098972531, - "grad_norm": 6.580376636123548, - "learning_rate": 1.779800832676197e-05, - "loss": 1.929, + "epoch": 0.4826320248517368, + "grad_norm": 4.234331546700752, + "learning_rate": 1.911533716345748e-05, + "loss": 1.2547, "step": 3418 }, { - "epoch": 0.7169217865380583, - "grad_norm": 5.8636877000384375, - "learning_rate": 1.7796590872512597e-05, - "loss": 1.982, + "epoch": 0.48277322790172267, + "grad_norm": 3.418024359677057, + "learning_rate": 1.9114710163849273e-05, + "loss": 0.9555, "step": 3419 }, { - "epoch": 0.7171314741035857, - "grad_norm": 6.151850748745358, - "learning_rate": 1.7795173018674415e-05, - "loss": 1.9775, + "epoch": 0.48291443095170855, + "grad_norm": 3.529881097803363, + "learning_rate": 1.9114082952419134e-05, + "loss": 1.1858, "step": 3420 }, { - "epoch": 0.717341161669113, - "grad_norm": 6.301660568891588, - "learning_rate": 1.77937547653201e-05, - "loss": 2.0825, + "epoch": 0.48305563400169443, + "grad_norm": 3.371579821420477, + "learning_rate": 1.9113455529181645e-05, + "loss": 1.0823, "step": 3421 }, { - "epoch": 0.7175508492346404, - "grad_norm": 6.690238399078991, - "learning_rate": 1.7792336112522334e-05, - "loss": 2.2239, + "epoch": 0.4831968370516803, + "grad_norm": 3.923038124732678, + "learning_rate": 1.9112827894151386e-05, + "loss": 0.9391, "step": 3422 }, { - "epoch": 0.7177605368001677, - "grad_norm": 6.869769473789559, - "learning_rate": 1.7790917060353828e-05, - "loss": 1.9454, + "epoch": 0.4833380401016662, + "grad_norm": 3.401025704207994, + "learning_rate": 1.911220004734294e-05, + "loss": 1.005, "step": 3423 }, { - "epoch": 0.7179702243656951, - "grad_norm": 6.579389203467918, - "learning_rate": 1.7789497608887308e-05, - "loss": 1.7717, + "epoch": 0.4834792431516521, + "grad_norm": 4.420814529132933, + "learning_rate": 1.9111571988770903e-05, + "loss": 1.1184, "step": 3424 }, { - "epoch": 0.7181799119312224, - "grad_norm": 6.36883170452858, - "learning_rate": 1.778807775819553e-05, - "loss": 2.1342, + "epoch": 0.48362044620163797, + "grad_norm": 3.885034489127312, + "learning_rate": 1.9110943718449867e-05, + "loss": 1.1469, "step": 3425 }, { - "epoch": 0.7183895994967499, - "grad_norm": 5.822765740596492, - "learning_rate": 1.7786657508351254e-05, - "loss": 1.6524, + "epoch": 0.48376164925162385, + "grad_norm": 4.410805753695287, + "learning_rate": 1.9110315236394434e-05, + "loss": 1.4134, "step": 3426 }, { - "epoch": 0.7185992870622772, - "grad_norm": 6.091814132077698, - "learning_rate": 1.778523685942728e-05, - "loss": 2.0153, + "epoch": 0.48390285230160973, + "grad_norm": 4.326184314873575, + "learning_rate": 1.910968654261921e-05, + "loss": 1.0962, "step": 3427 }, { - "epoch": 0.7188089746278046, - "grad_norm": 5.9271552982944895, - "learning_rate": 1.778381581149641e-05, - "loss": 1.8, + "epoch": 0.4840440553515956, + "grad_norm": 3.2805236328412475, + "learning_rate": 1.9109057637138805e-05, + "loss": 0.8128, "step": 3428 }, { - "epoch": 0.7190186621933319, - "grad_norm": 6.657554735609322, - "learning_rate": 1.778239436463148e-05, - "loss": 2.0461, + "epoch": 0.4841852584015815, + "grad_norm": 3.785453894308553, + "learning_rate": 1.9108428519967832e-05, + "loss": 0.9858, "step": 3429 }, { - "epoch": 0.7192283497588593, - "grad_norm": 5.640561712555884, - "learning_rate": 1.778097251890534e-05, - "loss": 1.6521, + "epoch": 0.4843264614515674, + "grad_norm": 4.010849863576899, + "learning_rate": 1.9107799191120913e-05, + "loss": 1.0224, "step": 3430 }, { - "epoch": 0.7194380373243867, - "grad_norm": 6.788700653261892, - "learning_rate": 1.7779550274390867e-05, - "loss": 2.0177, + "epoch": 0.4844676645015532, + "grad_norm": 3.848826125711394, + "learning_rate": 1.910716965061268e-05, + "loss": 0.9441, "step": 3431 }, { - "epoch": 0.719647724889914, - "grad_norm": 6.7073661504524225, - "learning_rate": 1.7778127631160948e-05, - "loss": 2.2163, + "epoch": 0.4846088675515391, + "grad_norm": 3.7065182647356822, + "learning_rate": 1.910653989845775e-05, + "loss": 0.9439, "step": 3432 }, { - "epoch": 0.7198574124554414, - "grad_norm": 6.190375259371967, - "learning_rate": 1.77767045892885e-05, - "loss": 2.1506, + "epoch": 0.484750070601525, + "grad_norm": 4.122174485099892, + "learning_rate": 1.9105909934670768e-05, + "loss": 1.186, "step": 3433 }, { - "epoch": 0.7200671000209687, - "grad_norm": 6.040513721093316, - "learning_rate": 1.7775281148846447e-05, - "loss": 1.6988, + "epoch": 0.48489127365151086, + "grad_norm": 3.578719099847629, + "learning_rate": 1.9105279759266376e-05, + "loss": 1.2185, "step": 3434 }, { - "epoch": 0.7202767875864962, - "grad_norm": 6.030079023648161, - "learning_rate": 1.7773857309907754e-05, - "loss": 1.831, + "epoch": 0.48503247670149674, + "grad_norm": 3.6809531029288336, + "learning_rate": 1.910464937225921e-05, + "loss": 0.9385, "step": 3435 }, { - "epoch": 0.7204864751520235, - "grad_norm": 6.79727168323244, - "learning_rate": 1.777243307254539e-05, - "loss": 1.6697, + "epoch": 0.4851736797514826, + "grad_norm": 3.929430668705745, + "learning_rate": 1.9104018773663924e-05, + "loss": 1.1098, "step": 3436 }, { - "epoch": 0.7206961627175509, - "grad_norm": 4.873972454541965, - "learning_rate": 1.7771008436832348e-05, - "loss": 1.3224, + "epoch": 0.4853148828014685, + "grad_norm": 3.9409411212468095, + "learning_rate": 1.910338796349518e-05, + "loss": 1.1119, "step": 3437 }, { - "epoch": 0.7209058502830782, - "grad_norm": 6.427984954055331, - "learning_rate": 1.776958340284165e-05, - "loss": 1.7845, + "epoch": 0.4854560858514544, + "grad_norm": 3.8260548324995924, + "learning_rate": 1.9102756941767625e-05, + "loss": 0.9604, "step": 3438 }, { - "epoch": 0.7211155378486056, - "grad_norm": 7.004834857927032, - "learning_rate": 1.7768157970646326e-05, - "loss": 1.9751, + "epoch": 0.48559728890144027, + "grad_norm": 3.9404208700750605, + "learning_rate": 1.910212570849593e-05, + "loss": 0.9844, "step": 3439 }, { - "epoch": 0.7213252254141329, - "grad_norm": 5.917936110327826, - "learning_rate": 1.7766732140319432e-05, - "loss": 1.8962, + "epoch": 0.48573849195142615, + "grad_norm": 4.126591035097861, + "learning_rate": 1.9101494263694764e-05, + "loss": 1.1435, "step": 3440 }, { - "epoch": 0.7215349129796603, - "grad_norm": 7.552700500372858, - "learning_rate": 1.7765305911934043e-05, - "loss": 1.8722, + "epoch": 0.48587969500141204, + "grad_norm": 3.645127778470133, + "learning_rate": 1.9100862607378807e-05, + "loss": 1.2501, "step": 3441 }, { - "epoch": 0.7217446005451876, - "grad_norm": 8.553586489881292, - "learning_rate": 1.776387928556326e-05, - "loss": 2.1712, + "epoch": 0.4860208980513979, + "grad_norm": 3.822226688723868, + "learning_rate": 1.9100230739562728e-05, + "loss": 1.0962, "step": 3442 }, { - "epoch": 0.721954288110715, - "grad_norm": 6.550936595278142, - "learning_rate": 1.7762452261280198e-05, - "loss": 1.793, + "epoch": 0.4861621011013838, + "grad_norm": 5.007249045939249, + "learning_rate": 1.9099598660261217e-05, + "loss": 1.1519, "step": 3443 }, { - "epoch": 0.7221639756762424, - "grad_norm": 5.722258004857852, - "learning_rate": 1.7761024839157994e-05, - "loss": 1.8859, + "epoch": 0.4863033041513697, + "grad_norm": 3.964620345455004, + "learning_rate": 1.9098966369488967e-05, + "loss": 1.183, "step": 3444 }, { - "epoch": 0.7223736632417698, - "grad_norm": 6.050880026365092, - "learning_rate": 1.7759597019269807e-05, - "loss": 1.8545, + "epoch": 0.48644450720135557, + "grad_norm": 3.812646627025024, + "learning_rate": 1.9098333867260667e-05, + "loss": 1.084, "step": 3445 }, { - "epoch": 0.7225833508072971, - "grad_norm": 5.970337553784131, - "learning_rate": 1.7758168801688812e-05, - "loss": 1.627, + "epoch": 0.48658571025134145, + "grad_norm": 3.697637291498036, + "learning_rate": 1.9097701153591015e-05, + "loss": 0.7982, "step": 3446 }, { - "epoch": 0.7227930383728245, - "grad_norm": 5.497133917244876, - "learning_rate": 1.775674018648821e-05, - "loss": 1.6159, + "epoch": 0.48672691330132734, + "grad_norm": 3.9138375830367225, + "learning_rate": 1.9097068228494716e-05, + "loss": 1.135, "step": 3447 }, { - "epoch": 0.7230027259383519, - "grad_norm": 5.879714716802198, - "learning_rate": 1.7755311173741222e-05, - "loss": 1.4422, + "epoch": 0.48686811635131316, + "grad_norm": 4.349520294198776, + "learning_rate": 1.9096435091986485e-05, + "loss": 1.0692, "step": 3448 }, { - "epoch": 0.7232124135038792, - "grad_norm": 6.272006030678814, - "learning_rate": 1.7753881763521083e-05, - "loss": 2.1879, + "epoch": 0.48700931940129905, + "grad_norm": 3.955283551908681, + "learning_rate": 1.909580174408103e-05, + "loss": 0.9699, "step": 3449 }, { - "epoch": 0.7234221010694066, - "grad_norm": 5.315662421164023, - "learning_rate": 1.775245195590106e-05, - "loss": 1.6826, + "epoch": 0.48715052245128493, + "grad_norm": 3.5521365260904094, + "learning_rate": 1.9095168184793067e-05, + "loss": 0.9232, "step": 3450 }, { - "epoch": 0.7236317886349339, - "grad_norm": 6.182095613654355, - "learning_rate": 1.775102175095442e-05, - "loss": 1.9166, + "epoch": 0.4872917255012708, + "grad_norm": 3.931929272189284, + "learning_rate": 1.9094534414137323e-05, + "loss": 0.964, "step": 3451 }, { - "epoch": 0.7238414762004614, - "grad_norm": 6.204622240673169, - "learning_rate": 1.774959114875448e-05, - "loss": 1.9598, + "epoch": 0.4874329285512567, + "grad_norm": 3.6327513392963153, + "learning_rate": 1.9093900432128532e-05, + "loss": 1.0156, "step": 3452 }, { - "epoch": 0.7240511637659887, - "grad_norm": 7.08086173344824, - "learning_rate": 1.7748160149374553e-05, - "loss": 2.2209, + "epoch": 0.4875741316012426, + "grad_norm": 4.503291832442723, + "learning_rate": 1.909326623878142e-05, + "loss": 1.0626, "step": 3453 }, { - "epoch": 0.7242608513315161, - "grad_norm": 5.474799221709456, - "learning_rate": 1.7746728752887973e-05, - "loss": 1.7998, + "epoch": 0.48771533465122846, + "grad_norm": 4.143719888525588, + "learning_rate": 1.9092631834110723e-05, + "loss": 0.9787, "step": 3454 }, { - "epoch": 0.7244705388970434, - "grad_norm": 6.905784551197978, - "learning_rate": 1.7745296959368114e-05, - "loss": 1.6539, + "epoch": 0.48785653770121434, + "grad_norm": 3.9092818547648758, + "learning_rate": 1.9091997218131195e-05, + "loss": 0.9621, "step": 3455 }, { - "epoch": 0.7246802264625708, - "grad_norm": 5.870819503536297, - "learning_rate": 1.774386476888835e-05, - "loss": 2.0138, + "epoch": 0.4879977407512002, + "grad_norm": 3.530222528244613, + "learning_rate": 1.9091362390857578e-05, + "loss": 1.168, "step": 3456 }, { - "epoch": 0.7248899140280981, - "grad_norm": 7.692670084785689, - "learning_rate": 1.7742432181522087e-05, - "loss": 1.9974, + "epoch": 0.4881389438011861, + "grad_norm": 3.675581947173463, + "learning_rate": 1.9090727352304622e-05, + "loss": 1.0877, "step": 3457 }, { - "epoch": 0.7250996015936255, - "grad_norm": 5.801440544263104, - "learning_rate": 1.7740999197342745e-05, - "loss": 1.5593, + "epoch": 0.488280146851172, + "grad_norm": 4.970543667609957, + "learning_rate": 1.909009210248709e-05, + "loss": 1.437, "step": 3458 }, { - "epoch": 0.7253092891591528, - "grad_norm": 6.798539837196515, - "learning_rate": 1.7739565816423768e-05, - "loss": 2.2256, + "epoch": 0.4884213499011579, + "grad_norm": 4.062471857437775, + "learning_rate": 1.9089456641419745e-05, + "loss": 1.0596, "step": 3459 }, { - "epoch": 0.7255189767246802, - "grad_norm": 6.745810902187541, - "learning_rate": 1.773813203883862e-05, - "loss": 1.8935, + "epoch": 0.48856255295114376, + "grad_norm": 3.9983954462824354, + "learning_rate": 1.9088820969117348e-05, + "loss": 1.0598, "step": 3460 }, { - "epoch": 0.7257286642902075, - "grad_norm": 6.2690184499351895, - "learning_rate": 1.773669786466078e-05, - "loss": 1.8115, + "epoch": 0.48870375600112964, + "grad_norm": 3.1835296124825794, + "learning_rate": 1.9088185085594682e-05, + "loss": 0.9755, "step": 3461 }, { - "epoch": 0.725938351855735, - "grad_norm": 5.979825686970501, - "learning_rate": 1.7735263293963762e-05, - "loss": 1.8996, + "epoch": 0.4888449590511155, + "grad_norm": 3.4085720454931336, + "learning_rate": 1.9087548990866515e-05, + "loss": 1.0636, "step": 3462 }, { - "epoch": 0.7261480394212623, - "grad_norm": 6.069687177172668, - "learning_rate": 1.7733828326821082e-05, - "loss": 1.5786, + "epoch": 0.4889861621011014, + "grad_norm": 4.062680833969174, + "learning_rate": 1.9086912684947638e-05, + "loss": 1.14, "step": 3463 }, { - "epoch": 0.7263577269867897, - "grad_norm": 6.616850687043835, - "learning_rate": 1.7732392963306283e-05, - "loss": 1.9543, + "epoch": 0.4891273651510873, + "grad_norm": 3.9214652401637013, + "learning_rate": 1.9086276167852834e-05, + "loss": 0.9294, "step": 3464 }, { - "epoch": 0.726567414552317, - "grad_norm": 6.054120991260907, - "learning_rate": 1.773095720349294e-05, - "loss": 1.9633, + "epoch": 0.4892685682010731, + "grad_norm": 3.5280094152266086, + "learning_rate": 1.9085639439596895e-05, + "loss": 0.914, "step": 3465 }, { - "epoch": 0.7267771021178444, - "grad_norm": 6.646569966368481, - "learning_rate": 1.7729521047454626e-05, - "loss": 1.6973, + "epoch": 0.489409771251059, + "grad_norm": 3.1130402571183082, + "learning_rate": 1.908500250019462e-05, + "loss": 0.9503, "step": 3466 }, { - "epoch": 0.7269867896833718, - "grad_norm": 6.299582360081332, - "learning_rate": 1.7728084495264952e-05, - "loss": 1.9104, + "epoch": 0.4895509743010449, + "grad_norm": 4.117890568018956, + "learning_rate": 1.908436534966081e-05, + "loss": 1.1695, "step": 3467 }, { - "epoch": 0.7271964772488991, - "grad_norm": 6.17159329839842, - "learning_rate": 1.7726647546997546e-05, - "loss": 2.1055, + "epoch": 0.48969217735103077, + "grad_norm": 3.311755863819589, + "learning_rate": 1.908372798801027e-05, + "loss": 0.8892, "step": 3468 }, { - "epoch": 0.7274061648144265, - "grad_norm": 6.994537329881502, - "learning_rate": 1.7725210202726052e-05, - "loss": 1.6156, + "epoch": 0.48983338040101665, + "grad_norm": 4.144551027499583, + "learning_rate": 1.9083090415257817e-05, + "loss": 1.1622, "step": 3469 }, { - "epoch": 0.7276158523799539, - "grad_norm": 6.857116781932151, - "learning_rate": 1.7723772462524138e-05, - "loss": 1.7521, + "epoch": 0.48997458345100253, + "grad_norm": 3.7120132939994677, + "learning_rate": 1.9082452631418265e-05, + "loss": 0.9346, "step": 3470 }, { - "epoch": 0.7278255399454813, - "grad_norm": 6.637617934924966, - "learning_rate": 1.7722334326465486e-05, - "loss": 2.0023, + "epoch": 0.4901157865009884, + "grad_norm": 3.186344405483788, + "learning_rate": 1.9081814636506438e-05, + "loss": 0.811, "step": 3471 }, { - "epoch": 0.7280352275110086, - "grad_norm": 8.33253284881359, - "learning_rate": 1.7720895794623808e-05, - "loss": 1.8228, + "epoch": 0.4902569895509743, + "grad_norm": 3.5160990970341492, + "learning_rate": 1.9081176430537158e-05, + "loss": 0.9254, "step": 3472 }, { - "epoch": 0.728244915076536, - "grad_norm": 5.9730835066963035, - "learning_rate": 1.7719456867072827e-05, - "loss": 1.6865, + "epoch": 0.4903981926009602, + "grad_norm": 3.9141559781379365, + "learning_rate": 1.908053801352526e-05, + "loss": 0.993, "step": 3473 }, { - "epoch": 0.7284546026420633, - "grad_norm": 6.621109317941283, - "learning_rate": 1.7718017543886298e-05, - "loss": 1.3069, + "epoch": 0.49053939565094606, + "grad_norm": 4.008458564745851, + "learning_rate": 1.9079899385485583e-05, + "loss": 1.1116, "step": 3474 }, { - "epoch": 0.7286642902075907, - "grad_norm": 6.48723851726757, - "learning_rate": 1.771657782513798e-05, - "loss": 2.0635, + "epoch": 0.49068059870093195, + "grad_norm": 4.204329478909524, + "learning_rate": 1.907926054643296e-05, + "loss": 1.0946, "step": 3475 }, { - "epoch": 0.728873977773118, - "grad_norm": 6.755567726469187, - "learning_rate": 1.7715137710901665e-05, - "loss": 2.0183, + "epoch": 0.49082180175091783, + "grad_norm": 4.130585278010223, + "learning_rate": 1.907862149638225e-05, + "loss": 0.9859, "step": 3476 }, { - "epoch": 0.7290836653386454, - "grad_norm": 6.522208902453115, - "learning_rate": 1.771369720125116e-05, - "loss": 2.0141, + "epoch": 0.4909630048009037, + "grad_norm": 4.26339885166948, + "learning_rate": 1.9077982235348294e-05, + "loss": 1.0285, "step": 3477 }, { - "epoch": 0.7292933529041727, - "grad_norm": 7.386968784195653, - "learning_rate": 1.7712256296260296e-05, - "loss": 1.9449, + "epoch": 0.4911042078508896, + "grad_norm": 3.3491681739287373, + "learning_rate": 1.907734276334595e-05, + "loss": 0.9969, "step": 3478 }, { - "epoch": 0.7295030404697002, - "grad_norm": 7.402302800520711, - "learning_rate": 1.771081499600292e-05, - "loss": 2.2637, + "epoch": 0.4912454109008755, + "grad_norm": 4.281517881135393, + "learning_rate": 1.9076703080390082e-05, + "loss": 1.2935, "step": 3479 }, { - "epoch": 0.7297127280352275, - "grad_norm": 6.237474209578294, - "learning_rate": 1.7709373300552904e-05, - "loss": 1.6653, + "epoch": 0.49138661395086136, + "grad_norm": 3.8894579194064605, + "learning_rate": 1.907606318649555e-05, + "loss": 1.2278, "step": 3480 }, { - "epoch": 0.7299224156007549, - "grad_norm": 5.4814038472193305, - "learning_rate": 1.770793120998413e-05, - "loss": 1.6939, + "epoch": 0.49152781700084724, + "grad_norm": 4.105243828152826, + "learning_rate": 1.907542308167724e-05, + "loss": 0.9711, "step": 3481 }, { - "epoch": 0.7301321031662822, - "grad_norm": 6.025095160487646, - "learning_rate": 1.770648872437052e-05, - "loss": 1.4771, + "epoch": 0.49166902005083307, + "grad_norm": 4.597564693004422, + "learning_rate": 1.9074782765950007e-05, + "loss": 1.207, "step": 3482 }, { - "epoch": 0.7303417907318096, - "grad_norm": 6.046824635867647, - "learning_rate": 1.770504584378599e-05, - "loss": 1.6821, + "epoch": 0.49181022310081896, + "grad_norm": 4.064432349557505, + "learning_rate": 1.9074142239328745e-05, + "loss": 1.1267, "step": 3483 }, { - "epoch": 0.730551478297337, - "grad_norm": 5.785323693565155, - "learning_rate": 1.77036025683045e-05, - "loss": 1.4389, + "epoch": 0.49195142615080484, + "grad_norm": 4.389503086047374, + "learning_rate": 1.9073501501828335e-05, + "loss": 1.1076, "step": 3484 }, { - "epoch": 0.7307611658628643, - "grad_norm": 6.790880000296394, - "learning_rate": 1.7702158898000017e-05, - "loss": 2.0217, + "epoch": 0.4920926292007907, + "grad_norm": 4.398777556596936, + "learning_rate": 1.907286055346367e-05, + "loss": 1.1999, "step": 3485 }, { - "epoch": 0.7309708534283917, - "grad_norm": 8.108517432852935, - "learning_rate": 1.770071483294653e-05, - "loss": 1.9364, + "epoch": 0.4922338322507766, + "grad_norm": 4.059877403803943, + "learning_rate": 1.9072219394249644e-05, + "loss": 1.2649, "step": 3486 }, { - "epoch": 0.731180540993919, - "grad_norm": 5.326151728198533, - "learning_rate": 1.7699270373218054e-05, - "loss": 1.7126, + "epoch": 0.4923750353007625, + "grad_norm": 3.7989982541735827, + "learning_rate": 1.9071578024201156e-05, + "loss": 1.0269, "step": 3487 }, { - "epoch": 0.7313902285594465, - "grad_norm": 7.089225512732965, - "learning_rate": 1.7697825518888616e-05, - "loss": 2.2343, + "epoch": 0.49251623835074837, + "grad_norm": 3.980681190322074, + "learning_rate": 1.9070936443333113e-05, + "loss": 1.1437, "step": 3488 }, { - "epoch": 0.7315999161249738, - "grad_norm": 6.322263670807007, - "learning_rate": 1.7696380270032268e-05, - "loss": 2.0287, + "epoch": 0.49265744140073425, + "grad_norm": 3.9641709581235163, + "learning_rate": 1.9070294651660423e-05, + "loss": 1.1662, "step": 3489 }, { - "epoch": 0.7318096036905012, - "grad_norm": 6.158213524366527, - "learning_rate": 1.7694934626723088e-05, - "loss": 1.9255, + "epoch": 0.49279864445072014, + "grad_norm": 3.2399245304851214, + "learning_rate": 1.9069652649198004e-05, + "loss": 0.8165, "step": 3490 }, { - "epoch": 0.7320192912560285, - "grad_norm": 6.843910307944522, - "learning_rate": 1.7693488589035158e-05, - "loss": 1.5493, + "epoch": 0.492939847500706, + "grad_norm": 4.246085967397897, + "learning_rate": 1.9069010435960774e-05, + "loss": 1.2267, "step": 3491 }, { - "epoch": 0.7322289788215559, - "grad_norm": 7.633805551519663, - "learning_rate": 1.7692042157042593e-05, - "loss": 1.6487, + "epoch": 0.4930810505506919, + "grad_norm": 3.218082038625546, + "learning_rate": 1.906836801196366e-05, + "loss": 0.9061, "step": 3492 }, { - "epoch": 0.7324386663870832, - "grad_norm": 7.45311166854082, - "learning_rate": 1.769059533081953e-05, - "loss": 2.0295, + "epoch": 0.4932222536006778, + "grad_norm": 3.9609534062687937, + "learning_rate": 1.9067725377221592e-05, + "loss": 0.9325, "step": 3493 }, { - "epoch": 0.7326483539526106, - "grad_norm": 6.25105710000813, - "learning_rate": 1.7689148110440114e-05, - "loss": 1.5065, + "epoch": 0.49336345665066367, + "grad_norm": 3.591459705043712, + "learning_rate": 1.9067082531749496e-05, + "loss": 1.0049, "step": 3494 }, { - "epoch": 0.7328580415181379, - "grad_norm": 6.930273741112612, - "learning_rate": 1.7687700495978523e-05, - "loss": 1.5519, + "epoch": 0.49350465970064955, + "grad_norm": 3.867334613421313, + "learning_rate": 1.9066439475562323e-05, + "loss": 0.9205, "step": 3495 }, { - "epoch": 0.7330677290836654, - "grad_norm": 6.481925845384008, - "learning_rate": 1.7686252487508948e-05, - "loss": 1.7427, + "epoch": 0.49364586275063543, + "grad_norm": 3.8024562222819047, + "learning_rate": 1.9065796208675005e-05, + "loss": 1.0396, "step": 3496 }, { - "epoch": 0.7332774166491927, - "grad_norm": 6.829471511972074, - "learning_rate": 1.76848040851056e-05, - "loss": 2.1041, + "epoch": 0.4937870658006213, + "grad_norm": 3.7371667694146304, + "learning_rate": 1.9065152731102503e-05, + "loss": 1.0713, "step": 3497 }, { - "epoch": 0.7334871042147201, - "grad_norm": 6.590349377260757, - "learning_rate": 1.7683355288842715e-05, - "loss": 1.8367, + "epoch": 0.4939282688506072, + "grad_norm": 3.6762293501513152, + "learning_rate": 1.9064509042859767e-05, + "loss": 0.9022, "step": 3498 }, { - "epoch": 0.7336967917802474, - "grad_norm": 6.882213541841275, - "learning_rate": 1.768190609879455e-05, - "loss": 1.955, + "epoch": 0.494069471900593, + "grad_norm": 3.6523766949980465, + "learning_rate": 1.9063865143961753e-05, + "loss": 1.0318, "step": 3499 }, { - "epoch": 0.7339064793457748, - "grad_norm": 5.139295179718113, - "learning_rate": 1.7680456515035366e-05, - "loss": 1.6745, + "epoch": 0.4942106749505789, + "grad_norm": 4.507429588435184, + "learning_rate": 1.906322103442343e-05, + "loss": 1.0702, "step": 3500 }, { - "epoch": 0.7341161669113022, - "grad_norm": 7.716769189430109, - "learning_rate": 1.7679006537639467e-05, - "loss": 2.0494, + "epoch": 0.4943518780005648, + "grad_norm": 3.9423261362209607, + "learning_rate": 1.9062576714259764e-05, + "loss": 1.0949, "step": 3501 }, { - "epoch": 0.7343258544768295, - "grad_norm": 6.271225379507168, - "learning_rate": 1.7677556166681164e-05, - "loss": 1.6788, + "epoch": 0.4944930810505507, + "grad_norm": 3.535695174321498, + "learning_rate": 1.9061932183485726e-05, + "loss": 0.946, "step": 3502 }, { - "epoch": 0.7345355420423569, - "grad_norm": 6.164585256650097, - "learning_rate": 1.7676105402234793e-05, - "loss": 2.1201, + "epoch": 0.49463428410053656, + "grad_norm": 3.6129699721214856, + "learning_rate": 1.9061287442116302e-05, + "loss": 1.1201, "step": 3503 }, { - "epoch": 0.7347452296078842, - "grad_norm": 7.786761850429331, - "learning_rate": 1.7674654244374707e-05, - "loss": 2.1877, + "epoch": 0.49477548715052244, + "grad_norm": 3.631276742502082, + "learning_rate": 1.906064249016647e-05, + "loss": 0.9405, "step": 3504 }, { - "epoch": 0.7349549171734117, - "grad_norm": 7.573615646008374, - "learning_rate": 1.767320269317528e-05, - "loss": 1.8912, + "epoch": 0.4949166902005083, + "grad_norm": 3.3224384105335725, + "learning_rate": 1.9059997327651218e-05, + "loss": 0.9827, "step": 3505 }, { - "epoch": 0.735164604738939, - "grad_norm": 6.349820106832358, - "learning_rate": 1.7671750748710907e-05, - "loss": 1.7058, + "epoch": 0.4950578932504942, + "grad_norm": 3.634825223274179, + "learning_rate": 1.905935195458554e-05, + "loss": 0.9433, "step": 3506 }, { - "epoch": 0.7353742923044664, - "grad_norm": 7.643900051261221, - "learning_rate": 1.7670298411056003e-05, - "loss": 2.1291, + "epoch": 0.4951990963004801, + "grad_norm": 4.80886697204205, + "learning_rate": 1.9058706370984436e-05, + "loss": 1.3736, "step": 3507 }, { - "epoch": 0.7355839798699937, - "grad_norm": 6.287763047285248, - "learning_rate": 1.7668845680284997e-05, - "loss": 1.7857, + "epoch": 0.495340299350466, + "grad_norm": 3.6857950412910703, + "learning_rate": 1.9058060576862912e-05, + "loss": 1.0958, "step": 3508 }, { - "epoch": 0.7357936674355211, - "grad_norm": 7.508387035284511, - "learning_rate": 1.7667392556472353e-05, - "loss": 1.9974, + "epoch": 0.49548150240045186, + "grad_norm": 4.333364207396368, + "learning_rate": 1.9057414572235967e-05, + "loss": 1.2163, "step": 3509 }, { - "epoch": 0.7360033550010484, - "grad_norm": 6.425994815871797, - "learning_rate": 1.766593903969254e-05, - "loss": 2.0255, + "epoch": 0.49562270545043774, + "grad_norm": 4.009949911406543, + "learning_rate": 1.905676835711862e-05, + "loss": 1.1628, "step": 3510 }, { - "epoch": 0.7362130425665758, - "grad_norm": 6.639558369019603, - "learning_rate": 1.7664485130020058e-05, - "loss": 1.7627, + "epoch": 0.4957639085004236, + "grad_norm": 3.2443932876879185, + "learning_rate": 1.905612193152589e-05, + "loss": 0.8506, "step": 3511 }, { - "epoch": 0.7364227301321031, - "grad_norm": 6.453931121376507, - "learning_rate": 1.7663030827529416e-05, - "loss": 1.8288, + "epoch": 0.4959051115504095, + "grad_norm": 3.5896949468709267, + "learning_rate": 1.9055475295472792e-05, + "loss": 0.9275, "step": 3512 }, { - "epoch": 0.7366324176976305, - "grad_norm": 6.327640958216904, - "learning_rate": 1.7661576132295157e-05, - "loss": 2.0013, + "epoch": 0.4960463146003954, + "grad_norm": 3.951651347864501, + "learning_rate": 1.9054828448974363e-05, + "loss": 1.1606, "step": 3513 }, { - "epoch": 0.7368421052631579, - "grad_norm": 6.290983683195035, - "learning_rate": 1.7660121044391833e-05, - "loss": 1.8652, + "epoch": 0.49618751765038127, + "grad_norm": 3.5598371236198965, + "learning_rate": 1.905418139204563e-05, + "loss": 1.067, "step": 3514 }, { - "epoch": 0.7370517928286853, - "grad_norm": 6.515871861467369, - "learning_rate": 1.7658665563894016e-05, - "loss": 1.7338, + "epoch": 0.49632872070036715, + "grad_norm": 4.771926602626114, + "learning_rate": 1.9053534124701633e-05, + "loss": 1.2652, "step": 3515 }, { - "epoch": 0.7372614803942126, - "grad_norm": 6.119074266501888, - "learning_rate": 1.7657209690876307e-05, - "loss": 1.6997, + "epoch": 0.496469923750353, + "grad_norm": 3.633456462245338, + "learning_rate": 1.9052886646957413e-05, + "loss": 1.0038, "step": 3516 }, { - "epoch": 0.73747116795974, - "grad_norm": 7.466766158790882, - "learning_rate": 1.7655753425413322e-05, - "loss": 1.9961, + "epoch": 0.49661112680033886, + "grad_norm": 4.6212961538377195, + "learning_rate": 1.905223895882802e-05, + "loss": 1.422, "step": 3517 }, { - "epoch": 0.7376808555252673, - "grad_norm": 7.272743425278089, - "learning_rate": 1.7654296767579695e-05, - "loss": 1.7095, + "epoch": 0.49675232985032475, + "grad_norm": 4.364820781011969, + "learning_rate": 1.9051591060328496e-05, + "loss": 1.0444, "step": 3518 }, { - "epoch": 0.7378905430907947, - "grad_norm": 6.297719906303153, - "learning_rate": 1.7652839717450084e-05, - "loss": 1.4778, + "epoch": 0.49689353290031063, + "grad_norm": 3.442508120621266, + "learning_rate": 1.9050942951473908e-05, + "loss": 0.9931, "step": 3519 }, { - "epoch": 0.7381002306563221, - "grad_norm": 6.545851691498975, - "learning_rate": 1.7651382275099163e-05, - "loss": 1.9275, + "epoch": 0.4970347359502965, + "grad_norm": 3.91237014968559, + "learning_rate": 1.9050294632279317e-05, + "loss": 1.0998, "step": 3520 }, { - "epoch": 0.7383099182218494, - "grad_norm": 5.975519901383928, - "learning_rate": 1.764992444060163e-05, - "loss": 1.5358, + "epoch": 0.4971759390002824, + "grad_norm": 4.341196239450135, + "learning_rate": 1.904964610275978e-05, + "loss": 0.9505, "step": 3521 }, { - "epoch": 0.7385196057873769, - "grad_norm": 7.350344199701822, - "learning_rate": 1.76484662140322e-05, - "loss": 1.9711, + "epoch": 0.4973171420502683, + "grad_norm": 3.9505485808056036, + "learning_rate": 1.9048997362930384e-05, + "loss": 1.0686, "step": 3522 }, { - "epoch": 0.7387292933529042, - "grad_norm": 5.780397576399049, - "learning_rate": 1.7647007595465613e-05, - "loss": 1.679, + "epoch": 0.49745834510025416, + "grad_norm": 4.446619469893406, + "learning_rate": 1.9048348412806192e-05, + "loss": 1.2636, "step": 3523 }, { - "epoch": 0.7389389809184316, - "grad_norm": 6.3756078183149505, - "learning_rate": 1.764554858497662e-05, - "loss": 1.8165, + "epoch": 0.49759954815024005, + "grad_norm": 4.172522867131995, + "learning_rate": 1.9047699252402294e-05, + "loss": 0.9826, "step": 3524 }, { - "epoch": 0.7391486684839589, - "grad_norm": 6.423580888657026, - "learning_rate": 1.7644089182640005e-05, - "loss": 2.0505, + "epoch": 0.49774075120022593, + "grad_norm": 3.264221217507679, + "learning_rate": 1.9047049881733773e-05, + "loss": 0.7252, "step": 3525 }, { - "epoch": 0.7393583560494863, - "grad_norm": 6.029032841688427, - "learning_rate": 1.7642629388530563e-05, - "loss": 1.6834, + "epoch": 0.4978819542502118, + "grad_norm": 4.214037969973725, + "learning_rate": 1.9046400300815716e-05, + "loss": 1.2387, "step": 3526 }, { - "epoch": 0.7395680436150136, - "grad_norm": 6.420524565976833, - "learning_rate": 1.7641169202723104e-05, - "loss": 2.0454, + "epoch": 0.4980231573001977, + "grad_norm": 3.418599086811761, + "learning_rate": 1.9045750509663224e-05, + "loss": 1.0176, "step": 3527 }, { - "epoch": 0.739777731180541, - "grad_norm": 7.882987302948785, - "learning_rate": 1.7639708625292473e-05, - "loss": 2.2519, + "epoch": 0.4981643603501836, + "grad_norm": 3.4531695618825715, + "learning_rate": 1.90451005082914e-05, + "loss": 1.0781, "step": 3528 }, { - "epoch": 0.7399874187460683, - "grad_norm": 6.210174798919756, - "learning_rate": 1.7638247656313527e-05, - "loss": 1.687, + "epoch": 0.49830556340016946, + "grad_norm": 3.4471045042538404, + "learning_rate": 1.9044450296715344e-05, + "loss": 0.9268, "step": 3529 }, { - "epoch": 0.7401971063115957, - "grad_norm": 6.067006147804724, - "learning_rate": 1.7636786295861136e-05, - "loss": 2.0509, + "epoch": 0.49844676645015534, + "grad_norm": 3.0908321685109867, + "learning_rate": 1.904379987495017e-05, + "loss": 0.8522, "step": 3530 }, { - "epoch": 0.740406793877123, - "grad_norm": 5.873371250416261, - "learning_rate": 1.7635324544010206e-05, - "loss": 1.6983, + "epoch": 0.4985879695001412, + "grad_norm": 3.8485659624660973, + "learning_rate": 1.9043149243010993e-05, + "loss": 1.1068, "step": 3531 }, { - "epoch": 0.7406164814426505, - "grad_norm": 5.680615814440841, - "learning_rate": 1.763386240083565e-05, - "loss": 1.9874, + "epoch": 0.4987291725501271, + "grad_norm": 3.4495626847320295, + "learning_rate": 1.904249840091293e-05, + "loss": 0.9449, "step": 3532 }, { - "epoch": 0.7408261690081778, - "grad_norm": 5.766275247099527, - "learning_rate": 1.7632399866412405e-05, - "loss": 1.8346, + "epoch": 0.49887037560011294, + "grad_norm": 4.086130604075075, + "learning_rate": 1.904184734867111e-05, + "loss": 1.2545, "step": 3533 }, { - "epoch": 0.7410358565737052, - "grad_norm": 4.865901757488386, - "learning_rate": 1.7630936940815425e-05, - "loss": 1.8454, + "epoch": 0.4990115786500988, + "grad_norm": 4.0830848190317255, + "learning_rate": 1.9041196086300666e-05, + "loss": 1.3101, "step": 3534 }, { - "epoch": 0.7412455441392325, - "grad_norm": 6.497715455120673, - "learning_rate": 1.7629473624119697e-05, - "loss": 1.7203, + "epoch": 0.4991527817000847, + "grad_norm": 3.5406241563020293, + "learning_rate": 1.9040544613816725e-05, + "loss": 0.9975, "step": 3535 }, { - "epoch": 0.7414552317047599, - "grad_norm": 7.4412090555731, - "learning_rate": 1.762800991640021e-05, - "loss": 1.9602, + "epoch": 0.4992939847500706, + "grad_norm": 4.18772346173357, + "learning_rate": 1.9039892931234434e-05, + "loss": 1.1118, "step": 3536 }, { - "epoch": 0.7416649192702873, - "grad_norm": 6.283537151889722, - "learning_rate": 1.762654581773199e-05, - "loss": 1.8062, + "epoch": 0.49943518780005647, + "grad_norm": 3.131087775256141, + "learning_rate": 1.9039241038568935e-05, + "loss": 0.7823, "step": 3537 }, { - "epoch": 0.7418746068358146, - "grad_norm": 6.136979182549098, - "learning_rate": 1.7625081328190064e-05, - "loss": 1.8542, + "epoch": 0.49957639085004235, + "grad_norm": 4.114747029399947, + "learning_rate": 1.903858893583538e-05, + "loss": 0.9766, "step": 3538 }, { - "epoch": 0.742084294401342, - "grad_norm": 7.438125356661845, - "learning_rate": 1.7623616447849496e-05, - "loss": 1.9401, + "epoch": 0.49971759390002823, + "grad_norm": 3.6890885027596374, + "learning_rate": 1.903793662304892e-05, + "loss": 0.9695, "step": 3539 }, { - "epoch": 0.7422939819668694, - "grad_norm": 6.139641260440515, - "learning_rate": 1.7622151176785363e-05, - "loss": 1.9455, + "epoch": 0.4998587969500141, + "grad_norm": 3.7762448283380086, + "learning_rate": 1.9037284100224714e-05, + "loss": 1.0643, "step": 3540 }, { - "epoch": 0.7425036695323968, - "grad_norm": 5.922848312930531, - "learning_rate": 1.762068551507276e-05, - "loss": 1.564, + "epoch": 0.5, + "grad_norm": 3.530554222585456, + "learning_rate": 1.903663136737793e-05, + "loss": 0.9581, "step": 3541 }, { - "epoch": 0.7427133570979241, - "grad_norm": 6.214216444154346, - "learning_rate": 1.7619219462786813e-05, - "loss": 1.8344, + "epoch": 0.5001412030499859, + "grad_norm": 3.892862330399702, + "learning_rate": 1.9035978424523737e-05, + "loss": 1.0917, "step": 3542 }, { - "epoch": 0.7429230446634515, - "grad_norm": 6.262885624515905, - "learning_rate": 1.7617753020002647e-05, - "loss": 1.9541, + "epoch": 0.5002824060999718, + "grad_norm": 3.686781474447699, + "learning_rate": 1.9035325271677306e-05, + "loss": 0.8535, "step": 3543 }, { - "epoch": 0.7431327322289788, - "grad_norm": 7.424142984649746, - "learning_rate": 1.761628618679543e-05, - "loss": 1.9596, + "epoch": 0.5004236091499576, + "grad_norm": 3.6155627638875756, + "learning_rate": 1.903467190885382e-05, + "loss": 1.0753, "step": 3544 }, { - "epoch": 0.7433424197945062, - "grad_norm": 6.728248078324123, - "learning_rate": 1.7614818963240337e-05, - "loss": 1.53, + "epoch": 0.5005648121999435, + "grad_norm": 3.692662502399999, + "learning_rate": 1.9034018336068457e-05, + "loss": 1.0927, "step": 3545 }, { - "epoch": 0.7435521073600335, - "grad_norm": 6.549823976519318, - "learning_rate": 1.7613351349412562e-05, - "loss": 1.8263, + "epoch": 0.5007060152499294, + "grad_norm": 3.5902172480880545, + "learning_rate": 1.903336455333641e-05, + "loss": 1.0147, "step": 3546 }, { - "epoch": 0.7437617949255609, - "grad_norm": 6.87609312026156, - "learning_rate": 1.7611883345387327e-05, - "loss": 1.975, + "epoch": 0.5008472182999153, + "grad_norm": 3.9720285326043787, + "learning_rate": 1.9032710560672875e-05, + "loss": 1.0484, "step": 3547 }, { - "epoch": 0.7439714824910882, - "grad_norm": 6.842420137979952, - "learning_rate": 1.761041495123987e-05, - "loss": 1.5892, + "epoch": 0.5009884213499012, + "grad_norm": 4.4071176828064695, + "learning_rate": 1.9032056358093048e-05, + "loss": 1.0061, "step": 3548 }, { - "epoch": 0.7441811700566157, - "grad_norm": 7.148329183404065, - "learning_rate": 1.7608946167045444e-05, - "loss": 1.7235, + "epoch": 0.5011296243998871, + "grad_norm": 3.6824348178668176, + "learning_rate": 1.9031401945612127e-05, + "loss": 1.0175, "step": 3549 }, { - "epoch": 0.744390857622143, - "grad_norm": 6.07406070172432, - "learning_rate": 1.760747699287933e-05, - "loss": 2.239, + "epoch": 0.501270827449873, + "grad_norm": 3.4601397019895455, + "learning_rate": 1.903074732324533e-05, + "loss": 1.0654, "step": 3550 }, { - "epoch": 0.7446005451876704, - "grad_norm": 5.908149400892406, - "learning_rate": 1.7606007428816826e-05, - "loss": 2.0016, + "epoch": 0.5014120304998588, + "grad_norm": 3.5378765846449385, + "learning_rate": 1.9030092491007863e-05, + "loss": 0.9111, "step": 3551 }, { - "epoch": 0.7448102327531977, - "grad_norm": 5.9254212630017555, - "learning_rate": 1.7604537474933252e-05, - "loss": 1.7518, + "epoch": 0.5015532335498447, + "grad_norm": 4.108914465178884, + "learning_rate": 1.9029437448914945e-05, + "loss": 0.983, "step": 3552 }, { - "epoch": 0.7450199203187251, - "grad_norm": 6.14527800512368, - "learning_rate": 1.760306713130394e-05, - "loss": 2.2464, + "epoch": 0.5016944365998306, + "grad_norm": 3.9997638963203324, + "learning_rate": 1.9028782196981802e-05, + "loss": 0.9203, "step": 3553 }, { - "epoch": 0.7452296078842524, - "grad_norm": 6.76904976285918, - "learning_rate": 1.7601596398004255e-05, - "loss": 1.6643, + "epoch": 0.5018356396498165, + "grad_norm": 3.876315596043194, + "learning_rate": 1.902812673522366e-05, + "loss": 1.0369, "step": 3554 }, { - "epoch": 0.7454392954497798, - "grad_norm": 5.550949931182336, - "learning_rate": 1.7600125275109565e-05, - "loss": 1.6049, + "epoch": 0.5019768426998024, + "grad_norm": 3.984966909708275, + "learning_rate": 1.902747106365575e-05, + "loss": 0.9605, "step": 3555 }, { - "epoch": 0.7456489830153072, - "grad_norm": 6.552466739811548, - "learning_rate": 1.7598653762695275e-05, - "loss": 1.8545, + "epoch": 0.5021180457497882, + "grad_norm": 3.9704851841359874, + "learning_rate": 1.9026815182293315e-05, + "loss": 0.9971, "step": 3556 }, { - "epoch": 0.7458586705808345, - "grad_norm": 7.405786551213001, - "learning_rate": 1.7597181860836802e-05, - "loss": 1.9707, + "epoch": 0.5022592487997741, + "grad_norm": 3.569795539072415, + "learning_rate": 1.902615909115159e-05, + "loss": 0.925, "step": 3557 }, { - "epoch": 0.746068358146362, - "grad_norm": 7.280680392664758, - "learning_rate": 1.759570956960958e-05, - "loss": 1.9457, + "epoch": 0.50240045184976, + "grad_norm": 4.129289270568844, + "learning_rate": 1.9025502790245824e-05, + "loss": 0.8938, "step": 3558 }, { - "epoch": 0.7462780457118893, - "grad_norm": 6.350353415331434, - "learning_rate": 1.759423688908907e-05, - "loss": 1.6103, + "epoch": 0.5025416548997458, + "grad_norm": 3.9926097051049236, + "learning_rate": 1.9024846279591275e-05, + "loss": 0.9191, "step": 3559 }, { - "epoch": 0.7464877332774167, - "grad_norm": 7.251341324953078, - "learning_rate": 1.7592763819350746e-05, - "loss": 2.1117, + "epoch": 0.5026828579497317, + "grad_norm": 3.9061456542334803, + "learning_rate": 1.9024189559203193e-05, + "loss": 1.0419, "step": 3560 }, { - "epoch": 0.746697420842944, - "grad_norm": 7.440507141373042, - "learning_rate": 1.759129036047011e-05, - "loss": 2.0942, + "epoch": 0.5028240609997175, + "grad_norm": 4.678394329802124, + "learning_rate": 1.9023532629096844e-05, + "loss": 1.1198, "step": 3561 }, { - "epoch": 0.7469071084084714, - "grad_norm": 6.746596763248542, - "learning_rate": 1.758981651252267e-05, - "loss": 1.8331, + "epoch": 0.5029652640497034, + "grad_norm": 2.9282890208204475, + "learning_rate": 1.9022875489287496e-05, + "loss": 0.8027, "step": 3562 }, { - "epoch": 0.7471167959739987, - "grad_norm": 6.687846637146918, - "learning_rate": 1.758834227558398e-05, - "loss": 1.6062, + "epoch": 0.5031064670996893, + "grad_norm": 3.9690187115056714, + "learning_rate": 1.902221813979042e-05, + "loss": 1.1128, "step": 3563 }, { - "epoch": 0.7473264835395261, - "grad_norm": 7.250030562987938, - "learning_rate": 1.758686764972958e-05, - "loss": 1.9847, + "epoch": 0.5032476701496752, + "grad_norm": 3.775291411198648, + "learning_rate": 1.9021560580620883e-05, + "loss": 1.0603, "step": 3564 }, { - "epoch": 0.7475361711050534, - "grad_norm": 6.317939530622629, - "learning_rate": 1.7585392635035055e-05, - "loss": 1.7993, + "epoch": 0.5033888731996611, + "grad_norm": 4.489152972502885, + "learning_rate": 1.902090281179418e-05, + "loss": 0.9991, "step": 3565 }, { - "epoch": 0.7477458586705809, - "grad_norm": 6.9416785543652155, - "learning_rate": 1.7583917231576002e-05, - "loss": 2.0835, + "epoch": 0.503530076249647, + "grad_norm": 4.305584358448502, + "learning_rate": 1.902024483332559e-05, + "loss": 1.2794, "step": 3566 }, { - "epoch": 0.7479555462361082, - "grad_norm": 6.030863805436057, - "learning_rate": 1.7582441439428037e-05, - "loss": 2.0406, + "epoch": 0.5036712792996328, + "grad_norm": 3.8206103977795234, + "learning_rate": 1.901958664523041e-05, + "loss": 1.1267, "step": 3567 }, { - "epoch": 0.7481652338016356, - "grad_norm": 7.284693143816317, - "learning_rate": 1.7580965258666797e-05, - "loss": 1.6491, + "epoch": 0.5038124823496187, + "grad_norm": 4.1385194589939, + "learning_rate": 1.9018928247523924e-05, + "loss": 1.0367, "step": 3568 }, { - "epoch": 0.7483749213671629, - "grad_norm": 6.137328105493726, - "learning_rate": 1.757948868936794e-05, - "loss": 1.6784, + "epoch": 0.5039536853996046, + "grad_norm": 5.049527986890978, + "learning_rate": 1.9018269640221443e-05, + "loss": 1.4399, "step": 3569 }, { - "epoch": 0.7485846089326903, - "grad_norm": 6.261534894003862, - "learning_rate": 1.757801173160714e-05, - "loss": 1.7973, + "epoch": 0.5040948884495905, + "grad_norm": 3.7904470790721705, + "learning_rate": 1.901761082333827e-05, + "loss": 0.9269, "step": 3570 }, { - "epoch": 0.7487942964982176, - "grad_norm": 5.872047246636773, - "learning_rate": 1.7576534385460096e-05, - "loss": 1.7136, + "epoch": 0.5042360914995764, + "grad_norm": 4.172804105610269, + "learning_rate": 1.901695179688972e-05, + "loss": 1.2578, "step": 3571 }, { - "epoch": 0.749003984063745, - "grad_norm": 7.304200893656662, - "learning_rate": 1.757505665100253e-05, - "loss": 1.9465, + "epoch": 0.5043772945495623, + "grad_norm": 3.8644725119199137, + "learning_rate": 1.90162925608911e-05, + "loss": 1.2958, "step": 3572 }, { - "epoch": 0.7492136716292724, - "grad_norm": 7.148031510096267, - "learning_rate": 1.7573578528310164e-05, - "loss": 1.865, + "epoch": 0.5045184975995481, + "grad_norm": 3.3924696389548075, + "learning_rate": 1.9015633115357737e-05, + "loss": 0.917, "step": 3573 }, { - "epoch": 0.7494233591947997, - "grad_norm": 6.7531473109553275, - "learning_rate": 1.7572100017458766e-05, - "loss": 2.1642, + "epoch": 0.504659700649534, + "grad_norm": 4.024868096526031, + "learning_rate": 1.901497346030495e-05, + "loss": 0.939, "step": 3574 }, { - "epoch": 0.7496330467603272, - "grad_norm": 5.944393437657226, - "learning_rate": 1.757062111852411e-05, - "loss": 1.5782, + "epoch": 0.5048009036995199, + "grad_norm": 4.473623334099498, + "learning_rate": 1.9014313595748078e-05, + "loss": 1.173, "step": 3575 }, { - "epoch": 0.7498427343258545, - "grad_norm": 5.125368828084269, - "learning_rate": 1.756914183158199e-05, - "loss": 1.6213, + "epoch": 0.5049421067495058, + "grad_norm": 3.540645539908897, + "learning_rate": 1.9013653521702448e-05, + "loss": 1.0481, "step": 3576 }, { - "epoch": 0.7500524218913819, - "grad_norm": 5.98840513150999, - "learning_rate": 1.756766215670822e-05, - "loss": 1.8489, + "epoch": 0.5050833097994917, + "grad_norm": 4.159408889706492, + "learning_rate": 1.9012993238183405e-05, + "loss": 1.2286, "step": 3577 }, { - "epoch": 0.7502621094569092, - "grad_norm": 6.5295129281532995, - "learning_rate": 1.7566182093978643e-05, - "loss": 1.9036, + "epoch": 0.5052245128494776, + "grad_norm": 3.337832299402863, + "learning_rate": 1.901233274520629e-05, + "loss": 0.8567, "step": 3578 }, { - "epoch": 0.7504717970224366, - "grad_norm": 6.830166823498713, - "learning_rate": 1.756470164346911e-05, - "loss": 1.8673, + "epoch": 0.5053657158994634, + "grad_norm": 3.3248648775172303, + "learning_rate": 1.9011672042786456e-05, + "loss": 0.9377, "step": 3579 }, { - "epoch": 0.7506814845879639, - "grad_norm": 7.089647736742841, - "learning_rate": 1.7563220805255496e-05, - "loss": 1.7548, + "epoch": 0.5055069189494493, + "grad_norm": 3.7209863149721945, + "learning_rate": 1.9011011130939254e-05, + "loss": 1.0953, "step": 3580 }, { - "epoch": 0.7508911721534913, - "grad_norm": 6.053219240037933, - "learning_rate": 1.75617395794137e-05, - "loss": 1.9925, + "epoch": 0.5056481219994352, + "grad_norm": 4.025402292113507, + "learning_rate": 1.901035000968004e-05, + "loss": 0.9544, "step": 3581 }, { - "epoch": 0.7511008597190186, - "grad_norm": 7.012176842986784, - "learning_rate": 1.7560257966019634e-05, - "loss": 1.9095, + "epoch": 0.5057893250494211, + "grad_norm": 3.3383966473815394, + "learning_rate": 1.900968867902419e-05, + "loss": 0.9679, "step": 3582 }, { - "epoch": 0.751310547284546, - "grad_norm": 5.987066930418948, - "learning_rate": 1.7558775965149236e-05, - "loss": 1.4474, + "epoch": 0.505930528099407, + "grad_norm": 3.413119006583441, + "learning_rate": 1.900902713898707e-05, + "loss": 1.0655, "step": 3583 }, { - "epoch": 0.7515202348500734, - "grad_norm": 7.393547883923078, - "learning_rate": 1.755729357687846e-05, - "loss": 2.1521, + "epoch": 0.5060717311493929, + "grad_norm": 4.549256375682376, + "learning_rate": 1.9008365389584042e-05, + "loss": 1.2091, "step": 3584 }, { - "epoch": 0.7517299224156008, - "grad_norm": 6.740133743507663, - "learning_rate": 1.7555810801283278e-05, - "loss": 1.9401, + "epoch": 0.5062129341993787, + "grad_norm": 4.1394813296709865, + "learning_rate": 1.9007703430830494e-05, + "loss": 1.2071, "step": 3585 }, { - "epoch": 0.7519396099811281, - "grad_norm": 6.389116286352003, - "learning_rate": 1.755432763843969e-05, - "loss": 1.7206, + "epoch": 0.5063541372493646, + "grad_norm": 4.18179382130419, + "learning_rate": 1.900704126274181e-05, + "loss": 1.122, "step": 3586 }, { - "epoch": 0.7521492975466555, - "grad_norm": 6.19748635459507, - "learning_rate": 1.7552844088423707e-05, - "loss": 1.8184, + "epoch": 0.5064953402993505, + "grad_norm": 3.439329744744783, + "learning_rate": 1.9006378885333376e-05, + "loss": 1.0286, "step": 3587 }, { - "epoch": 0.7523589851121828, - "grad_norm": 6.940194945997136, - "learning_rate": 1.7551360151311364e-05, - "loss": 1.7933, + "epoch": 0.5066365433493364, + "grad_norm": 3.61832909203157, + "learning_rate": 1.9005716298620585e-05, + "loss": 0.752, "step": 3588 }, { - "epoch": 0.7525686726777102, - "grad_norm": 6.176398439168147, - "learning_rate": 1.7549875827178716e-05, - "loss": 1.8535, + "epoch": 0.5067777463993223, + "grad_norm": 4.1270002863447495, + "learning_rate": 1.900505350261884e-05, + "loss": 1.3398, "step": 3589 }, { - "epoch": 0.7527783602432375, - "grad_norm": 7.055827621359366, - "learning_rate": 1.754839111610184e-05, - "loss": 1.8103, + "epoch": 0.5069189494493082, + "grad_norm": 3.4495490856261917, + "learning_rate": 1.9004390497343536e-05, + "loss": 0.839, "step": 3590 }, { - "epoch": 0.7529880478087649, - "grad_norm": 4.980668637808491, - "learning_rate": 1.7546906018156825e-05, - "loss": 1.7236, + "epoch": 0.507060152499294, + "grad_norm": 3.5863203874984544, + "learning_rate": 1.900372728281009e-05, + "loss": 0.9984, "step": 3591 }, { - "epoch": 0.7531977353742924, - "grad_norm": 6.5901203155426495, - "learning_rate": 1.754542053341979e-05, - "loss": 2.0351, + "epoch": 0.5072013555492799, + "grad_norm": 3.8961479407224235, + "learning_rate": 1.9003063859033906e-05, + "loss": 1.0233, "step": 3592 }, { - "epoch": 0.7534074229398197, - "grad_norm": 7.355011806283256, - "learning_rate": 1.7543934661966863e-05, - "loss": 1.9339, + "epoch": 0.5073425585992657, + "grad_norm": 3.779657379395044, + "learning_rate": 1.900240022603041e-05, + "loss": 0.9989, "step": 3593 }, { - "epoch": 0.7536171105053471, - "grad_norm": 7.146201136194959, - "learning_rate": 1.75424484038742e-05, - "loss": 2.0235, + "epoch": 0.5074837616492516, + "grad_norm": 3.7650598122332855, + "learning_rate": 1.9001736383815023e-05, + "loss": 1.0585, "step": 3594 }, { - "epoch": 0.7538267980708744, - "grad_norm": 7.047761182349562, - "learning_rate": 1.7540961759217976e-05, - "loss": 1.8004, + "epoch": 0.5076249646992375, + "grad_norm": 3.7421243864633733, + "learning_rate": 1.9001072332403162e-05, + "loss": 1.0768, "step": 3595 }, { - "epoch": 0.7540364856364018, - "grad_norm": 6.45243307882741, - "learning_rate": 1.7539474728074382e-05, - "loss": 1.6887, + "epoch": 0.5077661677492233, + "grad_norm": 3.0453181459687686, + "learning_rate": 1.900040807181027e-05, + "loss": 0.8341, "step": 3596 }, { - "epoch": 0.7542461732019291, - "grad_norm": 6.175569473365782, - "learning_rate": 1.7537987310519634e-05, - "loss": 1.9449, + "epoch": 0.5079073707992092, + "grad_norm": 3.654008327793602, + "learning_rate": 1.8999743602051786e-05, + "loss": 1.0276, "step": 3597 }, { - "epoch": 0.7544558607674565, - "grad_norm": 6.7896714808473515, - "learning_rate": 1.7536499506629956e-05, - "loss": 1.9703, + "epoch": 0.5080485738491951, + "grad_norm": 3.096320218006398, + "learning_rate": 1.8999078923143142e-05, + "loss": 0.9073, "step": 3598 }, { - "epoch": 0.7546655483329838, - "grad_norm": 6.844532950541191, - "learning_rate": 1.7535011316481613e-05, - "loss": 1.9322, + "epoch": 0.508189776899181, + "grad_norm": 3.7654146464714833, + "learning_rate": 1.899841403509979e-05, + "loss": 1.0303, "step": 3599 }, { - "epoch": 0.7548752358985112, - "grad_norm": 7.528371728842008, - "learning_rate": 1.753352274015087e-05, - "loss": 1.904, + "epoch": 0.5083309799491669, + "grad_norm": 3.8831456959938784, + "learning_rate": 1.8997748937937188e-05, + "loss": 1.0492, "step": 3600 }, { - "epoch": 0.7550849234640385, - "grad_norm": 6.098817790442983, - "learning_rate": 1.753203377771402e-05, - "loss": 1.7005, + "epoch": 0.5084721829991528, + "grad_norm": 3.930889411527443, + "learning_rate": 1.8997083631670783e-05, + "loss": 1.0238, "step": 3601 }, { - "epoch": 0.755294611029566, - "grad_norm": 6.266547019433409, - "learning_rate": 1.7530544429247375e-05, - "loss": 1.7496, + "epoch": 0.5086133860491386, + "grad_norm": 5.272065722120395, + "learning_rate": 1.899641811631604e-05, + "loss": 1.2447, "step": 3602 }, { - "epoch": 0.7555042985950933, - "grad_norm": 6.297473338903792, - "learning_rate": 1.7529054694827265e-05, - "loss": 1.7812, + "epoch": 0.5087545890991245, + "grad_norm": 3.4308054223790707, + "learning_rate": 1.8995752391888423e-05, + "loss": 0.978, "step": 3603 }, { - "epoch": 0.7557139861606207, - "grad_norm": 6.227443771207519, - "learning_rate": 1.7527564574530047e-05, - "loss": 1.2683, + "epoch": 0.5088957921491104, + "grad_norm": 3.3000191890233888, + "learning_rate": 1.8995086458403408e-05, + "loss": 0.9274, "step": 3604 }, { - "epoch": 0.755923673726148, - "grad_norm": 6.79213724523078, - "learning_rate": 1.7526074068432087e-05, - "loss": 1.6328, + "epoch": 0.5090369951990963, + "grad_norm": 4.573607066623763, + "learning_rate": 1.899442031587647e-05, + "loss": 1.039, "step": 3605 }, { - "epoch": 0.7561333612916754, - "grad_norm": 6.015713224600076, - "learning_rate": 1.7524583176609776e-05, - "loss": 1.9586, + "epoch": 0.5091781982490822, + "grad_norm": 4.157466779268365, + "learning_rate": 1.8993753964323086e-05, + "loss": 1.3027, "step": 3606 }, { - "epoch": 0.7563430488572027, - "grad_norm": 8.028247469594895, - "learning_rate": 1.7523091899139528e-05, - "loss": 1.9516, + "epoch": 0.509319401299068, + "grad_norm": 4.258603319206109, + "learning_rate": 1.8993087403758743e-05, + "loss": 1.1855, "step": 3607 }, { - "epoch": 0.7565527364227301, - "grad_norm": 6.925459387659107, - "learning_rate": 1.7521600236097774e-05, - "loss": 1.8299, + "epoch": 0.5094606043490539, + "grad_norm": 3.382662288388237, + "learning_rate": 1.8992420634198934e-05, + "loss": 0.9887, "step": 3608 }, { - "epoch": 0.7567624239882575, - "grad_norm": 7.0091158756271685, - "learning_rate": 1.752010818756096e-05, - "loss": 2.0454, + "epoch": 0.5096018073990398, + "grad_norm": 3.340747209817989, + "learning_rate": 1.899175365565915e-05, + "loss": 0.9784, "step": 3609 }, { - "epoch": 0.7569721115537849, - "grad_norm": 8.159946168228625, - "learning_rate": 1.751861575360556e-05, - "loss": 2.2354, + "epoch": 0.5097430104490257, + "grad_norm": 3.666786189171731, + "learning_rate": 1.8991086468154897e-05, + "loss": 1.1034, "step": 3610 }, { - "epoch": 0.7571817991193123, - "grad_norm": 6.359549518540783, - "learning_rate": 1.751712293430806e-05, - "loss": 1.5468, + "epoch": 0.5098842134990116, + "grad_norm": 3.045839866758312, + "learning_rate": 1.899041907170168e-05, + "loss": 0.893, "step": 3611 }, { - "epoch": 0.7573914866848396, - "grad_norm": 6.2245972049427785, - "learning_rate": 1.751562972974497e-05, - "loss": 1.8496, + "epoch": 0.5100254165489975, + "grad_norm": 3.8525195889236055, + "learning_rate": 1.8989751466315004e-05, + "loss": 1.0713, "step": 3612 }, { - "epoch": 0.757601174250367, - "grad_norm": 5.1841139289842575, - "learning_rate": 1.7514136139992826e-05, - "loss": 1.8064, + "epoch": 0.5101666195989834, + "grad_norm": 3.854384120283142, + "learning_rate": 1.8989083652010385e-05, + "loss": 1.0453, "step": 3613 }, { - "epoch": 0.7578108618158943, - "grad_norm": 5.859895466259266, - "learning_rate": 1.7512642165128172e-05, - "loss": 2.0262, + "epoch": 0.5103078226489692, + "grad_norm": 4.215942711172723, + "learning_rate": 1.8988415628803345e-05, + "loss": 1.1505, "step": 3614 }, { - "epoch": 0.7580205493814217, - "grad_norm": 6.5730561736042175, - "learning_rate": 1.7511147805227573e-05, - "loss": 1.9988, + "epoch": 0.5104490256989551, + "grad_norm": 3.838085119754798, + "learning_rate": 1.8987747396709405e-05, + "loss": 1.0882, "step": 3615 }, { - "epoch": 0.758230236946949, - "grad_norm": 6.218980701638524, - "learning_rate": 1.7509653060367626e-05, - "loss": 1.8955, + "epoch": 0.510590228748941, + "grad_norm": 3.7185975291506845, + "learning_rate": 1.8987078955744103e-05, + "loss": 1.1034, "step": 3616 }, { - "epoch": 0.7584399245124764, - "grad_norm": 6.192123380880596, - "learning_rate": 1.750815793062493e-05, - "loss": 2.0516, + "epoch": 0.5107314317989269, + "grad_norm": 4.298931135377946, + "learning_rate": 1.8986410305922963e-05, + "loss": 1.2667, "step": 3617 }, { - "epoch": 0.7586496120780037, - "grad_norm": 7.56922979210302, - "learning_rate": 1.750666241607612e-05, - "loss": 1.911, + "epoch": 0.5108726348489128, + "grad_norm": 3.2628869877192557, + "learning_rate": 1.898574144726153e-05, + "loss": 0.8813, "step": 3618 }, { - "epoch": 0.7588592996435312, - "grad_norm": 5.477849151068813, - "learning_rate": 1.7505166516797844e-05, - "loss": 1.0648, + "epoch": 0.5110138378988986, + "grad_norm": 4.606306841342923, + "learning_rate": 1.8985072379775346e-05, + "loss": 0.8743, "step": 3619 }, { - "epoch": 0.7590689872090585, - "grad_norm": 6.693238166822524, - "learning_rate": 1.7503670232866767e-05, - "loss": 1.5961, + "epoch": 0.5111550409488845, + "grad_norm": 4.083191276203403, + "learning_rate": 1.8984403103479957e-05, + "loss": 0.8632, "step": 3620 }, { - "epoch": 0.7592786747745859, - "grad_norm": 6.3325741439253935, - "learning_rate": 1.750217356435957e-05, - "loss": 1.646, + "epoch": 0.5112962439988704, + "grad_norm": 3.7484423362005144, + "learning_rate": 1.8983733618390924e-05, + "loss": 1.0854, "step": 3621 }, { - "epoch": 0.7594883623401132, - "grad_norm": 7.216636352147544, - "learning_rate": 1.750067651135297e-05, - "loss": 1.9933, + "epoch": 0.5114374470488563, + "grad_norm": 3.8231476452207036, + "learning_rate": 1.89830639245238e-05, + "loss": 1.077, "step": 3622 }, { - "epoch": 0.7596980499056406, - "grad_norm": 7.530584128066125, - "learning_rate": 1.7499179073923692e-05, - "loss": 1.7248, + "epoch": 0.5115786500988422, + "grad_norm": 3.6690232008708965, + "learning_rate": 1.898239402189415e-05, + "loss": 1.1201, "step": 3623 }, { - "epoch": 0.7599077374711679, - "grad_norm": 6.77064345312104, - "learning_rate": 1.7497681252148476e-05, - "loss": 2.3199, + "epoch": 0.5117198531488281, + "grad_norm": 3.8897334918718873, + "learning_rate": 1.8981723910517546e-05, + "loss": 0.9572, "step": 3624 }, { - "epoch": 0.7601174250366953, - "grad_norm": 6.554710793652707, - "learning_rate": 1.7496183046104095e-05, - "loss": 1.9308, + "epoch": 0.511861056198814, + "grad_norm": 4.231071624321432, + "learning_rate": 1.8981053590409552e-05, + "loss": 1.226, "step": 3625 }, { - "epoch": 0.7603271126022227, - "grad_norm": 5.676704730342347, - "learning_rate": 1.7494684455867328e-05, - "loss": 1.2719, + "epoch": 0.5120022592487998, + "grad_norm": 4.129271785268568, + "learning_rate": 1.8980383061585752e-05, + "loss": 0.9597, "step": 3626 }, { - "epoch": 0.76053680016775, - "grad_norm": 6.521686600516974, - "learning_rate": 1.7493185481514984e-05, - "loss": 1.5967, + "epoch": 0.5121434622987856, + "grad_norm": 3.4234180987772356, + "learning_rate": 1.8979712324061727e-05, + "loss": 0.9068, "step": 3627 }, { - "epoch": 0.7607464877332775, - "grad_norm": 6.2899336584694625, - "learning_rate": 1.7491686123123888e-05, - "loss": 1.8855, + "epoch": 0.5122846653487715, + "grad_norm": 4.00505946940984, + "learning_rate": 1.8979041377853068e-05, + "loss": 1.074, "step": 3628 }, { - "epoch": 0.7609561752988048, - "grad_norm": 6.364351430230438, - "learning_rate": 1.7490186380770887e-05, - "loss": 1.8863, + "epoch": 0.5124258683987574, + "grad_norm": 3.374078592567029, + "learning_rate": 1.8978370222975364e-05, + "loss": 0.9391, "step": 3629 }, { - "epoch": 0.7611658628643322, - "grad_norm": 6.720176726465069, - "learning_rate": 1.7488686254532844e-05, - "loss": 2.0107, + "epoch": 0.5125670714487432, + "grad_norm": 3.263779818995175, + "learning_rate": 1.8977698859444217e-05, + "loss": 0.991, "step": 3630 }, { - "epoch": 0.7613755504298595, - "grad_norm": 6.056017334625059, - "learning_rate": 1.7487185744486636e-05, - "loss": 1.7627, + "epoch": 0.5127082744987291, + "grad_norm": 3.8164569059674727, + "learning_rate": 1.8977027287275224e-05, + "loss": 1.0413, "step": 3631 }, { - "epoch": 0.7615852379953869, - "grad_norm": 6.045644452104308, - "learning_rate": 1.748568485070918e-05, - "loss": 1.512, + "epoch": 0.512849477548715, + "grad_norm": 3.470165928657235, + "learning_rate": 1.8976355506483988e-05, + "loss": 0.9671, "step": 3632 }, { - "epoch": 0.7617949255609142, - "grad_norm": 5.78160406555623, - "learning_rate": 1.7484183573277385e-05, - "loss": 1.7835, + "epoch": 0.5129906805987009, + "grad_norm": 4.146004560834866, + "learning_rate": 1.8975683517086132e-05, + "loss": 1.0802, "step": 3633 }, { - "epoch": 0.7620046131264416, - "grad_norm": 5.908498260913524, - "learning_rate": 1.7482681912268205e-05, - "loss": 1.6271, + "epoch": 0.5131318836486868, + "grad_norm": 4.284048360764937, + "learning_rate": 1.8975011319097264e-05, + "loss": 0.9566, "step": 3634 }, { - "epoch": 0.7622143006919689, - "grad_norm": 6.836265825833793, - "learning_rate": 1.74811798677586e-05, - "loss": 2.1131, + "epoch": 0.5132730866986727, + "grad_norm": 4.077226256946177, + "learning_rate": 1.8974338912533015e-05, + "loss": 1.2519, "step": 3635 }, { - "epoch": 0.7624239882574964, - "grad_norm": 6.789212375288109, - "learning_rate": 1.747967743982555e-05, - "loss": 1.5101, + "epoch": 0.5134142897486585, + "grad_norm": 4.044465943899905, + "learning_rate": 1.8973666297409e-05, + "loss": 0.989, "step": 3636 }, { - "epoch": 0.7626336758230237, - "grad_norm": 6.35568114238226, - "learning_rate": 1.7478174628546057e-05, - "loss": 1.571, + "epoch": 0.5135554927986444, + "grad_norm": 3.6787283735921896, + "learning_rate": 1.897299347374086e-05, + "loss": 1.1405, "step": 3637 }, { - "epoch": 0.7628433633885511, - "grad_norm": 6.271275619464627, - "learning_rate": 1.7476671433997147e-05, - "loss": 1.3386, + "epoch": 0.5136966958486303, + "grad_norm": 4.450199282463527, + "learning_rate": 1.8972320441544224e-05, + "loss": 1.3281, "step": 3638 }, { - "epoch": 0.7630530509540784, - "grad_norm": 7.236527020633516, - "learning_rate": 1.7475167856255857e-05, - "loss": 2.1999, + "epoch": 0.5138378988986162, + "grad_norm": 4.275791173659126, + "learning_rate": 1.8971647200834736e-05, + "loss": 1.2003, "step": 3639 }, { - "epoch": 0.7632627385196058, - "grad_norm": 7.770764995641788, - "learning_rate": 1.7473663895399247e-05, - "loss": 2.0176, + "epoch": 0.5139791019486021, + "grad_norm": 4.097974109983107, + "learning_rate": 1.897097375162804e-05, + "loss": 1.2619, "step": 3640 }, { - "epoch": 0.7634724260851331, - "grad_norm": 6.471162610445809, - "learning_rate": 1.7472159551504405e-05, - "loss": 1.8678, + "epoch": 0.514120304998588, + "grad_norm": 3.527892740027429, + "learning_rate": 1.897030009393979e-05, + "loss": 0.8972, "step": 3641 }, { - "epoch": 0.7636821136506605, - "grad_norm": 7.479839497561408, - "learning_rate": 1.747065482464842e-05, - "loss": 1.9362, + "epoch": 0.5142615080485738, + "grad_norm": 3.633855302089347, + "learning_rate": 1.896962622778564e-05, + "loss": 1.0984, "step": 3642 }, { - "epoch": 0.7638918012161878, - "grad_norm": 10.262946999574746, - "learning_rate": 1.746914971490842e-05, - "loss": 2.2274, + "epoch": 0.5144027110985597, + "grad_norm": 3.824745569351625, + "learning_rate": 1.896895215318125e-05, + "loss": 1.0004, "step": 3643 }, { - "epoch": 0.7641014887817152, - "grad_norm": 9.258574287443022, - "learning_rate": 1.7467644222361547e-05, - "loss": 2.5424, + "epoch": 0.5145439141485456, + "grad_norm": 4.445059663507518, + "learning_rate": 1.8968277870142283e-05, + "loss": 1.3118, "step": 3644 }, { - "epoch": 0.7643111763472427, - "grad_norm": 6.536040300734493, - "learning_rate": 1.7466138347084953e-05, - "loss": 1.9496, + "epoch": 0.5146851171985315, + "grad_norm": 3.411235476935329, + "learning_rate": 1.8967603378684415e-05, + "loss": 0.9442, "step": 3645 }, { - "epoch": 0.76452086391277, - "grad_norm": 6.365878404129815, - "learning_rate": 1.7464632089155818e-05, - "loss": 2.0183, + "epoch": 0.5148263202485174, + "grad_norm": 3.9768302899986874, + "learning_rate": 1.8966928678823317e-05, + "loss": 1.035, "step": 3646 }, { - "epoch": 0.7647305514782974, - "grad_norm": 5.868973886244549, - "learning_rate": 1.746312544865134e-05, - "loss": 2.0426, + "epoch": 0.5149675232985033, + "grad_norm": 3.7281461238433495, + "learning_rate": 1.8966253770574668e-05, + "loss": 1.1572, "step": 3647 }, { - "epoch": 0.7649402390438247, - "grad_norm": 5.61115259340127, - "learning_rate": 1.7461618425648742e-05, - "loss": 1.7539, + "epoch": 0.5151087263484891, + "grad_norm": 3.6161528196762234, + "learning_rate": 1.8965578653954152e-05, + "loss": 0.9144, "step": 3648 }, { - "epoch": 0.7651499266093521, - "grad_norm": 4.7846516726578665, - "learning_rate": 1.746011102022526e-05, - "loss": 1.7948, + "epoch": 0.515249929398475, + "grad_norm": 3.071135831040302, + "learning_rate": 1.8964903328977463e-05, + "loss": 0.6992, "step": 3649 }, { - "epoch": 0.7653596141748794, - "grad_norm": 5.760766993002182, - "learning_rate": 1.7458603232458144e-05, - "loss": 2.0803, + "epoch": 0.5153911324484609, + "grad_norm": 4.124959480505632, + "learning_rate": 1.896422779566029e-05, + "loss": 1.0834, "step": 3650 }, { - "epoch": 0.7655693017404068, - "grad_norm": 6.278621467586301, - "learning_rate": 1.7457095062424677e-05, - "loss": 1.7772, + "epoch": 0.5155323354984468, + "grad_norm": 4.08747495440362, + "learning_rate": 1.8963552054018335e-05, + "loss": 1.1953, "step": 3651 }, { - "epoch": 0.7657789893059341, - "grad_norm": 5.59161583500458, - "learning_rate": 1.745558651020216e-05, - "loss": 1.8771, + "epoch": 0.5156735385484327, + "grad_norm": 3.5177213996057417, + "learning_rate": 1.8962876104067303e-05, + "loss": 1.0634, "step": 3652 }, { - "epoch": 0.7659886768714615, - "grad_norm": 5.981280062825638, - "learning_rate": 1.7454077575867895e-05, - "loss": 1.7596, + "epoch": 0.5158147415984186, + "grad_norm": 3.840847992051843, + "learning_rate": 1.8962199945822898e-05, + "loss": 1.01, "step": 3653 }, { - "epoch": 0.7661983644369889, - "grad_norm": 6.09898091736078, - "learning_rate": 1.745256825949923e-05, - "loss": 2.0797, + "epoch": 0.5159559446484044, + "grad_norm": 4.726897451854293, + "learning_rate": 1.8961523579300836e-05, + "loss": 1.1062, "step": 3654 }, { - "epoch": 0.7664080520025163, - "grad_norm": 5.841052371563669, - "learning_rate": 1.745105856117352e-05, - "loss": 1.7735, + "epoch": 0.5160971476983903, + "grad_norm": 5.377347012260199, + "learning_rate": 1.896084700451684e-05, + "loss": 1.3303, "step": 3655 }, { - "epoch": 0.7666177395680436, - "grad_norm": 5.19666873342045, - "learning_rate": 1.7449548480968127e-05, - "loss": 1.7499, + "epoch": 0.5162383507483762, + "grad_norm": 3.8526661156892885, + "learning_rate": 1.896017022148663e-05, + "loss": 1.1655, "step": 3656 }, { - "epoch": 0.766827427133571, - "grad_norm": 6.338726029347207, - "learning_rate": 1.744803801896046e-05, - "loss": 2.0768, + "epoch": 0.5163795537983621, + "grad_norm": 4.000957704211775, + "learning_rate": 1.895949323022593e-05, + "loss": 0.9976, "step": 3657 }, { - "epoch": 0.7670371146990983, - "grad_norm": 5.7325334663187295, - "learning_rate": 1.7446527175227925e-05, - "loss": 2.0459, + "epoch": 0.516520756848348, + "grad_norm": 3.945864479731758, + "learning_rate": 1.895881603075048e-05, + "loss": 1.0242, "step": 3658 }, { - "epoch": 0.7672468022646257, - "grad_norm": 5.453141994128758, - "learning_rate": 1.7445015949847955e-05, - "loss": 1.7382, + "epoch": 0.5166619598983339, + "grad_norm": 4.451788436215483, + "learning_rate": 1.895813862307601e-05, + "loss": 1.2664, "step": 3659 }, { - "epoch": 0.767456489830153, - "grad_norm": 5.527711132901874, - "learning_rate": 1.744350434289801e-05, - "loss": 1.7456, + "epoch": 0.5168031629483197, + "grad_norm": 3.6535719166643714, + "learning_rate": 1.8957461007218272e-05, + "loss": 1.1365, "step": 3660 }, { - "epoch": 0.7676661773956804, - "grad_norm": 5.536202616847874, - "learning_rate": 1.744199235445555e-05, - "loss": 1.4765, + "epoch": 0.5169443659983055, + "grad_norm": 4.302465856894494, + "learning_rate": 1.8956783183193007e-05, + "loss": 1.0524, "step": 3661 }, { - "epoch": 0.7678758649612079, - "grad_norm": 6.9415768305610355, - "learning_rate": 1.744047998459808e-05, - "loss": 2.0375, + "epoch": 0.5170855690482914, + "grad_norm": 3.907881880938853, + "learning_rate": 1.8956105151015966e-05, + "loss": 1.1253, "step": 3662 }, { - "epoch": 0.7680855525267352, - "grad_norm": 6.401075433619252, - "learning_rate": 1.7438967233403103e-05, - "loss": 1.9118, + "epoch": 0.5172267720982773, + "grad_norm": 3.8202265077123196, + "learning_rate": 1.8955426910702912e-05, + "loss": 1.0908, "step": 3663 }, { - "epoch": 0.7682952400922626, - "grad_norm": 6.857058952525928, - "learning_rate": 1.7437454100948158e-05, - "loss": 1.8915, + "epoch": 0.5173679751482632, + "grad_norm": 4.753180861108541, + "learning_rate": 1.8954748462269604e-05, + "loss": 1.1679, "step": 3664 }, { - "epoch": 0.7685049276577899, - "grad_norm": 7.411573346172245, - "learning_rate": 1.7435940587310785e-05, - "loss": 1.8935, + "epoch": 0.517509178198249, + "grad_norm": 4.748993879229028, + "learning_rate": 1.895406980573181e-05, + "loss": 1.2624, "step": 3665 }, { - "epoch": 0.7687146152233173, - "grad_norm": 7.339349900624808, - "learning_rate": 1.7434426692568563e-05, - "loss": 1.808, + "epoch": 0.5176503812482349, + "grad_norm": 3.2670633510673674, + "learning_rate": 1.89533909411053e-05, + "loss": 1.2222, "step": 3666 }, { - "epoch": 0.7689243027888446, - "grad_norm": 6.073993859773652, - "learning_rate": 1.7432912416799075e-05, - "loss": 1.6403, + "epoch": 0.5177915842982208, + "grad_norm": 3.7355853854399856, + "learning_rate": 1.895271186840585e-05, + "loss": 1.0391, "step": 3667 }, { - "epoch": 0.769133990354372, - "grad_norm": 6.538471392037432, - "learning_rate": 1.743139776007994e-05, - "loss": 1.8015, + "epoch": 0.5179327873482067, + "grad_norm": 3.827090780939897, + "learning_rate": 1.895203258764924e-05, + "loss": 0.9972, "step": 3668 }, { - "epoch": 0.7693436779198993, - "grad_norm": 6.679344107350826, - "learning_rate": 1.7429882722488775e-05, - "loss": 2.023, + "epoch": 0.5180739903981926, + "grad_norm": 4.02470609056982, + "learning_rate": 1.8951353098851267e-05, + "loss": 1.0301, "step": 3669 }, { - "epoch": 0.7695533654854267, - "grad_norm": 7.6094687659735305, - "learning_rate": 1.7428367304103236e-05, - "loss": 2.0213, + "epoch": 0.5182151934481785, + "grad_norm": 4.841136148060212, + "learning_rate": 1.8950673402027706e-05, + "loss": 1.204, "step": 3670 }, { - "epoch": 0.769763053050954, - "grad_norm": 6.779798778428214, - "learning_rate": 1.742685150500099e-05, - "loss": 1.7587, + "epoch": 0.5183563964981643, + "grad_norm": 3.6355305985753796, + "learning_rate": 1.8949993497194365e-05, + "loss": 0.8103, "step": 3671 }, { - "epoch": 0.7699727406164815, - "grad_norm": 6.757766399887811, - "learning_rate": 1.742533532525972e-05, - "loss": 2.0425, + "epoch": 0.5184975995481502, + "grad_norm": 3.4794458251083507, + "learning_rate": 1.8949313384367038e-05, + "loss": 1.0257, "step": 3672 }, { - "epoch": 0.7701824281820088, - "grad_norm": 6.743488083277184, - "learning_rate": 1.742381876495714e-05, - "loss": 1.788, + "epoch": 0.5186388025981361, + "grad_norm": 4.0146358586845015, + "learning_rate": 1.8948633063561536e-05, + "loss": 1.1554, "step": 3673 }, { - "epoch": 0.7703921157475362, - "grad_norm": 5.709147954066134, - "learning_rate": 1.742230182417097e-05, - "loss": 1.4902, + "epoch": 0.518780005648122, + "grad_norm": 3.474779021786041, + "learning_rate": 1.8947952534793663e-05, + "loss": 0.8154, "step": 3674 }, { - "epoch": 0.7706018033130635, - "grad_norm": 6.977914522852681, - "learning_rate": 1.742078450297896e-05, - "loss": 1.9757, + "epoch": 0.5189212086981079, + "grad_norm": 4.125988408948386, + "learning_rate": 1.894727179807924e-05, + "loss": 1.1373, "step": 3675 }, { - "epoch": 0.7708114908785909, - "grad_norm": 6.111881360596249, - "learning_rate": 1.7419266801458866e-05, - "loss": 1.7127, + "epoch": 0.5190624117480938, + "grad_norm": 3.534989023382573, + "learning_rate": 1.894659085343408e-05, + "loss": 0.9619, "step": 3676 }, { - "epoch": 0.7710211784441182, - "grad_norm": 5.510855756553036, - "learning_rate": 1.7417748719688488e-05, - "loss": 1.7443, + "epoch": 0.5192036147980796, + "grad_norm": 4.243365209012949, + "learning_rate": 1.8945909700874013e-05, + "loss": 1.0293, "step": 3677 }, { - "epoch": 0.7712308660096456, - "grad_norm": 6.536702921585937, - "learning_rate": 1.7416230257745624e-05, - "loss": 1.5566, + "epoch": 0.5193448178480655, + "grad_norm": 3.9015538129315464, + "learning_rate": 1.894522834041487e-05, + "loss": 1.2448, "step": 3678 }, { - "epoch": 0.7714405535751729, - "grad_norm": 5.716671615169279, - "learning_rate": 1.741471141570809e-05, - "loss": 1.7709, + "epoch": 0.5194860208980514, + "grad_norm": 4.077228732168723, + "learning_rate": 1.8944546772072487e-05, + "loss": 0.9919, "step": 3679 }, { - "epoch": 0.7716502411407004, - "grad_norm": 5.555081395778038, - "learning_rate": 1.741319219365374e-05, - "loss": 1.6331, + "epoch": 0.5196272239480373, + "grad_norm": 3.925950921261376, + "learning_rate": 1.8943864995862692e-05, + "loss": 1.1329, "step": 3680 }, { - "epoch": 0.7718599287062278, - "grad_norm": 6.750869221208491, - "learning_rate": 1.7411672591660433e-05, - "loss": 1.7384, + "epoch": 0.5197684269980232, + "grad_norm": 4.148283827747259, + "learning_rate": 1.8943183011801346e-05, + "loss": 1.1611, "step": 3681 }, { - "epoch": 0.7720696162717551, - "grad_norm": 6.695160836802208, - "learning_rate": 1.741015260980605e-05, - "loss": 1.7865, + "epoch": 0.519909630048009, + "grad_norm": 3.540684038895232, + "learning_rate": 1.8942500819904285e-05, + "loss": 0.919, "step": 3682 }, { - "epoch": 0.7722793038372825, - "grad_norm": 5.9770800413181036, - "learning_rate": 1.7408632248168492e-05, - "loss": 1.613, + "epoch": 0.5200508330979949, + "grad_norm": 6.981249547133298, + "learning_rate": 1.8941818420187364e-05, + "loss": 1.0309, "step": 3683 }, { - "epoch": 0.7724889914028098, - "grad_norm": 6.793188038358102, - "learning_rate": 1.7407111506825686e-05, - "loss": 1.9104, + "epoch": 0.5201920361479808, + "grad_norm": 3.2245458914995524, + "learning_rate": 1.8941135812666445e-05, + "loss": 0.8724, "step": 3684 }, { - "epoch": 0.7726986789683372, - "grad_norm": 6.325997286887496, - "learning_rate": 1.7405590385855563e-05, - "loss": 1.698, + "epoch": 0.5203332391979667, + "grad_norm": 3.745572757756558, + "learning_rate": 1.8940452997357394e-05, + "loss": 0.9531, "step": 3685 }, { - "epoch": 0.7729083665338645, - "grad_norm": 6.374183867551127, - "learning_rate": 1.7404068885336093e-05, - "loss": 1.9877, + "epoch": 0.5204744422479526, + "grad_norm": 3.777472974555665, + "learning_rate": 1.8939769974276076e-05, + "loss": 0.8905, "step": 3686 }, { - "epoch": 0.7731180540993919, - "grad_norm": 6.552711821913816, - "learning_rate": 1.7402547005345246e-05, - "loss": 1.7765, + "epoch": 0.5206156452979385, + "grad_norm": 4.108352997250015, + "learning_rate": 1.8939086743438363e-05, + "loss": 1.1512, "step": 3687 }, { - "epoch": 0.7733277416649192, - "grad_norm": 6.14122226227425, - "learning_rate": 1.740102474596103e-05, - "loss": 1.7039, + "epoch": 0.5207568483479244, + "grad_norm": 3.9989724187218565, + "learning_rate": 1.8938403304860137e-05, + "loss": 1.0386, "step": 3688 }, { - "epoch": 0.7735374292304467, - "grad_norm": 7.122926686964567, - "learning_rate": 1.7399502107261453e-05, - "loss": 1.6352, + "epoch": 0.5208980513979102, + "grad_norm": 3.5035108379191104, + "learning_rate": 1.8937719658557276e-05, + "loss": 1.1209, "step": 3689 }, { - "epoch": 0.773747116795974, - "grad_norm": 6.355239932185493, - "learning_rate": 1.739797908932457e-05, - "loss": 1.8225, + "epoch": 0.5210392544478961, + "grad_norm": 3.29570684624625, + "learning_rate": 1.893703580454567e-05, + "loss": 0.841, "step": 3690 }, { - "epoch": 0.7739568043615014, - "grad_norm": 6.239632884365097, - "learning_rate": 1.739645569222842e-05, - "loss": 1.8271, + "epoch": 0.521180457497882, + "grad_norm": 2.9166760533203875, + "learning_rate": 1.8936351742841213e-05, + "loss": 0.8326, "step": 3691 }, { - "epoch": 0.7741664919270287, - "grad_norm": 7.116278558799536, - "learning_rate": 1.739493191605109e-05, - "loss": 2.053, + "epoch": 0.5213216605478679, + "grad_norm": 3.593051297734018, + "learning_rate": 1.89356674734598e-05, + "loss": 1.0042, "step": 3692 }, { - "epoch": 0.7743761794925561, - "grad_norm": 6.131043029445476, - "learning_rate": 1.739340776087067e-05, - "loss": 1.7674, + "epoch": 0.5214628635978538, + "grad_norm": 5.735259005849613, + "learning_rate": 1.8934982996417336e-05, + "loss": 1.1838, "step": 3693 }, { - "epoch": 0.7745858670580834, - "grad_norm": 6.052566275181074, - "learning_rate": 1.7391883226765278e-05, - "loss": 1.5088, + "epoch": 0.5216040666478396, + "grad_norm": 3.0227126054085107, + "learning_rate": 1.8934298311729728e-05, + "loss": 0.8901, "step": 3694 }, { - "epoch": 0.7747955546236108, - "grad_norm": 7.494807947346358, - "learning_rate": 1.7390358313813056e-05, - "loss": 1.6442, + "epoch": 0.5217452696978254, + "grad_norm": 3.543966301628358, + "learning_rate": 1.893361341941288e-05, + "loss": 0.9893, "step": 3695 }, { - "epoch": 0.7750052421891381, - "grad_norm": 5.8679602513475855, - "learning_rate": 1.738883302209215e-05, - "loss": 1.9317, + "epoch": 0.5218864727478113, + "grad_norm": 3.271505482749413, + "learning_rate": 1.8932928319482714e-05, + "loss": 0.7027, "step": 3696 }, { - "epoch": 0.7752149297546655, - "grad_norm": 6.314531817242334, - "learning_rate": 1.7387307351680735e-05, - "loss": 1.676, + "epoch": 0.5220276757977972, + "grad_norm": 4.9295632675804075, + "learning_rate": 1.8932243011955154e-05, + "loss": 1.1269, "step": 3697 }, { - "epoch": 0.775424617320193, - "grad_norm": 6.215495584880176, - "learning_rate": 1.7385781302657005e-05, - "loss": 2.1957, + "epoch": 0.5221688788477831, + "grad_norm": 4.914879796632518, + "learning_rate": 1.8931557496846124e-05, + "loss": 1.2371, "step": 3698 }, { - "epoch": 0.7756343048857203, - "grad_norm": 6.845501418141876, - "learning_rate": 1.7384254875099177e-05, - "loss": 2.0808, + "epoch": 0.522310081897769, + "grad_norm": 3.8073822826407855, + "learning_rate": 1.8930871774171555e-05, + "loss": 0.9767, "step": 3699 }, { - "epoch": 0.7758439924512477, - "grad_norm": 6.139472042533223, - "learning_rate": 1.7382728069085476e-05, - "loss": 1.8114, + "epoch": 0.5224512849477548, + "grad_norm": 3.4674787161621303, + "learning_rate": 1.8930185843947382e-05, + "loss": 0.9508, "step": 3700 }, { - "epoch": 0.776053680016775, - "grad_norm": 5.8290804514132795, - "learning_rate": 1.738120088469416e-05, - "loss": 2.2186, + "epoch": 0.5225924879977407, + "grad_norm": 3.5433596983422033, + "learning_rate": 1.892949970618955e-05, + "loss": 0.9219, "step": 3701 }, { - "epoch": 0.7762633675823024, - "grad_norm": 5.486323133570247, - "learning_rate": 1.7379673322003493e-05, - "loss": 1.6502, + "epoch": 0.5227336910477266, + "grad_norm": 3.534829723048114, + "learning_rate": 1.8928813360914e-05, + "loss": 0.9747, "step": 3702 }, { - "epoch": 0.7764730551478297, - "grad_norm": 5.420312803535227, - "learning_rate": 1.737814538109177e-05, - "loss": 1.5772, + "epoch": 0.5228748940977125, + "grad_norm": 4.220976375918456, + "learning_rate": 1.892812680813668e-05, + "loss": 1.0705, "step": 3703 }, { - "epoch": 0.7766827427133571, - "grad_norm": 5.707300103941764, - "learning_rate": 1.7376617062037298e-05, - "loss": 2.0236, + "epoch": 0.5230160971476984, + "grad_norm": 3.218765265829235, + "learning_rate": 1.892744004787355e-05, + "loss": 0.8724, "step": 3704 }, { - "epoch": 0.7768924302788844, - "grad_norm": 8.624140557111666, - "learning_rate": 1.7375088364918412e-05, - "loss": 1.8287, + "epoch": 0.5231573001976842, + "grad_norm": 3.9482994667748335, + "learning_rate": 1.892675308014057e-05, + "loss": 1.0468, "step": 3705 }, { - "epoch": 0.7771021178444119, - "grad_norm": 6.016758332069924, - "learning_rate": 1.737355928981345e-05, - "loss": 1.8598, + "epoch": 0.5232985032476701, + "grad_norm": 4.398453109970903, + "learning_rate": 1.8926065904953703e-05, + "loss": 1.1235, "step": 3706 }, { - "epoch": 0.7773118054099392, - "grad_norm": 5.197530421822921, - "learning_rate": 1.737202983680079e-05, - "loss": 1.7728, + "epoch": 0.523439706297656, + "grad_norm": 3.7671666690079078, + "learning_rate": 1.8925378522328918e-05, + "loss": 0.9358, "step": 3707 }, { - "epoch": 0.7775214929754666, - "grad_norm": 7.3863502485668295, - "learning_rate": 1.7370500005958818e-05, - "loss": 1.9952, + "epoch": 0.5235809093476419, + "grad_norm": 3.692662359522412, + "learning_rate": 1.8924690932282193e-05, + "loss": 0.9935, "step": 3708 }, { - "epoch": 0.7777311805409939, - "grad_norm": 5.797472213560914, - "learning_rate": 1.736896979736593e-05, - "loss": 1.9066, + "epoch": 0.5237221123976278, + "grad_norm": 4.149845068508517, + "learning_rate": 1.8924003134829504e-05, + "loss": 1.1628, "step": 3709 }, { - "epoch": 0.7779408681065213, - "grad_norm": 5.548546574467694, - "learning_rate": 1.7367439211100562e-05, - "loss": 1.787, + "epoch": 0.5238633154476137, + "grad_norm": 3.477017695137049, + "learning_rate": 1.8923315129986838e-05, + "loss": 1.0132, "step": 3710 }, { - "epoch": 0.7781505556720486, - "grad_norm": 6.678654679124065, - "learning_rate": 1.7365908247241156e-05, - "loss": 1.8541, + "epoch": 0.5240045184975995, + "grad_norm": 3.8890350611954827, + "learning_rate": 1.8922626917770178e-05, + "loss": 1.1258, "step": 3711 }, { - "epoch": 0.778360243237576, - "grad_norm": 5.937774431010136, - "learning_rate": 1.736437690586618e-05, - "loss": 1.823, + "epoch": 0.5241457215475854, + "grad_norm": 3.405094231212233, + "learning_rate": 1.8921938498195523e-05, + "loss": 1.0686, "step": 3712 }, { - "epoch": 0.7785699308031033, - "grad_norm": 5.77062241835383, - "learning_rate": 1.736284518705411e-05, - "loss": 1.8848, + "epoch": 0.5242869245975713, + "grad_norm": 3.883819331479461, + "learning_rate": 1.8921249871278874e-05, + "loss": 1.1591, "step": 3713 }, { - "epoch": 0.7787796183686307, - "grad_norm": 6.066334595045192, - "learning_rate": 1.7361313090883455e-05, - "loss": 1.7502, + "epoch": 0.5244281276475572, + "grad_norm": 3.334312005637295, + "learning_rate": 1.892056103703623e-05, + "loss": 0.9147, "step": 3714 }, { - "epoch": 0.778989305934158, - "grad_norm": 5.94859261906079, - "learning_rate": 1.7359780617432735e-05, - "loss": 1.7828, + "epoch": 0.5245693306975431, + "grad_norm": 4.277839352820532, + "learning_rate": 1.8919871995483595e-05, + "loss": 1.0862, "step": 3715 }, { - "epoch": 0.7791989934996855, - "grad_norm": 5.674002493380154, - "learning_rate": 1.7358247766780497e-05, - "loss": 1.9576, + "epoch": 0.524710533747529, + "grad_norm": 3.7651505289899068, + "learning_rate": 1.891918274663699e-05, + "loss": 1.0548, "step": 3716 }, { - "epoch": 0.7794086810652129, - "grad_norm": 6.161203207592451, - "learning_rate": 1.7356714539005297e-05, - "loss": 1.8563, + "epoch": 0.5248517367975148, + "grad_norm": 4.06183280578874, + "learning_rate": 1.8918493290512432e-05, + "loss": 1.2302, "step": 3717 }, { - "epoch": 0.7796183686307402, - "grad_norm": 6.422434939327867, - "learning_rate": 1.7355180934185717e-05, - "loss": 1.89, + "epoch": 0.5249929398475007, + "grad_norm": 4.7193096967857775, + "learning_rate": 1.891780362712594e-05, + "loss": 1.3575, "step": 3718 }, { - "epoch": 0.7798280561962676, - "grad_norm": 6.316184690673304, - "learning_rate": 1.735364695240036e-05, - "loss": 1.9439, + "epoch": 0.5251341428974866, + "grad_norm": 4.056500474461471, + "learning_rate": 1.8917113756493542e-05, + "loss": 1.2785, "step": 3719 }, { - "epoch": 0.7800377437617949, - "grad_norm": 6.434339532347986, - "learning_rate": 1.7352112593727838e-05, - "loss": 1.7838, + "epoch": 0.5252753459474725, + "grad_norm": 3.6656774541120503, + "learning_rate": 1.891642367863127e-05, + "loss": 1.0496, "step": 3720 }, { - "epoch": 0.7802474313273223, - "grad_norm": 6.067686122163278, - "learning_rate": 1.7350577858246796e-05, - "loss": 1.7625, + "epoch": 0.5254165489974584, + "grad_norm": 3.427047669738966, + "learning_rate": 1.8915733393555166e-05, + "loss": 1.0009, "step": 3721 }, { - "epoch": 0.7804571188928496, - "grad_norm": 5.60892959814783, - "learning_rate": 1.734904274603589e-05, - "loss": 1.818, + "epoch": 0.5255577520474443, + "grad_norm": 3.2383427100618047, + "learning_rate": 1.891504290128127e-05, + "loss": 0.9051, "step": 3722 }, { - "epoch": 0.780666806458377, - "grad_norm": 5.462586681528775, - "learning_rate": 1.7347507257173797e-05, - "loss": 1.9281, + "epoch": 0.5256989550974301, + "grad_norm": 2.821517886805279, + "learning_rate": 1.8914352201825622e-05, + "loss": 0.7275, "step": 3723 }, { - "epoch": 0.7808764940239044, - "grad_norm": 5.959990016291894, - "learning_rate": 1.7345971391739213e-05, - "loss": 1.4341, + "epoch": 0.525840158147416, + "grad_norm": 3.9920522754610603, + "learning_rate": 1.891366129520428e-05, + "loss": 1.1282, "step": 3724 }, { - "epoch": 0.7810861815894318, - "grad_norm": 5.857288788698317, - "learning_rate": 1.7344435149810857e-05, - "loss": 1.8696, + "epoch": 0.5259813611974019, + "grad_norm": 3.8416653054336467, + "learning_rate": 1.89129701814333e-05, + "loss": 1.3407, "step": 3725 }, { - "epoch": 0.7812958691549591, - "grad_norm": 5.7259854691807615, - "learning_rate": 1.734289853146746e-05, - "loss": 1.9614, + "epoch": 0.5261225642473878, + "grad_norm": 3.5217875546557735, + "learning_rate": 1.8912278860528742e-05, + "loss": 1.048, "step": 3726 }, { - "epoch": 0.7815055567204865, - "grad_norm": 6.366311799426714, - "learning_rate": 1.7341361536787774e-05, - "loss": 1.8454, + "epoch": 0.5262637672973737, + "grad_norm": 3.6915822280847763, + "learning_rate": 1.8911587332506674e-05, + "loss": 1.0335, "step": 3727 }, { - "epoch": 0.7817152442860138, - "grad_norm": 6.5359142095354645, - "learning_rate": 1.733982416585058e-05, - "loss": 2.0712, + "epoch": 0.5264049703473596, + "grad_norm": 4.565950824037857, + "learning_rate": 1.891089559738316e-05, + "loss": 1.0954, "step": 3728 }, { - "epoch": 0.7819249318515412, - "grad_norm": 7.403335527495739, - "learning_rate": 1.7338286418734666e-05, - "loss": 1.7362, + "epoch": 0.5265461733973453, + "grad_norm": 5.255165768442272, + "learning_rate": 1.8910203655174285e-05, + "loss": 1.3427, "step": 3729 }, { - "epoch": 0.7821346194170685, - "grad_norm": 6.759457254223462, - "learning_rate": 1.7336748295518846e-05, - "loss": 1.5543, + "epoch": 0.5266873764473312, + "grad_norm": 3.5944402451511572, + "learning_rate": 1.8909511505896122e-05, + "loss": 0.9757, "step": 3730 }, { - "epoch": 0.7823443069825959, - "grad_norm": 5.809230554526833, - "learning_rate": 1.733520979628195e-05, - "loss": 1.8014, + "epoch": 0.5268285794973171, + "grad_norm": 3.621901786560854, + "learning_rate": 1.8908819149564764e-05, + "loss": 1.0628, "step": 3731 }, { - "epoch": 0.7825539945481232, - "grad_norm": 6.467669548226693, - "learning_rate": 1.733367092110283e-05, - "loss": 1.6486, + "epoch": 0.526969782547303, + "grad_norm": 3.2584361620380067, + "learning_rate": 1.890812658619629e-05, + "loss": 0.8937, "step": 3732 }, { - "epoch": 0.7827636821136507, - "grad_norm": 6.74810538342474, - "learning_rate": 1.7332131670060356e-05, - "loss": 1.9013, + "epoch": 0.5271109855972889, + "grad_norm": 3.387890035674744, + "learning_rate": 1.8907433815806805e-05, + "loss": 0.9446, "step": 3733 }, { - "epoch": 0.7829733696791781, - "grad_norm": 5.6481619832997865, - "learning_rate": 1.733059204323342e-05, - "loss": 1.5805, + "epoch": 0.5272521886472747, + "grad_norm": 3.6879236404237243, + "learning_rate": 1.8906740838412404e-05, + "loss": 0.8987, "step": 3734 }, { - "epoch": 0.7831830572447054, - "grad_norm": 6.93072857651362, - "learning_rate": 1.732905204070092e-05, - "loss": 2.0013, + "epoch": 0.5273933916972606, + "grad_norm": 4.474668386061455, + "learning_rate": 1.8906047654029196e-05, + "loss": 1.0467, "step": 3735 }, { - "epoch": 0.7833927448102328, - "grad_norm": 7.386761936291472, - "learning_rate": 1.73275116625418e-05, - "loss": 1.8713, + "epoch": 0.5275345947472465, + "grad_norm": 3.5608070420740296, + "learning_rate": 1.890535426267328e-05, + "loss": 1.1148, "step": 3736 }, { - "epoch": 0.7836024323757601, - "grad_norm": 5.721379355546761, - "learning_rate": 1.7325970908834993e-05, - "loss": 1.8693, + "epoch": 0.5276757977972324, + "grad_norm": 3.929317790760646, + "learning_rate": 1.8904660664360784e-05, + "loss": 1.1848, "step": 3737 }, { - "epoch": 0.7838121199412875, - "grad_norm": 6.504881031715918, - "learning_rate": 1.732442977965947e-05, - "loss": 1.7702, + "epoch": 0.5278170008472183, + "grad_norm": 4.145990524836006, + "learning_rate": 1.8903966859107816e-05, + "loss": 0.9434, "step": 3738 }, { - "epoch": 0.7840218075068148, - "grad_norm": 7.277498499545157, - "learning_rate": 1.732288827509422e-05, - "loss": 2.3072, + "epoch": 0.5279582038972042, + "grad_norm": 3.3652632248775367, + "learning_rate": 1.8903272846930503e-05, + "loss": 1.0405, "step": 3739 }, { - "epoch": 0.7842314950723422, - "grad_norm": 7.121452781804273, - "learning_rate": 1.7321346395218243e-05, - "loss": 1.7482, + "epoch": 0.52809940694719, + "grad_norm": 3.700148921884483, + "learning_rate": 1.8902578627844975e-05, + "loss": 1.057, "step": 3740 }, { - "epoch": 0.7844411826378695, - "grad_norm": 6.204376078176178, - "learning_rate": 1.7319804140110567e-05, - "loss": 1.9128, + "epoch": 0.5282406099971759, + "grad_norm": 3.325584290683457, + "learning_rate": 1.8901884201867364e-05, + "loss": 0.924, "step": 3741 }, { - "epoch": 0.784650870203397, - "grad_norm": 6.678226094692333, - "learning_rate": 1.7318261509850232e-05, - "loss": 1.8391, + "epoch": 0.5283818130471618, + "grad_norm": 3.4258733127405856, + "learning_rate": 1.890118956901381e-05, + "loss": 0.9585, "step": 3742 }, { - "epoch": 0.7848605577689243, - "grad_norm": 6.767069065605165, - "learning_rate": 1.7316718504516302e-05, - "loss": 1.8829, + "epoch": 0.5285230160971477, + "grad_norm": 3.5223727081235516, + "learning_rate": 1.8900494729300453e-05, + "loss": 1.0187, "step": 3743 }, { - "epoch": 0.7850702453344517, - "grad_norm": 6.548352483353616, - "learning_rate": 1.731517512418786e-05, - "loss": 1.8328, + "epoch": 0.5286642191471336, + "grad_norm": 4.102496728105654, + "learning_rate": 1.8899799682743442e-05, + "loss": 1.234, "step": 3744 }, { - "epoch": 0.785279932899979, - "grad_norm": 6.359480852574694, - "learning_rate": 1.7313631368944002e-05, - "loss": 1.7449, + "epoch": 0.5288054221971195, + "grad_norm": 3.501410272194689, + "learning_rate": 1.8899104429358932e-05, + "loss": 1.091, "step": 3745 }, { - "epoch": 0.7854896204655064, - "grad_norm": 7.126567059130508, - "learning_rate": 1.731208723886385e-05, - "loss": 2.0378, + "epoch": 0.5289466252471053, + "grad_norm": 3.9053115568756556, + "learning_rate": 1.8898408969163078e-05, + "loss": 0.9744, "step": 3746 }, { - "epoch": 0.7856993080310337, - "grad_norm": 5.497025126889722, - "learning_rate": 1.7310542734026553e-05, - "loss": 1.5895, + "epoch": 0.5290878282970912, + "grad_norm": 3.5696788932511008, + "learning_rate": 1.889771330217204e-05, + "loss": 1.0454, "step": 3747 }, { - "epoch": 0.7859089955965611, - "grad_norm": 5.915108898695909, - "learning_rate": 1.7308997854511257e-05, - "loss": 1.552, + "epoch": 0.5292290313470771, + "grad_norm": 3.5202707605735375, + "learning_rate": 1.889701742840199e-05, + "loss": 1.185, "step": 3748 }, { - "epoch": 0.7861186831620884, - "grad_norm": 5.559981812177856, - "learning_rate": 1.730745260039714e-05, - "loss": 1.8118, + "epoch": 0.529370234397063, + "grad_norm": 4.311411512106789, + "learning_rate": 1.8896321347869094e-05, + "loss": 1.2281, "step": 3749 }, { - "epoch": 0.7863283707276159, - "grad_norm": 7.626200982493386, - "learning_rate": 1.7305906971763407e-05, - "loss": 1.651, + "epoch": 0.5295114374470489, + "grad_norm": 3.7448473654752594, + "learning_rate": 1.8895625060589538e-05, + "loss": 1.0566, "step": 3750 }, { - "epoch": 0.7865380582931433, - "grad_norm": 5.723157443469827, - "learning_rate": 1.730436096868927e-05, - "loss": 1.6888, + "epoch": 0.5296526404970348, + "grad_norm": 3.9009726567209104, + "learning_rate": 1.8894928566579492e-05, + "loss": 1.0038, "step": 3751 }, { - "epoch": 0.7867477458586706, - "grad_norm": 5.5653901139750195, - "learning_rate": 1.7302814591253966e-05, - "loss": 2.141, + "epoch": 0.5297938435470206, + "grad_norm": 4.612467865358213, + "learning_rate": 1.8894231865855152e-05, + "loss": 1.3822, "step": 3752 }, { - "epoch": 0.786957433424198, - "grad_norm": 5.281258842465091, - "learning_rate": 1.7301267839536746e-05, - "loss": 1.5765, + "epoch": 0.5299350465970065, + "grad_norm": 3.9766762670366074, + "learning_rate": 1.88935349584327e-05, + "loss": 1.2327, "step": 3753 }, { - "epoch": 0.7871671209897253, - "grad_norm": 5.661900767812405, - "learning_rate": 1.7299720713616887e-05, - "loss": 1.7015, + "epoch": 0.5300762496469924, + "grad_norm": 4.33365599534029, + "learning_rate": 1.8892837844328338e-05, + "loss": 1.1905, "step": 3754 }, { - "epoch": 0.7873768085552527, - "grad_norm": 6.806577795223069, - "learning_rate": 1.7298173213573682e-05, - "loss": 1.479, + "epoch": 0.5302174526969783, + "grad_norm": 4.475437724934253, + "learning_rate": 1.8892140523558266e-05, + "loss": 1.1966, "step": 3755 }, { - "epoch": 0.78758649612078, - "grad_norm": 5.844847643625029, - "learning_rate": 1.729662533948644e-05, - "loss": 1.8283, + "epoch": 0.5303586557469642, + "grad_norm": 3.6951482705848915, + "learning_rate": 1.8891442996138686e-05, + "loss": 0.9878, "step": 3756 }, { - "epoch": 0.7877961836863074, - "grad_norm": 7.5478040015298715, - "learning_rate": 1.729507709143449e-05, - "loss": 2.0444, + "epoch": 0.53049985879695, + "grad_norm": 5.005988774696322, + "learning_rate": 1.8890745262085812e-05, + "loss": 1.1872, "step": 3757 }, { - "epoch": 0.7880058712518347, - "grad_norm": 5.697051218338185, - "learning_rate": 1.7293528469497193e-05, - "loss": 1.5551, + "epoch": 0.5306410618469359, + "grad_norm": 3.533546684299308, + "learning_rate": 1.8890047321415856e-05, + "loss": 0.8394, "step": 3758 }, { - "epoch": 0.7882155588173622, - "grad_norm": 7.272403623970555, - "learning_rate": 1.729197947375391e-05, - "loss": 1.9502, + "epoch": 0.5307822648969218, + "grad_norm": 3.0198817360690224, + "learning_rate": 1.8889349174145044e-05, + "loss": 0.7916, "step": 3759 }, { - "epoch": 0.7884252463828895, - "grad_norm": 7.690605762464217, - "learning_rate": 1.7290430104284032e-05, - "loss": 1.729, + "epoch": 0.5309234679469077, + "grad_norm": 4.366564004778834, + "learning_rate": 1.8888650820289594e-05, + "loss": 1.2799, "step": 3760 }, { - "epoch": 0.7886349339484169, - "grad_norm": 6.282304951578839, - "learning_rate": 1.7288880361166963e-05, - "loss": 1.5239, + "epoch": 0.5310646709968936, + "grad_norm": 4.459936006808013, + "learning_rate": 1.8887952259865735e-05, + "loss": 1.1025, "step": 3761 }, { - "epoch": 0.7888446215139442, - "grad_norm": 5.372825395608513, - "learning_rate": 1.7287330244482137e-05, - "loss": 1.3797, + "epoch": 0.5312058740468795, + "grad_norm": 3.9761443966282473, + "learning_rate": 1.8887253492889708e-05, + "loss": 1.0603, "step": 3762 }, { - "epoch": 0.7890543090794716, - "grad_norm": 6.700864206515308, - "learning_rate": 1.7285779754308995e-05, - "loss": 2.0115, + "epoch": 0.5313470770968652, + "grad_norm": 3.8201912012924124, + "learning_rate": 1.8886554519377744e-05, + "loss": 1.0783, "step": 3763 }, { - "epoch": 0.7892639966449989, - "grad_norm": 6.475946005773212, - "learning_rate": 1.7284228890727006e-05, - "loss": 1.6781, + "epoch": 0.5314882801468511, + "grad_norm": 3.990610110117607, + "learning_rate": 1.8885855339346097e-05, + "loss": 1.3363, "step": 3764 }, { - "epoch": 0.7894736842105263, - "grad_norm": 6.967050518608623, - "learning_rate": 1.7282677653815652e-05, - "loss": 1.5084, + "epoch": 0.531629483196837, + "grad_norm": 3.7372920390115505, + "learning_rate": 1.8885155952811e-05, + "loss": 0.9556, "step": 3765 }, { - "epoch": 0.7896833717760536, - "grad_norm": 5.3013809157615635, - "learning_rate": 1.7281126043654436e-05, - "loss": 1.7113, + "epoch": 0.5317706862468229, + "grad_norm": 5.665162005196365, + "learning_rate": 1.8884456359788725e-05, + "loss": 0.8942, "step": 3766 }, { - "epoch": 0.789893059341581, - "grad_norm": 6.502806611506876, - "learning_rate": 1.727957406032288e-05, - "loss": 1.8401, + "epoch": 0.5319118892968088, + "grad_norm": 3.572809254306325, + "learning_rate": 1.8883756560295517e-05, + "loss": 0.9111, "step": 3767 }, { - "epoch": 0.7901027469071084, - "grad_norm": 5.561432351562957, - "learning_rate": 1.727802170390053e-05, - "loss": 1.3767, + "epoch": 0.5320530923467947, + "grad_norm": 3.6545047638360697, + "learning_rate": 1.8883056554347643e-05, + "loss": 1.0559, "step": 3768 }, { - "epoch": 0.7903124344726358, - "grad_norm": 6.774195578611065, - "learning_rate": 1.7276468974466946e-05, - "loss": 1.761, + "epoch": 0.5321942953967805, + "grad_norm": 4.081169121234149, + "learning_rate": 1.8882356341961374e-05, + "loss": 1.102, "step": 3769 }, { - "epoch": 0.7905221220381632, - "grad_norm": 7.25839027326519, - "learning_rate": 1.7274915872101704e-05, - "loss": 1.871, + "epoch": 0.5323354984467664, + "grad_norm": 3.543406279372913, + "learning_rate": 1.8881655923152975e-05, + "loss": 0.906, "step": 3770 }, { - "epoch": 0.7907318096036905, - "grad_norm": 7.185594100387391, - "learning_rate": 1.7273362396884405e-05, - "loss": 2.09, + "epoch": 0.5324767014967523, + "grad_norm": 3.9266551932895135, + "learning_rate": 1.888095529793873e-05, + "loss": 0.9235, "step": 3771 }, { - "epoch": 0.7909414971692179, - "grad_norm": 7.020451992151008, - "learning_rate": 1.727180854889467e-05, - "loss": 1.8139, + "epoch": 0.5326179045467382, + "grad_norm": 3.265047355417172, + "learning_rate": 1.888025446633492e-05, + "loss": 0.8209, "step": 3772 }, { - "epoch": 0.7911511847347452, - "grad_norm": 7.6327971279649365, - "learning_rate": 1.727025432821214e-05, - "loss": 1.8714, + "epoch": 0.5327591075967241, + "grad_norm": 3.963005410213608, + "learning_rate": 1.8879553428357832e-05, + "loss": 1.1074, "step": 3773 }, { - "epoch": 0.7913608723002726, - "grad_norm": 5.8341109856249895, - "learning_rate": 1.7268699734916458e-05, - "loss": 1.833, + "epoch": 0.53290031064671, + "grad_norm": 3.4930562886484897, + "learning_rate": 1.8878852184023754e-05, + "loss": 0.9966, "step": 3774 }, { - "epoch": 0.7915705598657999, - "grad_norm": 7.177776879424799, - "learning_rate": 1.7267144769087308e-05, - "loss": 1.8145, + "epoch": 0.5330415136966958, + "grad_norm": 3.6004359110176174, + "learning_rate": 1.8878150733348988e-05, + "loss": 1.0806, "step": 3775 }, { - "epoch": 0.7917802474313274, - "grad_norm": 6.312583535356928, - "learning_rate": 1.726558943080439e-05, - "loss": 1.8524, + "epoch": 0.5331827167466817, + "grad_norm": 3.3870535403913506, + "learning_rate": 1.8877449076349833e-05, + "loss": 0.9993, "step": 3776 }, { - "epoch": 0.7919899349968547, - "grad_norm": 6.73636148457771, - "learning_rate": 1.7264033720147408e-05, - "loss": 1.8153, + "epoch": 0.5333239197966676, + "grad_norm": 4.6838007969239905, + "learning_rate": 1.8876747213042593e-05, + "loss": 1.3216, "step": 3777 }, { - "epoch": 0.7921996225623821, - "grad_norm": 6.9189433647696825, - "learning_rate": 1.7262477637196096e-05, - "loss": 1.8173, + "epoch": 0.5334651228466535, + "grad_norm": 3.514406427630022, + "learning_rate": 1.8876045143443583e-05, + "loss": 0.9919, "step": 3778 }, { - "epoch": 0.7924093101279094, - "grad_norm": 5.790322560463186, - "learning_rate": 1.7260921182030216e-05, - "loss": 2.053, + "epoch": 0.5336063258966394, + "grad_norm": 3.735516494113757, + "learning_rate": 1.887534286756912e-05, + "loss": 0.9415, "step": 3779 }, { - "epoch": 0.7926189976934368, - "grad_norm": 6.389957297059367, - "learning_rate": 1.725936435472953e-05, - "loss": 1.7552, + "epoch": 0.5337475289466252, + "grad_norm": 3.544848413726788, + "learning_rate": 1.8874640385435515e-05, + "loss": 1.0803, "step": 3780 }, { - "epoch": 0.7928286852589641, - "grad_norm": 6.314598199232086, - "learning_rate": 1.7257807155373827e-05, - "loss": 1.8706, + "epoch": 0.5338887319966111, + "grad_norm": 4.4592700704814785, + "learning_rate": 1.8873937697059106e-05, + "loss": 1.1805, "step": 3781 }, { - "epoch": 0.7930383728244915, - "grad_norm": 5.787078024360537, - "learning_rate": 1.7256249584042922e-05, - "loss": 1.7596, + "epoch": 0.534029935046597, + "grad_norm": 4.485667861004511, + "learning_rate": 1.8873234802456216e-05, + "loss": 1.1603, "step": 3782 }, { - "epoch": 0.7932480603900188, - "grad_norm": 6.425191335934589, - "learning_rate": 1.7254691640816642e-05, - "loss": 1.6373, + "epoch": 0.5341711380965829, + "grad_norm": 3.140248543462782, + "learning_rate": 1.887253170164318e-05, + "loss": 0.8463, "step": 3783 }, { - "epoch": 0.7934577479555462, - "grad_norm": 6.39785336382828, - "learning_rate": 1.725313332577483e-05, - "loss": 1.8341, + "epoch": 0.5343123411465688, + "grad_norm": 3.4827840208249814, + "learning_rate": 1.887182839463634e-05, + "loss": 0.9549, "step": 3784 }, { - "epoch": 0.7936674355210735, - "grad_norm": 6.318111129730518, - "learning_rate": 1.7251574638997354e-05, - "loss": 2.1615, + "epoch": 0.5344535441965547, + "grad_norm": 3.810842862002668, + "learning_rate": 1.887112488145204e-05, + "loss": 0.9456, "step": 3785 }, { - "epoch": 0.793877123086601, - "grad_norm": 6.1883330846969855, - "learning_rate": 1.7250015580564104e-05, - "loss": 1.8955, + "epoch": 0.5345947472465405, + "grad_norm": 3.185679454074993, + "learning_rate": 1.8870421162106628e-05, + "loss": 0.7847, "step": 3786 }, { - "epoch": 0.7940868106521284, - "grad_norm": 6.750690227888648, - "learning_rate": 1.724845615055498e-05, - "loss": 1.671, + "epoch": 0.5347359502965264, + "grad_norm": 3.685949497467339, + "learning_rate": 1.886971723661646e-05, + "loss": 1.0316, "step": 3787 }, { - "epoch": 0.7942964982176557, - "grad_norm": 6.535233279981005, - "learning_rate": 1.7246896349049902e-05, - "loss": 1.8024, + "epoch": 0.5348771533465123, + "grad_norm": 4.831736445542316, + "learning_rate": 1.8869013104997896e-05, + "loss": 1.4054, "step": 3788 }, { - "epoch": 0.7945061857831831, - "grad_norm": 6.772774932749703, - "learning_rate": 1.7245336176128823e-05, - "loss": 2.1947, + "epoch": 0.5350183563964982, + "grad_norm": 4.617643201035467, + "learning_rate": 1.8868308767267294e-05, + "loss": 1.2344, "step": 3789 }, { - "epoch": 0.7947158733487104, - "grad_norm": 6.2559571036721415, - "learning_rate": 1.7243775631871695e-05, - "loss": 1.9779, + "epoch": 0.5351595594464841, + "grad_norm": 3.55219316513033, + "learning_rate": 1.8867604223441027e-05, + "loss": 1.0954, "step": 3790 }, { - "epoch": 0.7949255609142378, - "grad_norm": 5.986316117092616, - "learning_rate": 1.7242214716358505e-05, - "loss": 1.8951, + "epoch": 0.53530076249647, + "grad_norm": 3.4017549632117157, + "learning_rate": 1.8866899473535464e-05, + "loss": 0.8748, "step": 3791 }, { - "epoch": 0.7951352484797651, - "grad_norm": 6.613841955465014, - "learning_rate": 1.7240653429669247e-05, - "loss": 1.6175, + "epoch": 0.5354419655464558, + "grad_norm": 3.6911470744908303, + "learning_rate": 1.8866194517566993e-05, + "loss": 0.9162, "step": 3792 }, { - "epoch": 0.7953449360452925, - "grad_norm": 5.713812453568993, - "learning_rate": 1.723909177188394e-05, - "loss": 1.877, + "epoch": 0.5355831685964417, + "grad_norm": 4.577046268320204, + "learning_rate": 1.8865489355551987e-05, + "loss": 1.1754, "step": 3793 }, { - "epoch": 0.7955546236108199, - "grad_norm": 6.0688181772144505, - "learning_rate": 1.7237529743082633e-05, - "loss": 1.5416, + "epoch": 0.5357243716464276, + "grad_norm": 4.098129047252893, + "learning_rate": 1.886478398750684e-05, + "loss": 1.3211, "step": 3794 }, { - "epoch": 0.7957643111763473, - "grad_norm": 6.072981791258866, - "learning_rate": 1.7235967343345366e-05, - "loss": 1.5986, + "epoch": 0.5358655746964135, + "grad_norm": 3.8419376047470495, + "learning_rate": 1.8864078413447936e-05, + "loss": 1.1913, "step": 3795 }, { - "epoch": 0.7959739987418746, - "grad_norm": 6.063548487148982, - "learning_rate": 1.7234404572752227e-05, - "loss": 1.942, + "epoch": 0.5360067777463994, + "grad_norm": 4.16435459236593, + "learning_rate": 1.886337263339168e-05, + "loss": 1.1394, "step": 3796 }, { - "epoch": 0.796183686307402, - "grad_norm": 6.4782408324516165, - "learning_rate": 1.7232841431383303e-05, - "loss": 1.9494, + "epoch": 0.5361479807963851, + "grad_norm": 3.474837498994005, + "learning_rate": 1.8862666647354476e-05, + "loss": 0.9907, "step": 3797 }, { - "epoch": 0.7963933738729293, - "grad_norm": 6.003525252312476, - "learning_rate": 1.723127791931871e-05, - "loss": 1.8673, + "epoch": 0.536289183846371, + "grad_norm": 3.827259108160972, + "learning_rate": 1.8861960455352723e-05, + "loss": 1.112, "step": 3798 }, { - "epoch": 0.7966030614384567, - "grad_norm": 6.030061542015949, - "learning_rate": 1.7229714036638587e-05, - "loss": 1.6838, + "epoch": 0.5364303868963569, + "grad_norm": 3.802314808632533, + "learning_rate": 1.8861254057402836e-05, + "loss": 1.1161, "step": 3799 }, { - "epoch": 0.796812749003984, - "grad_norm": 6.674114090550902, - "learning_rate": 1.7228149783423076e-05, - "loss": 1.4886, + "epoch": 0.5365715899463428, + "grad_norm": 4.0700284502483495, + "learning_rate": 1.8860547453521232e-05, + "loss": 1.1049, "step": 3800 }, { - "epoch": 0.7970224365695114, - "grad_norm": 6.668637716461723, - "learning_rate": 1.7226585159752353e-05, - "loss": 1.8793, + "epoch": 0.5367127929963287, + "grad_norm": 4.19886670423054, + "learning_rate": 1.8859840643724333e-05, + "loss": 0.9153, "step": 3801 }, { - "epoch": 0.7972321241350387, - "grad_norm": 5.416851967880576, - "learning_rate": 1.7225020165706604e-05, - "loss": 1.8698, + "epoch": 0.5368539960463146, + "grad_norm": 3.8601149461189697, + "learning_rate": 1.8859133628028564e-05, + "loss": 0.8878, "step": 3802 }, { - "epoch": 0.7974418117005662, - "grad_norm": 6.978915757041792, - "learning_rate": 1.7223454801366046e-05, - "loss": 1.7246, + "epoch": 0.5369951990963004, + "grad_norm": 3.6688535772665247, + "learning_rate": 1.8858426406450352e-05, + "loss": 0.9219, "step": 3803 }, { - "epoch": 0.7976514992660935, - "grad_norm": 6.571909494372986, - "learning_rate": 1.72218890668109e-05, - "loss": 2.016, + "epoch": 0.5371364021462863, + "grad_norm": 4.157277899248749, + "learning_rate": 1.8857718979006135e-05, + "loss": 1.0764, "step": 3804 }, { - "epoch": 0.7978611868316209, - "grad_norm": 6.892446555062564, - "learning_rate": 1.722032296212141e-05, - "loss": 1.7742, + "epoch": 0.5372776051962722, + "grad_norm": 3.7108899977284655, + "learning_rate": 1.8857011345712363e-05, + "loss": 0.9823, "step": 3805 }, { - "epoch": 0.7980708743971483, - "grad_norm": 7.083615288691697, - "learning_rate": 1.7218756487377847e-05, - "loss": 1.7427, + "epoch": 0.5374188082462581, + "grad_norm": 3.9004438884537516, + "learning_rate": 1.885630350658546e-05, + "loss": 1.2722, "step": 3806 }, { - "epoch": 0.7982805619626756, - "grad_norm": 6.218556958226318, - "learning_rate": 1.7217189642660494e-05, - "loss": 1.6799, + "epoch": 0.537560011296244, + "grad_norm": 3.728219225623841, + "learning_rate": 1.8855595461641897e-05, + "loss": 1.0473, "step": 3807 }, { - "epoch": 0.798490249528203, - "grad_norm": 5.886471881923572, - "learning_rate": 1.7215622428049654e-05, - "loss": 1.7002, + "epoch": 0.5377012143462299, + "grad_norm": 3.881517723387876, + "learning_rate": 1.885488721089812e-05, + "loss": 1.2276, "step": 3808 }, { - "epoch": 0.7986999370937303, - "grad_norm": 6.745564840928233, - "learning_rate": 1.7214054843625648e-05, - "loss": 1.941, + "epoch": 0.5378424173962157, + "grad_norm": 3.673150938176506, + "learning_rate": 1.8854178754370585e-05, + "loss": 0.9423, "step": 3809 }, { - "epoch": 0.7989096246592577, - "grad_norm": 5.579330799862577, - "learning_rate": 1.721248688946882e-05, - "loss": 1.2416, + "epoch": 0.5379836204462016, + "grad_norm": 3.761399395682253, + "learning_rate": 1.885347009207576e-05, + "loss": 1.1541, "step": 3810 }, { - "epoch": 0.799119312224785, - "grad_norm": 6.8048380758167655, - "learning_rate": 1.721091856565953e-05, - "loss": 1.8795, + "epoch": 0.5381248234961875, + "grad_norm": 3.5988274423545663, + "learning_rate": 1.8852761224030115e-05, + "loss": 0.9223, "step": 3811 }, { - "epoch": 0.7993289997903125, - "grad_norm": 5.601752762941683, - "learning_rate": 1.720934987227816e-05, - "loss": 1.7188, + "epoch": 0.5382660265461734, + "grad_norm": 5.293686764886406, + "learning_rate": 1.8852052150250123e-05, + "loss": 1.3332, "step": 3812 }, { - "epoch": 0.7995386873558398, - "grad_norm": 6.9495853592189265, - "learning_rate": 1.72077808094051e-05, - "loss": 1.843, + "epoch": 0.5384072295961593, + "grad_norm": 3.3795443651037074, + "learning_rate": 1.885134287075226e-05, + "loss": 1.0237, "step": 3813 }, { - "epoch": 0.7997483749213672, - "grad_norm": 5.690448920199598, - "learning_rate": 1.7206211377120774e-05, - "loss": 1.7054, + "epoch": 0.5385484326461452, + "grad_norm": 3.454811603393523, + "learning_rate": 1.885063338555301e-05, + "loss": 0.8723, "step": 3814 }, { - "epoch": 0.7999580624868945, - "grad_norm": 5.356620538465669, - "learning_rate": 1.720464157550562e-05, - "loss": 1.5814, + "epoch": 0.538689635696131, + "grad_norm": 4.496403200579583, + "learning_rate": 1.8849923694668864e-05, + "loss": 1.1199, "step": 3815 }, { - "epoch": 0.8001677500524219, - "grad_norm": 7.624145202548761, - "learning_rate": 1.7203071404640082e-05, - "loss": 1.7814, + "epoch": 0.5388308387461169, + "grad_norm": 4.651912270899583, + "learning_rate": 1.8849213798116318e-05, + "loss": 1.1843, "step": 3816 }, { - "epoch": 0.8003774376179492, - "grad_norm": 6.451469813819988, - "learning_rate": 1.7201500864604647e-05, - "loss": 1.5567, + "epoch": 0.5389720417961028, + "grad_norm": 3.6081589923719237, + "learning_rate": 1.884850369591186e-05, + "loss": 0.936, "step": 3817 }, { - "epoch": 0.8005871251834766, - "grad_norm": 6.022100448953644, - "learning_rate": 1.7199929955479802e-05, - "loss": 1.8386, + "epoch": 0.5391132448460887, + "grad_norm": 3.4993076219248125, + "learning_rate": 1.8847793388071997e-05, + "loss": 1.0451, "step": 3818 }, { - "epoch": 0.8007968127490039, - "grad_norm": 7.464575530393103, - "learning_rate": 1.7198358677346057e-05, - "loss": 1.7831, + "epoch": 0.5392544478960746, + "grad_norm": 4.2185681753978805, + "learning_rate": 1.884708287461324e-05, + "loss": 1.1652, "step": 3819 }, { - "epoch": 0.8010065003145314, - "grad_norm": 6.10489966943075, - "learning_rate": 1.7196787030283947e-05, - "loss": 1.7321, + "epoch": 0.5393956509460605, + "grad_norm": 4.156323819689551, + "learning_rate": 1.8846372155552095e-05, + "loss": 1.1968, "step": 3820 }, { - "epoch": 0.8012161878800587, - "grad_norm": 6.857979364748703, - "learning_rate": 1.719521501437402e-05, - "loss": 2.0479, + "epoch": 0.5395368539960463, + "grad_norm": 4.441259517968104, + "learning_rate": 1.8845661230905083e-05, + "loss": 1.1527, "step": 3821 }, { - "epoch": 0.8014258754455861, - "grad_norm": 6.66497312338493, - "learning_rate": 1.719364262969684e-05, - "loss": 1.6613, + "epoch": 0.5396780570460322, + "grad_norm": 3.5676195338518486, + "learning_rate": 1.884495010068872e-05, + "loss": 0.9997, "step": 3822 }, { - "epoch": 0.8016355630111135, - "grad_norm": 6.7263985000781386, - "learning_rate": 1.7192069876333002e-05, - "loss": 1.7643, + "epoch": 0.5398192600960181, + "grad_norm": 4.184453637252857, + "learning_rate": 1.8844238764919543e-05, + "loss": 1.0955, "step": 3823 }, { - "epoch": 0.8018452505766408, - "grad_norm": 5.851714349019155, - "learning_rate": 1.7190496754363108e-05, - "loss": 1.7006, + "epoch": 0.539960463146004, + "grad_norm": 4.353390440911992, + "learning_rate": 1.884352722361407e-05, + "loss": 1.2126, "step": 3824 }, { - "epoch": 0.8020549381421682, - "grad_norm": 6.070106510216911, - "learning_rate": 1.7188923263867782e-05, - "loss": 1.7015, + "epoch": 0.5401016661959899, + "grad_norm": 3.630989189050421, + "learning_rate": 1.884281547678885e-05, + "loss": 1.1242, "step": 3825 }, { - "epoch": 0.8022646257076955, - "grad_norm": 6.589466145770719, - "learning_rate": 1.7187349404927673e-05, - "loss": 1.602, + "epoch": 0.5402428692459758, + "grad_norm": 3.737872738951251, + "learning_rate": 1.8842103524460414e-05, + "loss": 1.0109, "step": 3826 }, { - "epoch": 0.8024743132732229, - "grad_norm": 6.712943345196664, - "learning_rate": 1.718577517762344e-05, - "loss": 1.7956, + "epoch": 0.5403840722959616, + "grad_norm": 3.6222416928471453, + "learning_rate": 1.884139136664531e-05, + "loss": 0.8416, "step": 3827 }, { - "epoch": 0.8026840008387502, - "grad_norm": 6.534884201755805, - "learning_rate": 1.7184200582035766e-05, - "loss": 1.8752, + "epoch": 0.5405252753459475, + "grad_norm": 4.324389445276633, + "learning_rate": 1.8840679003360088e-05, + "loss": 0.9096, "step": 3828 }, { - "epoch": 0.8028936884042777, - "grad_norm": 7.079045462705081, - "learning_rate": 1.7182625618245352e-05, - "loss": 1.7322, + "epoch": 0.5406664783959334, + "grad_norm": 3.7562904897280163, + "learning_rate": 1.883996643462131e-05, + "loss": 0.9866, "step": 3829 }, { - "epoch": 0.803103375969805, - "grad_norm": 6.346393789780253, - "learning_rate": 1.7181050286332915e-05, - "loss": 1.5294, + "epoch": 0.5408076814459193, + "grad_norm": 3.358237835679867, + "learning_rate": 1.8839253660445523e-05, + "loss": 0.8707, "step": 3830 }, { - "epoch": 0.8033130635353324, - "grad_norm": 6.867774565332408, - "learning_rate": 1.7179474586379198e-05, - "loss": 1.7358, + "epoch": 0.5409488844959051, + "grad_norm": 3.269886142190921, + "learning_rate": 1.8838540680849303e-05, + "loss": 0.9054, "step": 3831 }, { - "epoch": 0.8035227511008597, - "grad_norm": 6.2550411729918824, - "learning_rate": 1.717789851846496e-05, - "loss": 1.7556, + "epoch": 0.5410900875458909, + "grad_norm": 4.0656969369704905, + "learning_rate": 1.883782749584921e-05, + "loss": 1.2079, "step": 3832 }, { - "epoch": 0.8037324386663871, - "grad_norm": 7.030671005251954, - "learning_rate": 1.7176322082670968e-05, - "loss": 1.886, + "epoch": 0.5412312905958768, + "grad_norm": 3.5265498799610757, + "learning_rate": 1.8837114105461827e-05, + "loss": 1.0484, "step": 3833 }, { - "epoch": 0.8039421262319144, - "grad_norm": 5.85199752738826, - "learning_rate": 1.7174745279078027e-05, - "loss": 1.8604, + "epoch": 0.5413724936458627, + "grad_norm": 4.024059228559577, + "learning_rate": 1.8836400509703727e-05, + "loss": 1.2135, "step": 3834 }, { - "epoch": 0.8041518137974418, - "grad_norm": 5.817270856429654, - "learning_rate": 1.717316810776694e-05, - "loss": 1.9208, + "epoch": 0.5415136966958486, + "grad_norm": 3.8157557191789553, + "learning_rate": 1.8835686708591495e-05, + "loss": 1.0417, "step": 3835 }, { - "epoch": 0.8043615013629691, - "grad_norm": 6.008708085348807, - "learning_rate": 1.7171590568818555e-05, - "loss": 1.5347, + "epoch": 0.5416548997458345, + "grad_norm": 4.270475969552564, + "learning_rate": 1.883497270214172e-05, + "loss": 1.1325, "step": 3836 }, { - "epoch": 0.8045711889284966, - "grad_norm": 5.462490751806413, - "learning_rate": 1.717001266231371e-05, - "loss": 1.4815, + "epoch": 0.5417961027958204, + "grad_norm": 4.216334971731389, + "learning_rate": 1.8834258490370997e-05, + "loss": 1.1135, "step": 3837 }, { - "epoch": 0.8047808764940239, - "grad_norm": 5.619333441743936, - "learning_rate": 1.716843438833328e-05, - "loss": 1.627, + "epoch": 0.5419373058458062, + "grad_norm": 3.2816332243081123, + "learning_rate": 1.8833544073295918e-05, + "loss": 1.0178, "step": 3838 }, { - "epoch": 0.8049905640595513, - "grad_norm": 5.176482233766037, - "learning_rate": 1.7166855746958156e-05, - "loss": 1.5186, + "epoch": 0.5420785088957921, + "grad_norm": 4.519750750139794, + "learning_rate": 1.8832829450933093e-05, + "loss": 1.3017, "step": 3839 }, { - "epoch": 0.8052002516250786, - "grad_norm": 6.600445941310134, - "learning_rate": 1.7165276738269242e-05, - "loss": 1.9792, + "epoch": 0.542219711945778, + "grad_norm": 3.796527579322978, + "learning_rate": 1.8832114623299125e-05, + "loss": 0.9906, "step": 3840 }, { - "epoch": 0.805409939190606, - "grad_norm": 6.689859704797481, - "learning_rate": 1.716369736234747e-05, - "loss": 2.1147, + "epoch": 0.5423609149957639, + "grad_norm": 3.892357886016174, + "learning_rate": 1.8831399590410626e-05, + "loss": 1.0114, "step": 3841 }, { - "epoch": 0.8056196267561334, - "grad_norm": 6.1384239941711405, - "learning_rate": 1.7162117619273783e-05, - "loss": 2.0201, + "epoch": 0.5425021180457498, + "grad_norm": 4.878033631309455, + "learning_rate": 1.8830684352284217e-05, + "loss": 1.2065, "step": 3842 }, { - "epoch": 0.8058293143216607, - "grad_norm": 6.22079975226795, - "learning_rate": 1.7160537509129147e-05, - "loss": 1.9786, + "epoch": 0.5426433210957357, + "grad_norm": 3.661274411992499, + "learning_rate": 1.8829968908936514e-05, + "loss": 0.9734, "step": 3843 }, { - "epoch": 0.8060390018871881, - "grad_norm": 5.354616251386052, - "learning_rate": 1.715895703199454e-05, - "loss": 1.4581, + "epoch": 0.5427845241457215, + "grad_norm": 3.9193353729226517, + "learning_rate": 1.882925326038415e-05, + "loss": 1.0587, "step": 3844 }, { - "epoch": 0.8062486894527154, - "grad_norm": 6.161687289078216, - "learning_rate": 1.7157376187950974e-05, - "loss": 1.7461, + "epoch": 0.5429257271957074, + "grad_norm": 3.8774094487699835, + "learning_rate": 1.8828537406643752e-05, + "loss": 1.1293, "step": 3845 }, { - "epoch": 0.8064583770182429, - "grad_norm": 6.240097575561236, - "learning_rate": 1.715579497707946e-05, - "loss": 1.7106, + "epoch": 0.5430669302456933, + "grad_norm": 3.7422890666177935, + "learning_rate": 1.8827821347731955e-05, + "loss": 1.147, "step": 3846 }, { - "epoch": 0.8066680645837702, - "grad_norm": 6.304997890600722, - "learning_rate": 1.7154213399461048e-05, - "loss": 1.6415, + "epoch": 0.5432081332956792, + "grad_norm": 4.159755691141257, + "learning_rate": 1.882710508366541e-05, + "loss": 1.1265, "step": 3847 }, { - "epoch": 0.8068777521492976, - "grad_norm": 7.168846557616687, - "learning_rate": 1.7152631455176787e-05, - "loss": 2.2949, + "epoch": 0.5433493363456651, + "grad_norm": 4.008194922680433, + "learning_rate": 1.8826388614460746e-05, + "loss": 1.2734, "step": 3848 }, { - "epoch": 0.8070874397148249, - "grad_norm": 6.610468118676679, - "learning_rate": 1.715104914430776e-05, - "loss": 1.7858, + "epoch": 0.543490539395651, + "grad_norm": 3.253626219016584, + "learning_rate": 1.8825671940134627e-05, + "loss": 0.9434, "step": 3849 }, { - "epoch": 0.8072971272803523, - "grad_norm": 5.937503205474189, - "learning_rate": 1.714946646693506e-05, - "loss": 1.7341, + "epoch": 0.5436317424456368, + "grad_norm": 3.7675352513955818, + "learning_rate": 1.88249550607037e-05, + "loss": 0.9716, "step": 3850 }, { - "epoch": 0.8075068148458796, - "grad_norm": 5.387170622602409, - "learning_rate": 1.71478834231398e-05, - "loss": 1.924, + "epoch": 0.5437729454956227, + "grad_norm": 3.6538046080886857, + "learning_rate": 1.8824237976184638e-05, + "loss": 0.9889, "step": 3851 }, { - "epoch": 0.807716502411407, - "grad_norm": 6.879081460648234, - "learning_rate": 1.714630001300312e-05, - "loss": 1.9383, + "epoch": 0.5439141485456086, + "grad_norm": 3.9386585392304916, + "learning_rate": 1.8823520686594087e-05, + "loss": 1.1763, "step": 3852 }, { - "epoch": 0.8079261899769343, - "grad_norm": 7.208809971669895, - "learning_rate": 1.714471623660617e-05, - "loss": 2.1168, + "epoch": 0.5440553515955945, + "grad_norm": 4.15527059119074, + "learning_rate": 1.8822803191948732e-05, + "loss": 1.1513, "step": 3853 }, { - "epoch": 0.8081358775424617, - "grad_norm": 5.8625241374675925, - "learning_rate": 1.7143132094030123e-05, - "loss": 1.7497, + "epoch": 0.5441965546455804, + "grad_norm": 4.475196729599218, + "learning_rate": 1.8822085492265235e-05, + "loss": 1.1943, "step": 3854 }, { - "epoch": 0.808345565107989, - "grad_norm": 7.3765272204907495, - "learning_rate": 1.7141547585356166e-05, - "loss": 1.7477, + "epoch": 0.5443377576955662, + "grad_norm": 3.615064963858525, + "learning_rate": 1.8821367587560283e-05, + "loss": 1.0521, "step": 3855 }, { - "epoch": 0.8085552526735165, - "grad_norm": 5.0374632483112265, - "learning_rate": 1.713996271066551e-05, - "loss": 1.5288, + "epoch": 0.5444789607455521, + "grad_norm": 3.670768028409117, + "learning_rate": 1.8820649477850562e-05, + "loss": 0.9234, "step": 3856 }, { - "epoch": 0.8087649402390438, - "grad_norm": 7.362723808791006, - "learning_rate": 1.7138377470039378e-05, - "loss": 2.1388, + "epoch": 0.544620163795538, + "grad_norm": 4.233917133415625, + "learning_rate": 1.8819931163152753e-05, + "loss": 1.0692, "step": 3857 }, { - "epoch": 0.8089746278045712, - "grad_norm": 6.797606879188635, - "learning_rate": 1.7136791863559025e-05, - "loss": 1.3778, + "epoch": 0.5447613668455239, + "grad_norm": 3.931715131880214, + "learning_rate": 1.881921264348355e-05, + "loss": 0.9156, "step": 3858 }, { - "epoch": 0.8091843153700986, - "grad_norm": 5.72627689035103, - "learning_rate": 1.7135205891305708e-05, - "loss": 1.6881, + "epoch": 0.5449025698955098, + "grad_norm": 3.927969452779757, + "learning_rate": 1.881849391885966e-05, + "loss": 1.1097, "step": 3859 }, { - "epoch": 0.8093940029356259, - "grad_norm": 5.933320309137687, - "learning_rate": 1.7133619553360712e-05, - "loss": 2.0796, + "epoch": 0.5450437729454957, + "grad_norm": 3.492253830567953, + "learning_rate": 1.8817774989297776e-05, + "loss": 1.1507, "step": 3860 }, { - "epoch": 0.8096036905011533, - "grad_norm": 6.0464941947261375, - "learning_rate": 1.713203284980534e-05, - "loss": 1.7245, + "epoch": 0.5451849759954815, + "grad_norm": 3.7321397409341097, + "learning_rate": 1.881705585481461e-05, + "loss": 1.012, "step": 3861 }, { - "epoch": 0.8098133780666806, - "grad_norm": 6.020859638122137, - "learning_rate": 1.7130445780720917e-05, - "loss": 2.0015, + "epoch": 0.5453261790454674, + "grad_norm": 3.207884745262269, + "learning_rate": 1.8816336515426873e-05, + "loss": 0.8441, "step": 3862 }, { - "epoch": 0.810023065632208, - "grad_norm": 5.749231695495465, - "learning_rate": 1.7128858346188782e-05, - "loss": 1.7884, + "epoch": 0.5454673820954533, + "grad_norm": 4.531047815098707, + "learning_rate": 1.8815616971151284e-05, + "loss": 1.026, "step": 3863 }, { - "epoch": 0.8102327531977354, - "grad_norm": 5.616404901822202, - "learning_rate": 1.712727054629029e-05, - "loss": 1.5936, + "epoch": 0.5456085851454392, + "grad_norm": 4.255232529223473, + "learning_rate": 1.8814897222004564e-05, + "loss": 1.2007, "step": 3864 }, { - "epoch": 0.8104424407632628, - "grad_norm": 5.783498764050352, - "learning_rate": 1.7125682381106823e-05, - "loss": 1.8613, + "epoch": 0.545749788195425, + "grad_norm": 4.066086229986954, + "learning_rate": 1.881417726800344e-05, + "loss": 1.0899, "step": 3865 }, { - "epoch": 0.8106521283287901, - "grad_norm": 5.839088678026966, - "learning_rate": 1.7124093850719772e-05, - "loss": 1.7901, + "epoch": 0.5458909912454109, + "grad_norm": 3.304782280935318, + "learning_rate": 1.8813457109164642e-05, + "loss": 0.8937, "step": 3866 }, { - "epoch": 0.8108618158943175, - "grad_norm": 7.5869478501642735, - "learning_rate": 1.712250495521056e-05, - "loss": 1.9588, + "epoch": 0.5460321942953967, + "grad_norm": 3.684151191189467, + "learning_rate": 1.8812736745504904e-05, + "loss": 1.0937, "step": 3867 }, { - "epoch": 0.8110715034598448, - "grad_norm": 6.871929242603936, - "learning_rate": 1.7120915694660612e-05, - "loss": 2.0037, + "epoch": 0.5461733973453826, + "grad_norm": 4.3058057040360085, + "learning_rate": 1.8812016177040975e-05, + "loss": 1.2465, "step": 3868 }, { - "epoch": 0.8112811910253722, - "grad_norm": 7.309677272184801, - "learning_rate": 1.7119326069151382e-05, - "loss": 1.7913, + "epoch": 0.5463146003953685, + "grad_norm": 3.7763622890534245, + "learning_rate": 1.8811295403789595e-05, + "loss": 1.1207, "step": 3869 }, { - "epoch": 0.8114908785908995, - "grad_norm": 6.666497868615008, - "learning_rate": 1.711773607876435e-05, - "loss": 1.8072, + "epoch": 0.5464558034453544, + "grad_norm": 3.64307849650698, + "learning_rate": 1.8810574425767512e-05, + "loss": 1.0671, "step": 3870 }, { - "epoch": 0.8117005661564269, - "grad_norm": 7.2474070086423295, - "learning_rate": 1.7116145723580992e-05, - "loss": 1.6869, + "epoch": 0.5465970064953403, + "grad_norm": 4.020394813710588, + "learning_rate": 1.8809853242991485e-05, + "loss": 1.2006, "step": 3871 }, { - "epoch": 0.8119102537219542, - "grad_norm": 7.089398637308205, - "learning_rate": 1.7114555003682825e-05, - "loss": 1.7287, + "epoch": 0.5467382095453261, + "grad_norm": 3.2932329095896913, + "learning_rate": 1.8809131855478276e-05, + "loss": 0.9257, "step": 3872 }, { - "epoch": 0.8121199412874817, - "grad_norm": 6.206714235520265, - "learning_rate": 1.7112963919151378e-05, - "loss": 1.9023, + "epoch": 0.546879412595312, + "grad_norm": 4.231867467963071, + "learning_rate": 1.880841026324464e-05, + "loss": 1.1722, "step": 3873 }, { - "epoch": 0.812329628853009, - "grad_norm": 7.228809344924215, - "learning_rate": 1.7111372470068192e-05, - "loss": 1.7569, + "epoch": 0.5470206156452979, + "grad_norm": 4.547104655318466, + "learning_rate": 1.8807688466307362e-05, + "loss": 1.3541, "step": 3874 }, { - "epoch": 0.8125393164185364, - "grad_norm": 7.5493383469573345, - "learning_rate": 1.710978065651483e-05, - "loss": 1.9262, + "epoch": 0.5471618186952838, + "grad_norm": 3.281925917636599, + "learning_rate": 1.8806966464683208e-05, + "loss": 0.893, "step": 3875 }, { - "epoch": 0.8127490039840638, - "grad_norm": 7.468431627598546, - "learning_rate": 1.710818847857288e-05, - "loss": 2.2593, + "epoch": 0.5473030217452697, + "grad_norm": 4.044671521612103, + "learning_rate": 1.880624425838895e-05, + "loss": 1.1164, "step": 3876 }, { - "epoch": 0.8129586915495911, - "grad_norm": 7.746975675404207, - "learning_rate": 1.710659593632394e-05, - "loss": 1.8683, + "epoch": 0.5474442247952556, + "grad_norm": 4.525863483666993, + "learning_rate": 1.8805521847441382e-05, + "loss": 1.0555, "step": 3877 }, { - "epoch": 0.8131683791151185, - "grad_norm": 7.772874909079346, - "learning_rate": 1.7105003029849635e-05, - "loss": 1.9158, + "epoch": 0.5475854278452414, + "grad_norm": 3.4182288264259126, + "learning_rate": 1.8804799231857292e-05, + "loss": 0.9431, "step": 3878 }, { - "epoch": 0.8133780666806458, - "grad_norm": 6.075576999572013, - "learning_rate": 1.7103409759231603e-05, - "loss": 1.8004, + "epoch": 0.5477266308952273, + "grad_norm": 4.165673740847113, + "learning_rate": 1.880407641165347e-05, + "loss": 1.0617, "step": 3879 }, { - "epoch": 0.8135877542461732, - "grad_norm": 6.7395463574091385, - "learning_rate": 1.71018161245515e-05, - "loss": 2.1537, + "epoch": 0.5478678339452132, + "grad_norm": 4.090336347631208, + "learning_rate": 1.8803353386846708e-05, + "loss": 1.0255, "step": 3880 }, { - "epoch": 0.8137974418117006, - "grad_norm": 7.057123390469567, - "learning_rate": 1.7100222125891e-05, - "loss": 1.6438, + "epoch": 0.5480090369951991, + "grad_norm": 3.638200032283269, + "learning_rate": 1.8802630157453817e-05, + "loss": 1.1112, "step": 3881 }, { - "epoch": 0.814007129377228, - "grad_norm": 7.59638470230116, - "learning_rate": 1.7098627763331804e-05, - "loss": 1.7968, + "epoch": 0.548150240045185, + "grad_norm": 3.4565866003698784, + "learning_rate": 1.8801906723491606e-05, + "loss": 1.0069, "step": 3882 }, { - "epoch": 0.8142168169427553, - "grad_norm": 6.369594802852877, - "learning_rate": 1.709703303695562e-05, - "loss": 2.0111, + "epoch": 0.5482914430951709, + "grad_norm": 3.9923389185351548, + "learning_rate": 1.8801183084976885e-05, + "loss": 0.9384, "step": 3883 }, { - "epoch": 0.8144265045082827, - "grad_norm": 5.617026892781355, - "learning_rate": 1.7095437946844186e-05, - "loss": 1.6553, + "epoch": 0.5484326461451567, + "grad_norm": 4.8535901937060135, + "learning_rate": 1.8800459241926466e-05, + "loss": 1.2769, "step": 3884 }, { - "epoch": 0.81463619207381, - "grad_norm": 6.712605791095175, - "learning_rate": 1.709384249307925e-05, - "loss": 1.7758, + "epoch": 0.5485738491951426, + "grad_norm": 3.5057984356674146, + "learning_rate": 1.8799735194357176e-05, + "loss": 0.9463, "step": 3885 }, { - "epoch": 0.8148458796393374, - "grad_norm": 5.636393609553181, - "learning_rate": 1.7092246675742585e-05, - "loss": 1.7874, + "epoch": 0.5487150522451285, + "grad_norm": 3.9237456823091743, + "learning_rate": 1.879901094228584e-05, + "loss": 1.0291, "step": 3886 }, { - "epoch": 0.8150555672048647, - "grad_norm": 6.201993124639991, - "learning_rate": 1.7090650494915976e-05, - "loss": 1.5989, + "epoch": 0.5488562552951144, + "grad_norm": 3.3254477230159667, + "learning_rate": 1.8798286485729293e-05, + "loss": 0.9555, "step": 3887 }, { - "epoch": 0.8152652547703921, - "grad_norm": 9.040236563336762, - "learning_rate": 1.708905395068123e-05, - "loss": 1.4851, + "epoch": 0.5489974583451003, + "grad_norm": 3.804346182232666, + "learning_rate": 1.8797561824704364e-05, + "loss": 1.1534, "step": 3888 }, { - "epoch": 0.8154749423359194, - "grad_norm": 5.858930164935578, - "learning_rate": 1.708745704312017e-05, - "loss": 1.7632, + "epoch": 0.5491386613950862, + "grad_norm": 4.098970539544811, + "learning_rate": 1.8796836959227897e-05, + "loss": 1.281, "step": 3889 }, { - "epoch": 0.8156846299014469, - "grad_norm": 6.393560494320786, - "learning_rate": 1.708585977231465e-05, - "loss": 2.247, + "epoch": 0.549279864445072, + "grad_norm": 3.624041841256215, + "learning_rate": 1.8796111889316742e-05, + "loss": 0.9628, "step": 3890 }, { - "epoch": 0.8158943174669742, - "grad_norm": 6.340133599402949, - "learning_rate": 1.7084262138346526e-05, - "loss": 1.5463, + "epoch": 0.5494210674950579, + "grad_norm": 3.881386219187497, + "learning_rate": 1.8795386614987744e-05, + "loss": 0.9089, "step": 3891 }, { - "epoch": 0.8161040050325016, - "grad_norm": 6.937659462205507, - "learning_rate": 1.708266414129768e-05, - "loss": 1.8473, + "epoch": 0.5495622705450438, + "grad_norm": 4.481357436138758, + "learning_rate": 1.879466113625776e-05, + "loss": 1.4944, "step": 3892 }, { - "epoch": 0.8163136925980289, - "grad_norm": 6.741601316501367, - "learning_rate": 1.7081065781250013e-05, - "loss": 1.8479, + "epoch": 0.5497034735950297, + "grad_norm": 4.684090944139845, + "learning_rate": 1.879393545314365e-05, + "loss": 1.247, "step": 3893 }, { - "epoch": 0.8165233801635563, - "grad_norm": 7.502901551758546, - "learning_rate": 1.707946705828544e-05, - "loss": 1.709, + "epoch": 0.5498446766450156, + "grad_norm": 3.399428854535425, + "learning_rate": 1.8793209565662273e-05, + "loss": 0.9096, "step": 3894 }, { - "epoch": 0.8167330677290837, - "grad_norm": 6.9527500790872425, - "learning_rate": 1.7077867972485904e-05, - "loss": 1.4605, + "epoch": 0.5499858796950015, + "grad_norm": 3.5351889011058413, + "learning_rate": 1.8792483473830505e-05, + "loss": 0.9215, "step": 3895 }, { - "epoch": 0.816942755294611, - "grad_norm": 5.7770182074210075, - "learning_rate": 1.7076268523933358e-05, - "loss": 1.5681, + "epoch": 0.5501270827449873, + "grad_norm": 3.7516605124384075, + "learning_rate": 1.8791757177665223e-05, + "loss": 0.9511, "step": 3896 }, { - "epoch": 0.8171524428601384, - "grad_norm": 6.378681629755651, - "learning_rate": 1.7074668712709778e-05, - "loss": 1.5815, + "epoch": 0.5502682857949732, + "grad_norm": 3.6299965782933143, + "learning_rate": 1.8791030677183294e-05, + "loss": 1.1281, "step": 3897 }, { - "epoch": 0.8173621304256657, - "grad_norm": 6.238428664092997, - "learning_rate": 1.7073068538897154e-05, - "loss": 1.6234, + "epoch": 0.5504094888449591, + "grad_norm": 3.1349215061997713, + "learning_rate": 1.8790303972401616e-05, + "loss": 0.9632, "step": 3898 }, { - "epoch": 0.8175718179911932, - "grad_norm": 6.794791184725831, - "learning_rate": 1.7071468002577504e-05, - "loss": 2.0315, + "epoch": 0.5505506918949449, + "grad_norm": 4.716373423201452, + "learning_rate": 1.8789577063337066e-05, + "loss": 1.4103, "step": 3899 }, { - "epoch": 0.8177815055567205, - "grad_norm": 5.975337225159249, - "learning_rate": 1.706986710383285e-05, - "loss": 1.8659, + "epoch": 0.5506918949449308, + "grad_norm": 3.919946827664476, + "learning_rate": 1.878884995000654e-05, + "loss": 0.9555, "step": 3900 }, { - "epoch": 0.8179911931222479, - "grad_norm": 6.962117465458161, - "learning_rate": 1.7068265842745244e-05, - "loss": 1.9934, + "epoch": 0.5508330979949166, + "grad_norm": 4.3998338915242, + "learning_rate": 1.878812263242694e-05, + "loss": 1.3407, "step": 3901 }, { - "epoch": 0.8182008806877752, - "grad_norm": 6.797723912795713, - "learning_rate": 1.7066664219396755e-05, - "loss": 2.1104, + "epoch": 0.5509743010449025, + "grad_norm": 4.713140550161659, + "learning_rate": 1.8787395110615163e-05, + "loss": 1.1962, "step": 3902 }, { - "epoch": 0.8184105682533026, - "grad_norm": 5.802479631795701, - "learning_rate": 1.706506223386947e-05, - "loss": 1.5743, + "epoch": 0.5511155040948884, + "grad_norm": 4.145138340696947, + "learning_rate": 1.8786667384588117e-05, + "loss": 1.0392, "step": 3903 }, { - "epoch": 0.8186202558188299, - "grad_norm": 6.666724039398372, - "learning_rate": 1.7063459886245492e-05, - "loss": 1.8826, + "epoch": 0.5512567071448743, + "grad_norm": 4.80046731898512, + "learning_rate": 1.878593945436272e-05, + "loss": 1.094, "step": 3904 }, { - "epoch": 0.8188299433843573, - "grad_norm": 6.72251864907007, - "learning_rate": 1.706185717660694e-05, - "loss": 2.0979, + "epoch": 0.5513979101948602, + "grad_norm": 4.141521166719825, + "learning_rate": 1.8785211319955882e-05, + "loss": 1.2661, "step": 3905 }, { - "epoch": 0.8190396309498846, - "grad_norm": 6.48987150541069, - "learning_rate": 1.7060254105035966e-05, - "loss": 2.0513, + "epoch": 0.5515391132448461, + "grad_norm": 3.625176001315281, + "learning_rate": 1.8784482981384523e-05, + "loss": 1.0683, "step": 3906 }, { - "epoch": 0.819249318515412, - "grad_norm": 6.942590678729585, - "learning_rate": 1.7058650671614717e-05, - "loss": 1.8838, + "epoch": 0.5516803162948319, + "grad_norm": 4.0191478155283455, + "learning_rate": 1.878375443866558e-05, + "loss": 1.1285, "step": 3907 }, { - "epoch": 0.8194590060809394, - "grad_norm": 7.025102360484922, - "learning_rate": 1.7057046876425386e-05, - "loss": 1.8276, + "epoch": 0.5518215193448178, + "grad_norm": 4.209473167398816, + "learning_rate": 1.8783025691815974e-05, + "loss": 1.066, "step": 3908 }, { - "epoch": 0.8196686936464668, - "grad_norm": 5.287910968566388, - "learning_rate": 1.7055442719550156e-05, - "loss": 1.4727, + "epoch": 0.5519627223948037, + "grad_norm": 3.9650233077924715, + "learning_rate": 1.8782296740852645e-05, + "loss": 1.0758, "step": 3909 }, { - "epoch": 0.8198783812119941, - "grad_norm": 7.321205499189208, - "learning_rate": 1.7053838201071252e-05, - "loss": 1.867, + "epoch": 0.5521039254447896, + "grad_norm": 6.0009516870332735, + "learning_rate": 1.878156758579253e-05, + "loss": 1.4329, "step": 3910 }, { - "epoch": 0.8200880687775215, - "grad_norm": 5.485169547078051, - "learning_rate": 1.7052233321070904e-05, - "loss": 1.6076, + "epoch": 0.5522451284947755, + "grad_norm": 4.281558186087087, + "learning_rate": 1.878083822665258e-05, + "loss": 1.2022, "step": 3911 }, { - "epoch": 0.8202977563430489, - "grad_norm": 5.793018704988025, - "learning_rate": 1.705062807963137e-05, - "loss": 1.689, + "epoch": 0.5523863315447614, + "grad_norm": 3.7524113877951164, + "learning_rate": 1.8780108663449742e-05, + "loss": 1.0042, "step": 3912 }, { - "epoch": 0.8205074439085762, - "grad_norm": 5.7599728572233575, - "learning_rate": 1.7049022476834917e-05, - "loss": 1.8892, + "epoch": 0.5525275345947472, + "grad_norm": 4.092354738939234, + "learning_rate": 1.877937889620097e-05, + "loss": 1.2553, "step": 3913 }, { - "epoch": 0.8207171314741036, - "grad_norm": 5.787770412984968, - "learning_rate": 1.704741651276384e-05, - "loss": 2.2209, + "epoch": 0.5526687376447331, + "grad_norm": 3.8064296048539634, + "learning_rate": 1.8778648924923222e-05, + "loss": 1.1935, "step": 3914 }, { - "epoch": 0.8209268190396309, - "grad_norm": 5.779157183874328, - "learning_rate": 1.7045810187500436e-05, - "loss": 1.4783, + "epoch": 0.552809940694719, + "grad_norm": 3.793372793625187, + "learning_rate": 1.8777918749633467e-05, + "loss": 0.9848, "step": 3915 }, { - "epoch": 0.8211365066051584, - "grad_norm": 8.111830612672588, - "learning_rate": 1.7044203501127043e-05, - "loss": 2.0416, + "epoch": 0.5529511437447049, + "grad_norm": 3.9324152935609007, + "learning_rate": 1.8777188370348667e-05, + "loss": 1.2574, "step": 3916 }, { - "epoch": 0.8213461941706857, - "grad_norm": 6.648864842883792, - "learning_rate": 1.7042596453726003e-05, - "loss": 1.5495, + "epoch": 0.5530923467946908, + "grad_norm": 4.219963467373792, + "learning_rate": 1.87764577870858e-05, + "loss": 1.0743, "step": 3917 }, { - "epoch": 0.8215558817362131, - "grad_norm": 6.475013067424919, - "learning_rate": 1.704098904537968e-05, - "loss": 1.7308, + "epoch": 0.5532335498446767, + "grad_norm": 3.3366265132319826, + "learning_rate": 1.877572699986185e-05, + "loss": 0.9479, "step": 3918 }, { - "epoch": 0.8217655693017404, - "grad_norm": 6.441652061371102, - "learning_rate": 1.703938127617046e-05, - "loss": 1.9492, + "epoch": 0.5533747528946625, + "grad_norm": 3.409431986578069, + "learning_rate": 1.8774996008693792e-05, + "loss": 0.971, "step": 3919 }, { - "epoch": 0.8219752568672678, - "grad_norm": 8.202785891608146, - "learning_rate": 1.7037773146180737e-05, - "loss": 1.8701, + "epoch": 0.5535159559446484, + "grad_norm": 3.7631322921156185, + "learning_rate": 1.8774264813598614e-05, + "loss": 1.1178, "step": 3920 }, { - "epoch": 0.8221849444327951, - "grad_norm": 6.405510752463787, - "learning_rate": 1.7036164655492932e-05, - "loss": 1.7116, + "epoch": 0.5536571589946343, + "grad_norm": 4.065354728946702, + "learning_rate": 1.8773533414593313e-05, + "loss": 1.1869, "step": 3921 }, { - "epoch": 0.8223946319983225, - "grad_norm": 5.307028005512784, - "learning_rate": 1.7034555804189487e-05, - "loss": 1.6826, + "epoch": 0.5537983620446202, + "grad_norm": 3.9253843613061417, + "learning_rate": 1.8772801811694882e-05, + "loss": 1.1406, "step": 3922 }, { - "epoch": 0.8226043195638498, - "grad_norm": 6.980848627048561, - "learning_rate": 1.7032946592352857e-05, - "loss": 2.0592, + "epoch": 0.5539395650946061, + "grad_norm": 4.096879151591271, + "learning_rate": 1.8772070004920327e-05, + "loss": 1.0701, "step": 3923 }, { - "epoch": 0.8228140071293772, - "grad_norm": 6.6322796335710805, - "learning_rate": 1.7031337020065515e-05, - "loss": 1.8762, + "epoch": 0.554080768144592, + "grad_norm": 3.34851567164423, + "learning_rate": 1.8771337994286656e-05, + "loss": 1.0256, "step": 3924 }, { - "epoch": 0.8230236946949046, - "grad_norm": 6.664248156728514, - "learning_rate": 1.7029727087409952e-05, - "loss": 1.8921, + "epoch": 0.5542219711945778, + "grad_norm": 3.646029895615022, + "learning_rate": 1.8770605779810874e-05, + "loss": 0.8369, "step": 3925 }, { - "epoch": 0.823233382260432, - "grad_norm": 5.679375082721564, - "learning_rate": 1.702811679446869e-05, - "loss": 1.9246, + "epoch": 0.5543631742445637, + "grad_norm": 4.035941884605273, + "learning_rate": 1.8769873361510004e-05, + "loss": 1.2231, "step": 3926 }, { - "epoch": 0.8234430698259593, - "grad_norm": 6.261660824279909, - "learning_rate": 1.702650614132425e-05, - "loss": 1.569, + "epoch": 0.5545043772945496, + "grad_norm": 4.230555942223311, + "learning_rate": 1.8769140739401063e-05, + "loss": 1.2381, "step": 3927 }, { - "epoch": 0.8236527573914867, - "grad_norm": 6.147221898437719, - "learning_rate": 1.702489512805918e-05, - "loss": 1.6396, + "epoch": 0.5546455803445355, + "grad_norm": 3.3963990347131623, + "learning_rate": 1.876840791350108e-05, + "loss": 0.9069, "step": 3928 }, { - "epoch": 0.823862444957014, - "grad_norm": 5.3295980078599285, - "learning_rate": 1.7023283754756053e-05, - "loss": 1.7624, + "epoch": 0.5547867833945214, + "grad_norm": 3.8069282629586128, + "learning_rate": 1.876767488382708e-05, + "loss": 1.208, "step": 3929 }, { - "epoch": 0.8240721325225414, - "grad_norm": 6.609587325806969, - "learning_rate": 1.7021672021497456e-05, - "loss": 1.9656, + "epoch": 0.5549279864445072, + "grad_norm": 3.8852130747312956, + "learning_rate": 1.8766941650396112e-05, + "loss": 0.9978, "step": 3930 }, { - "epoch": 0.8242818200880688, - "grad_norm": 6.434584650598884, - "learning_rate": 1.7020059928365987e-05, - "loss": 1.7911, + "epoch": 0.5550691894944931, + "grad_norm": 3.798109228490835, + "learning_rate": 1.8766208213225198e-05, + "loss": 1.0595, "step": 3931 }, { - "epoch": 0.8244915076535961, - "grad_norm": 5.40550898630555, - "learning_rate": 1.7018447475444267e-05, - "loss": 1.3427, + "epoch": 0.555210392544479, + "grad_norm": 3.9053044625587114, + "learning_rate": 1.876547457233139e-05, + "loss": 1.1099, "step": 3932 }, { - "epoch": 0.8247011952191236, - "grad_norm": 7.199681556426338, - "learning_rate": 1.7016834662814946e-05, - "loss": 1.6897, + "epoch": 0.5553515955944648, + "grad_norm": 4.108566319220951, + "learning_rate": 1.8764740727731744e-05, + "loss": 1.143, "step": 3933 }, { - "epoch": 0.8249108827846509, - "grad_norm": 7.2762209158635445, - "learning_rate": 1.701522149056068e-05, - "loss": 1.4452, + "epoch": 0.5554927986444507, + "grad_norm": 3.2962138723502017, + "learning_rate": 1.8764006679443306e-05, + "loss": 0.865, "step": 3934 }, { - "epoch": 0.8251205703501783, - "grad_norm": 6.721100600599509, - "learning_rate": 1.701360795876414e-05, - "loss": 1.6193, + "epoch": 0.5556340016944366, + "grad_norm": 3.512707024354957, + "learning_rate": 1.8763272427483136e-05, + "loss": 1.0043, "step": 3935 }, { - "epoch": 0.8253302579157056, - "grad_norm": 7.226238926267577, - "learning_rate": 1.7011994067508027e-05, - "loss": 1.7999, + "epoch": 0.5557752047444224, + "grad_norm": 3.6645254605777655, + "learning_rate": 1.87625379718683e-05, + "loss": 1.0616, "step": 3936 }, { - "epoch": 0.825539945481233, - "grad_norm": 5.948006949237822, - "learning_rate": 1.701037981687506e-05, - "loss": 1.655, + "epoch": 0.5559164077944083, + "grad_norm": 3.579682619406661, + "learning_rate": 1.8761803312615865e-05, + "loss": 1.0111, "step": 3937 }, { - "epoch": 0.8257496330467603, - "grad_norm": 6.595746328253617, - "learning_rate": 1.7008765206947967e-05, - "loss": 1.785, + "epoch": 0.5560576108443942, + "grad_norm": 3.209388120596212, + "learning_rate": 1.876106844974291e-05, + "loss": 0.9496, "step": 3938 }, { - "epoch": 0.8259593206122877, - "grad_norm": 7.013954677420128, - "learning_rate": 1.7007150237809503e-05, - "loss": 1.7213, + "epoch": 0.5561988138943801, + "grad_norm": 4.442654653991964, + "learning_rate": 1.87603333832665e-05, + "loss": 1.194, "step": 3939 }, { - "epoch": 0.826169008177815, - "grad_norm": 7.462444451069835, - "learning_rate": 1.7005534909542435e-05, - "loss": 2.3463, + "epoch": 0.556340016944366, + "grad_norm": 3.6068753790999653, + "learning_rate": 1.875959811320373e-05, + "loss": 1.0901, "step": 3940 }, { - "epoch": 0.8263786957433424, - "grad_norm": 6.6141827406002935, - "learning_rate": 1.700391922222955e-05, - "loss": 1.3879, + "epoch": 0.5564812199943519, + "grad_norm": 3.699216213138372, + "learning_rate": 1.8758862639571682e-05, + "loss": 1.0667, "step": 3941 }, { - "epoch": 0.8265883833088697, - "grad_norm": 7.3848168808440375, - "learning_rate": 1.700230317595366e-05, - "loss": 1.7175, + "epoch": 0.5566224230443377, + "grad_norm": 3.542524684387085, + "learning_rate": 1.875812696238745e-05, + "loss": 1.0257, "step": 3942 }, { - "epoch": 0.8267980708743972, - "grad_norm": 7.790187907916004, - "learning_rate": 1.7000686770797586e-05, - "loss": 1.6395, + "epoch": 0.5567636260943236, + "grad_norm": 3.5789279840972443, + "learning_rate": 1.875739108166813e-05, + "loss": 0.98, "step": 3943 }, { - "epoch": 0.8270077584399245, - "grad_norm": 6.812089362203391, - "learning_rate": 1.6999070006844174e-05, - "loss": 1.8498, + "epoch": 0.5569048291443095, + "grad_norm": 3.8004158515462616, + "learning_rate": 1.8756654997430823e-05, + "loss": 1.0158, "step": 3944 }, { - "epoch": 0.8272174460054519, - "grad_norm": 7.2699440434590175, - "learning_rate": 1.6997452884176287e-05, - "loss": 1.7232, + "epoch": 0.5570460321942954, + "grad_norm": 5.118654208789759, + "learning_rate": 1.8755918709692637e-05, + "loss": 1.2221, "step": 3945 }, { - "epoch": 0.8274271335709792, - "grad_norm": 6.639489202137102, - "learning_rate": 1.6995835402876798e-05, - "loss": 1.8514, + "epoch": 0.5571872352442813, + "grad_norm": 3.7194326501607793, + "learning_rate": 1.8755182218470675e-05, + "loss": 0.9165, "step": 3946 }, { - "epoch": 0.8276368211365066, - "grad_norm": 6.435596860653872, - "learning_rate": 1.6994217563028615e-05, - "loss": 1.729, + "epoch": 0.5573284382942671, + "grad_norm": 4.043919639028234, + "learning_rate": 1.8754445523782065e-05, + "loss": 1.2577, "step": 3947 }, { - "epoch": 0.827846508702034, - "grad_norm": 6.4058472140387686, - "learning_rate": 1.699259936471465e-05, - "loss": 1.8428, + "epoch": 0.557469641344253, + "grad_norm": 4.373364474134514, + "learning_rate": 1.8753708625643924e-05, + "loss": 1.2497, "step": 3948 }, { - "epoch": 0.8280561962675613, - "grad_norm": 6.3039675545852445, - "learning_rate": 1.699098080801784e-05, - "loss": 2.1255, + "epoch": 0.5576108443942389, + "grad_norm": 3.79483258680227, + "learning_rate": 1.8752971524073368e-05, + "loss": 1.0862, "step": 3949 }, { - "epoch": 0.8282658838330887, - "grad_norm": 7.53350247338998, - "learning_rate": 1.6989361893021134e-05, - "loss": 2.2175, + "epoch": 0.5577520474442248, + "grad_norm": 3.553702322867798, + "learning_rate": 1.8752234219087538e-05, + "loss": 0.9789, "step": 3950 }, { - "epoch": 0.828475571398616, - "grad_norm": 6.950589553255795, - "learning_rate": 1.698774261980751e-05, - "loss": 2.2171, + "epoch": 0.5578932504942107, + "grad_norm": 3.694344385272981, + "learning_rate": 1.8751496710703564e-05, + "loss": 0.946, "step": 3951 }, { - "epoch": 0.8286852589641435, - "grad_norm": 6.656253907041343, - "learning_rate": 1.698612298845996e-05, - "loss": 1.7233, + "epoch": 0.5580344535441966, + "grad_norm": 3.2854711849838565, + "learning_rate": 1.8750758998938584e-05, + "loss": 0.9047, "step": 3952 }, { - "epoch": 0.8288949465296708, - "grad_norm": 6.2427764791960945, - "learning_rate": 1.6984502999061492e-05, - "loss": 1.8896, + "epoch": 0.5581756565941824, + "grad_norm": 4.176018630550332, + "learning_rate": 1.875002108380975e-05, + "loss": 1.2, "step": 3953 }, { - "epoch": 0.8291046340951982, - "grad_norm": 5.606495194544615, - "learning_rate": 1.698288265169513e-05, - "loss": 1.7047, + "epoch": 0.5583168596441683, + "grad_norm": 4.115778279687423, + "learning_rate": 1.8749282965334198e-05, + "loss": 0.9926, "step": 3954 }, { - "epoch": 0.8293143216607255, - "grad_norm": 6.912351971044972, - "learning_rate": 1.6981261946443915e-05, - "loss": 1.3881, + "epoch": 0.5584580626941542, + "grad_norm": 3.999272383878516, + "learning_rate": 1.8748544643529093e-05, + "loss": 1.1628, "step": 3955 }, { - "epoch": 0.8295240092262529, - "grad_norm": 5.754447320623951, - "learning_rate": 1.6979640883390924e-05, - "loss": 1.8004, + "epoch": 0.5585992657441401, + "grad_norm": 4.1043874435203, + "learning_rate": 1.8747806118411588e-05, + "loss": 1.2388, "step": 3956 }, { - "epoch": 0.8297336967917802, - "grad_norm": 6.475571073481391, - "learning_rate": 1.697801946261923e-05, - "loss": 1.8933, + "epoch": 0.558740468794126, + "grad_norm": 3.941341065769649, + "learning_rate": 1.8747067389998846e-05, + "loss": 1.0345, "step": 3957 }, { - "epoch": 0.8299433843573076, - "grad_norm": 6.393119396321377, - "learning_rate": 1.6976397684211935e-05, - "loss": 1.9184, + "epoch": 0.5588816718441119, + "grad_norm": 3.5947468141047993, + "learning_rate": 1.8746328458308034e-05, + "loss": 0.9336, "step": 3958 }, { - "epoch": 0.8301530719228349, - "grad_norm": 6.1871064110886165, - "learning_rate": 1.697477554825216e-05, - "loss": 1.5414, + "epoch": 0.5590228748940977, + "grad_norm": 3.8959774682520094, + "learning_rate": 1.8745589323356327e-05, + "loss": 1.1158, "step": 3959 }, { - "epoch": 0.8303627594883624, - "grad_norm": 6.078185500252048, - "learning_rate": 1.697315305482304e-05, - "loss": 1.6762, + "epoch": 0.5591640779440836, + "grad_norm": 3.7044297122585794, + "learning_rate": 1.87448499851609e-05, + "loss": 1.1249, "step": 3960 }, { - "epoch": 0.8305724470538897, - "grad_norm": 7.887267942186894, - "learning_rate": 1.697153020400773e-05, - "loss": 2.2682, + "epoch": 0.5593052809940695, + "grad_norm": 4.002966209248357, + "learning_rate": 1.8744110443738938e-05, + "loss": 1.0362, "step": 3961 }, { - "epoch": 0.8307821346194171, - "grad_norm": 6.757760424495994, - "learning_rate": 1.6969906995889407e-05, - "loss": 1.7142, + "epoch": 0.5594464840440554, + "grad_norm": 3.5635095782220207, + "learning_rate": 1.8743370699107624e-05, + "loss": 0.932, "step": 3962 }, { - "epoch": 0.8309918221849444, - "grad_norm": 6.817190135598574, - "learning_rate": 1.6968283430551263e-05, - "loss": 1.6131, + "epoch": 0.5595876870940413, + "grad_norm": 3.929249530925411, + "learning_rate": 1.874263075128415e-05, + "loss": 1.1557, "step": 3963 }, { - "epoch": 0.8312015097504718, - "grad_norm": 5.500258609668544, - "learning_rate": 1.6966659508076504e-05, - "loss": 1.673, + "epoch": 0.5597288901440272, + "grad_norm": 3.601580126326468, + "learning_rate": 1.8741890600285714e-05, + "loss": 0.9278, "step": 3964 }, { - "epoch": 0.8314111973159991, - "grad_norm": 5.33586380362758, - "learning_rate": 1.6965035228548365e-05, - "loss": 1.4146, + "epoch": 0.559870093194013, + "grad_norm": 3.1732489785219107, + "learning_rate": 1.8741150246129522e-05, + "loss": 0.9764, "step": 3965 }, { - "epoch": 0.8316208848815265, - "grad_norm": 6.961738659664671, - "learning_rate": 1.696341059205009e-05, - "loss": 1.89, + "epoch": 0.5600112962439989, + "grad_norm": 3.934834744934047, + "learning_rate": 1.8740409688832762e-05, + "loss": 1.2504, "step": 3966 }, { - "epoch": 0.8318305724470539, - "grad_norm": 6.475552082764284, - "learning_rate": 1.696178559866494e-05, - "loss": 1.7037, + "epoch": 0.5601524992939847, + "grad_norm": 3.6517759031048835, + "learning_rate": 1.8739668928412663e-05, + "loss": 1.0386, "step": 3967 }, { - "epoch": 0.8320402600125812, - "grad_norm": 7.576856132343177, - "learning_rate": 1.6960160248476212e-05, - "loss": 1.5367, + "epoch": 0.5602937023439706, + "grad_norm": 4.985809750805547, + "learning_rate": 1.873892796488643e-05, + "loss": 1.2311, "step": 3968 }, { - "epoch": 0.8322499475781087, - "grad_norm": 6.96790883441301, - "learning_rate": 1.6958534541567195e-05, - "loss": 1.9419, + "epoch": 0.5604349053939565, + "grad_norm": 4.0889405466025615, + "learning_rate": 1.8738186798271285e-05, + "loss": 0.9641, "step": 3969 }, { - "epoch": 0.832459635143636, - "grad_norm": 5.95952315193859, - "learning_rate": 1.6956908478021213e-05, - "loss": 1.9999, + "epoch": 0.5605761084439423, + "grad_norm": 3.892564459366846, + "learning_rate": 1.8737445428584456e-05, + "loss": 1.3846, "step": 3970 }, { - "epoch": 0.8326693227091634, - "grad_norm": 6.2708041834424355, - "learning_rate": 1.6955282057921608e-05, - "loss": 1.9492, + "epoch": 0.5607173114939282, + "grad_norm": 4.134486488328932, + "learning_rate": 1.8736703855843165e-05, + "loss": 1.1632, "step": 3971 }, { - "epoch": 0.8328790102746907, - "grad_norm": 7.082938415490325, - "learning_rate": 1.695365528135173e-05, - "loss": 1.8262, + "epoch": 0.5608585145439141, + "grad_norm": 3.7667869071410816, + "learning_rate": 1.8735962080064652e-05, + "loss": 1.1535, "step": 3972 }, { - "epoch": 0.8330886978402181, - "grad_norm": 8.493882233002296, - "learning_rate": 1.695202814839496e-05, - "loss": 1.5482, + "epoch": 0.5609997175939, + "grad_norm": 4.821437270054569, + "learning_rate": 1.873522010126615e-05, + "loss": 1.2474, "step": 3973 }, { - "epoch": 0.8332983854057454, - "grad_norm": 5.696394128221247, - "learning_rate": 1.6950400659134693e-05, - "loss": 1.6053, + "epoch": 0.5611409206438859, + "grad_norm": 3.78807913902716, + "learning_rate": 1.8734477919464905e-05, + "loss": 0.9248, "step": 3974 }, { - "epoch": 0.8335080729712728, - "grad_norm": 7.213121116884791, - "learning_rate": 1.6948772813654336e-05, - "loss": 1.7486, + "epoch": 0.5612821236938718, + "grad_norm": 3.6390268765215956, + "learning_rate": 1.873373553467817e-05, + "loss": 0.995, "step": 3975 }, { - "epoch": 0.8337177605368001, - "grad_norm": 7.0743770787021765, - "learning_rate": 1.6947144612037316e-05, - "loss": 1.4862, + "epoch": 0.5614233267438576, + "grad_norm": 3.9631729049554787, + "learning_rate": 1.8732992946923187e-05, + "loss": 1.267, "step": 3976 }, { - "epoch": 0.8339274481023276, - "grad_norm": 6.5501205717384385, - "learning_rate": 1.694551605436709e-05, - "loss": 1.8104, + "epoch": 0.5615645297938435, + "grad_norm": 3.8652569339753042, + "learning_rate": 1.8732250156217223e-05, + "loss": 1.1233, "step": 3977 }, { - "epoch": 0.8341371356678549, - "grad_norm": 5.81805469112791, - "learning_rate": 1.6943887140727118e-05, - "loss": 1.9099, + "epoch": 0.5617057328438294, + "grad_norm": 3.8008455326318056, + "learning_rate": 1.8731507162577536e-05, + "loss": 1.1189, "step": 3978 }, { - "epoch": 0.8343468232333823, - "grad_norm": 6.016732702821326, - "learning_rate": 1.6942257871200885e-05, - "loss": 1.4017, + "epoch": 0.5618469358938153, + "grad_norm": 3.889054749495909, + "learning_rate": 1.8730763966021394e-05, + "loss": 1.2347, "step": 3979 }, { - "epoch": 0.8345565107989096, - "grad_norm": 6.349253790757424, - "learning_rate": 1.69406282458719e-05, - "loss": 1.632, + "epoch": 0.5619881389438012, + "grad_norm": 3.4570162683422203, + "learning_rate": 1.8730020566566068e-05, + "loss": 1.1929, "step": 3980 }, { - "epoch": 0.834766198364437, - "grad_norm": 6.592541998110839, - "learning_rate": 1.6938998264823675e-05, - "loss": 1.6978, + "epoch": 0.5621293419937871, + "grad_norm": 3.4686261039670065, + "learning_rate": 1.8729276964228834e-05, + "loss": 0.9427, "step": 3981 }, { - "epoch": 0.8349758859299643, - "grad_norm": 5.551872739046872, - "learning_rate": 1.6937367928139758e-05, - "loss": 1.3985, + "epoch": 0.5622705450437729, + "grad_norm": 4.401191123030958, + "learning_rate": 1.8728533159026972e-05, + "loss": 1.2275, "step": 3982 }, { - "epoch": 0.8351855734954917, - "grad_norm": 5.8725837130670255, - "learning_rate": 1.69357372359037e-05, - "loss": 1.9824, + "epoch": 0.5624117480937588, + "grad_norm": 3.5849903282434137, + "learning_rate": 1.872778915097777e-05, + "loss": 0.9179, "step": 3983 }, { - "epoch": 0.8353952610610191, - "grad_norm": 6.690071727941447, - "learning_rate": 1.693410618819908e-05, - "loss": 2.1046, + "epoch": 0.5625529511437447, + "grad_norm": 3.4736852223894794, + "learning_rate": 1.8727044940098516e-05, + "loss": 1.0392, "step": 3984 }, { - "epoch": 0.8356049486265464, - "grad_norm": 6.812457278647027, - "learning_rate": 1.693247478510949e-05, - "loss": 1.751, + "epoch": 0.5626941541937306, + "grad_norm": 5.224027531039868, + "learning_rate": 1.8726300526406508e-05, + "loss": 1.0344, "step": 3985 }, { - "epoch": 0.8358146361920739, - "grad_norm": 5.752509665320992, - "learning_rate": 1.693084302671854e-05, - "loss": 1.7108, + "epoch": 0.5628353572437165, + "grad_norm": 3.4500965779584507, + "learning_rate": 1.872555590991904e-05, + "loss": 1.0101, "step": 3986 }, { - "epoch": 0.8360243237576012, - "grad_norm": 7.0222242488131235, - "learning_rate": 1.692921091310987e-05, - "loss": 1.8449, + "epoch": 0.5629765602937024, + "grad_norm": 3.436432995624353, + "learning_rate": 1.8724811090653428e-05, + "loss": 0.9618, "step": 3987 }, { - "epoch": 0.8362340113231286, - "grad_norm": 7.896002349747242, - "learning_rate": 1.692757844436712e-05, - "loss": 1.7794, + "epoch": 0.5631177633436882, + "grad_norm": 4.233761733011712, + "learning_rate": 1.872406606862697e-05, + "loss": 1.1924, "step": 3988 }, { - "epoch": 0.8364436988886559, - "grad_norm": 6.149762195566721, - "learning_rate": 1.692594562057396e-05, - "loss": 1.7636, + "epoch": 0.5632589663936741, + "grad_norm": 3.5373979964986515, + "learning_rate": 1.8723320843856986e-05, + "loss": 1.0613, "step": 3989 }, { - "epoch": 0.8366533864541833, - "grad_norm": 6.336346190062133, - "learning_rate": 1.6924312441814075e-05, - "loss": 2.0715, + "epoch": 0.56340016944366, + "grad_norm": 3.823543056753875, + "learning_rate": 1.8722575416360794e-05, + "loss": 1.1477, "step": 3990 }, { - "epoch": 0.8368630740197106, - "grad_norm": 7.62156232109994, - "learning_rate": 1.692267890817117e-05, - "loss": 1.5241, + "epoch": 0.5635413724936459, + "grad_norm": 4.141554449085671, + "learning_rate": 1.8721829786155714e-05, + "loss": 1.0549, "step": 3991 }, { - "epoch": 0.837072761585238, - "grad_norm": 6.80000315404917, - "learning_rate": 1.692104501972896e-05, - "loss": 1.8459, + "epoch": 0.5636825755436318, + "grad_norm": 3.5182094935613293, + "learning_rate": 1.8721083953259078e-05, + "loss": 0.8924, "step": 3992 }, { - "epoch": 0.8372824491507653, - "grad_norm": 6.525447921706902, - "learning_rate": 1.691941077657119e-05, - "loss": 1.7569, + "epoch": 0.5638237785936177, + "grad_norm": 3.967193723416123, + "learning_rate": 1.8720337917688213e-05, + "loss": 1.2069, "step": 3993 }, { - "epoch": 0.8374921367162927, - "grad_norm": 6.305492333798289, - "learning_rate": 1.6917776178781618e-05, - "loss": 1.7838, + "epoch": 0.5639649816436035, + "grad_norm": 3.6353769080875624, + "learning_rate": 1.8719591679460464e-05, + "loss": 1.1306, "step": 3994 }, { - "epoch": 0.83770182428182, - "grad_norm": 6.260333980163938, - "learning_rate": 1.6916141226444017e-05, - "loss": 1.7514, + "epoch": 0.5641061846935894, + "grad_norm": 4.465815263820734, + "learning_rate": 1.871884523859317e-05, + "loss": 0.9229, "step": 3995 }, { - "epoch": 0.8379115118473475, - "grad_norm": 7.387058605029511, - "learning_rate": 1.6914505919642183e-05, - "loss": 2.1554, + "epoch": 0.5642473877435753, + "grad_norm": 3.619868883940579, + "learning_rate": 1.871809859510368e-05, + "loss": 1.0567, "step": 3996 }, { - "epoch": 0.8381211994128748, - "grad_norm": 6.764726114907841, - "learning_rate": 1.6912870258459927e-05, - "loss": 1.6502, + "epoch": 0.5643885907935612, + "grad_norm": 3.8182239832595153, + "learning_rate": 1.8717351749009342e-05, + "loss": 0.9807, "step": 3997 }, { - "epoch": 0.8383308869784022, - "grad_norm": 5.634243443455935, - "learning_rate": 1.6911234242981082e-05, - "loss": 2.0902, + "epoch": 0.5645297938435471, + "grad_norm": 3.6452521855172417, + "learning_rate": 1.8716604700327516e-05, + "loss": 1.1571, "step": 3998 }, { - "epoch": 0.8385405745439295, - "grad_norm": 6.549890450228507, - "learning_rate": 1.6909597873289495e-05, - "loss": 1.6364, + "epoch": 0.564670996893533, + "grad_norm": 3.6015695462751625, + "learning_rate": 1.8715857449075558e-05, + "loss": 0.945, "step": 3999 }, { - "epoch": 0.8387502621094569, - "grad_norm": 6.183494656841771, - "learning_rate": 1.6907961149469034e-05, - "loss": 1.602, + "epoch": 0.5648121999435188, + "grad_norm": 3.6773534798326635, + "learning_rate": 1.8715109995270836e-05, + "loss": 1.1845, "step": 4000 }, { - "epoch": 0.8389599496749843, - "grad_norm": 7.217535730101063, - "learning_rate": 1.690632407160358e-05, - "loss": 1.9851, + "epoch": 0.5649534029935046, + "grad_norm": 4.15392292033527, + "learning_rate": 1.8714362338930724e-05, + "loss": 1.2354, "step": 4001 }, { - "epoch": 0.8391696372405116, - "grad_norm": 5.9332054416389015, - "learning_rate": 1.6904686639777042e-05, - "loss": 1.5317, + "epoch": 0.5650946060434905, + "grad_norm": 3.6891643024126273, + "learning_rate": 1.8713614480072594e-05, + "loss": 1.1217, "step": 4002 }, { - "epoch": 0.839379324806039, - "grad_norm": 6.715253099310353, - "learning_rate": 1.6903048854073335e-05, - "loss": 1.8186, + "epoch": 0.5652358090934764, + "grad_norm": 4.196458051514103, + "learning_rate": 1.871286641871383e-05, + "loss": 1.0469, "step": 4003 }, { - "epoch": 0.8395890123715664, - "grad_norm": 6.753639254937967, - "learning_rate": 1.6901410714576406e-05, - "loss": 1.7341, + "epoch": 0.5653770121434623, + "grad_norm": 3.3708880991040098, + "learning_rate": 1.8712118154871808e-05, + "loss": 1.0082, "step": 4004 }, { - "epoch": 0.8397986999370938, - "grad_norm": 6.1121809819360955, - "learning_rate": 1.6899772221370205e-05, - "loss": 2.0645, + "epoch": 0.5655182151934481, + "grad_norm": 3.5647183047676654, + "learning_rate": 1.8711369688563925e-05, + "loss": 1.0219, "step": 4005 }, { - "epoch": 0.8400083875026211, - "grad_norm": 6.906744394471587, - "learning_rate": 1.6898133374538708e-05, - "loss": 1.9306, + "epoch": 0.565659418243434, + "grad_norm": 3.658344839046683, + "learning_rate": 1.871062101980757e-05, + "loss": 1.0328, "step": 4006 }, { - "epoch": 0.8402180750681485, - "grad_norm": 6.477071918635669, - "learning_rate": 1.6896494174165915e-05, - "loss": 1.8979, + "epoch": 0.5658006212934199, + "grad_norm": 3.7183317548085864, + "learning_rate": 1.870987214862015e-05, + "loss": 1.0559, "step": 4007 }, { - "epoch": 0.8404277626336758, - "grad_norm": 7.576741766780498, - "learning_rate": 1.689485462033583e-05, - "loss": 1.7563, + "epoch": 0.5659418243434058, + "grad_norm": 3.6854313763121973, + "learning_rate": 1.870912307501906e-05, + "loss": 1.1209, "step": 4008 }, { - "epoch": 0.8406374501992032, - "grad_norm": 7.50509130129985, - "learning_rate": 1.6893214713132493e-05, - "loss": 1.9493, + "epoch": 0.5660830273933917, + "grad_norm": 4.1700625993183715, + "learning_rate": 1.8708373799021705e-05, + "loss": 1.2296, "step": 4009 }, { - "epoch": 0.8408471377647305, - "grad_norm": 7.121248469291038, - "learning_rate": 1.6891574452639945e-05, - "loss": 1.8676, + "epoch": 0.5662242304433776, + "grad_norm": 3.569878134877533, + "learning_rate": 1.870762432064551e-05, + "loss": 1.0739, "step": 4010 }, { - "epoch": 0.8410568253302579, - "grad_norm": 7.452978984434102, - "learning_rate": 1.688993383894225e-05, - "loss": 1.8357, + "epoch": 0.5663654334933634, + "grad_norm": 4.714528583814792, + "learning_rate": 1.8706874639907887e-05, + "loss": 1.3346, "step": 4011 }, { - "epoch": 0.8412665128957852, - "grad_norm": 6.115838218404241, - "learning_rate": 1.68882928721235e-05, - "loss": 1.6665, + "epoch": 0.5665066365433493, + "grad_norm": 4.456733139042029, + "learning_rate": 1.8706124756826255e-05, + "loss": 1.2945, "step": 4012 }, { - "epoch": 0.8414762004613127, - "grad_norm": 6.181234890005197, - "learning_rate": 1.688665155226779e-05, - "loss": 1.7065, + "epoch": 0.5666478395933352, + "grad_norm": 2.8165700077283486, + "learning_rate": 1.8705374671418048e-05, + "loss": 0.7626, "step": 4013 }, { - "epoch": 0.84168588802684, - "grad_norm": 6.813608444389689, - "learning_rate": 1.6885009879459245e-05, - "loss": 1.867, + "epoch": 0.5667890426433211, + "grad_norm": 4.4521736785944706, + "learning_rate": 1.8704624383700686e-05, + "loss": 1.1991, "step": 4014 }, { - "epoch": 0.8418955755923674, - "grad_norm": 6.522394137976414, - "learning_rate": 1.6883367853782e-05, - "loss": 1.7219, + "epoch": 0.566930245693307, + "grad_norm": 4.3107219518221465, + "learning_rate": 1.8703873893691617e-05, + "loss": 1.0934, "step": 4015 }, { - "epoch": 0.8421052631578947, - "grad_norm": 6.85252620738763, - "learning_rate": 1.688172547532022e-05, - "loss": 1.8185, + "epoch": 0.5670714487432928, + "grad_norm": 3.3212234512839416, + "learning_rate": 1.8703123201408277e-05, + "loss": 0.8906, "step": 4016 }, { - "epoch": 0.8423149507234221, - "grad_norm": 7.209103110221067, - "learning_rate": 1.6880082744158067e-05, - "loss": 1.7135, + "epoch": 0.5672126517932787, + "grad_norm": 3.486644299402663, + "learning_rate": 1.8702372306868113e-05, + "loss": 0.9148, "step": 4017 }, { - "epoch": 0.8425246382889494, - "grad_norm": 6.931465671789631, - "learning_rate": 1.687843966037974e-05, - "loss": 1.789, + "epoch": 0.5673538548432646, + "grad_norm": 4.497917731164971, + "learning_rate": 1.8701621210088574e-05, + "loss": 1.3779, "step": 4018 }, { - "epoch": 0.8427343258544768, - "grad_norm": 6.916898807033722, - "learning_rate": 1.6876796224069457e-05, - "loss": 2.0119, + "epoch": 0.5674950578932505, + "grad_norm": 4.267107338949943, + "learning_rate": 1.8700869911087115e-05, + "loss": 1.0385, "step": 4019 }, { - "epoch": 0.8429440134200042, - "grad_norm": 7.305549369292113, - "learning_rate": 1.6875152435311437e-05, - "loss": 1.7291, + "epoch": 0.5676362609432364, + "grad_norm": 4.0654836341133285, + "learning_rate": 1.8700118409881198e-05, + "loss": 1.1222, "step": 4020 }, { - "epoch": 0.8431537009855316, - "grad_norm": 7.192916222582827, - "learning_rate": 1.687350829418993e-05, - "loss": 1.6291, + "epoch": 0.5677774639932223, + "grad_norm": 3.657433983718111, + "learning_rate": 1.8699366706488287e-05, + "loss": 0.8517, "step": 4021 }, { - "epoch": 0.843363388551059, - "grad_norm": 7.396340310351018, - "learning_rate": 1.68718638007892e-05, - "loss": 1.8393, + "epoch": 0.5679186670432081, + "grad_norm": 3.221267266718776, + "learning_rate": 1.8698614800925853e-05, + "loss": 1.1548, "step": 4022 }, { - "epoch": 0.8435730761165863, - "grad_norm": 7.770199906827693, - "learning_rate": 1.687021895519353e-05, - "loss": 1.931, + "epoch": 0.568059870093194, + "grad_norm": 3.5436983626221457, + "learning_rate": 1.8697862693211363e-05, + "loss": 0.9938, "step": 4023 }, { - "epoch": 0.8437827636821137, - "grad_norm": 6.771345182781149, - "learning_rate": 1.686857375748723e-05, - "loss": 1.9987, + "epoch": 0.5682010731431799, + "grad_norm": 3.7820638060943956, + "learning_rate": 1.86971103833623e-05, + "loss": 1.0047, "step": 4024 }, { - "epoch": 0.843992451247641, - "grad_norm": 6.369968201595189, - "learning_rate": 1.6866928207754606e-05, - "loss": 1.8065, + "epoch": 0.5683422761931658, + "grad_norm": 3.210789657144361, + "learning_rate": 1.869635787139615e-05, + "loss": 0.9722, "step": 4025 }, { - "epoch": 0.8442021388131684, - "grad_norm": 6.939187416800364, - "learning_rate": 1.6865282306080004e-05, - "loss": 1.5416, + "epoch": 0.5684834792431517, + "grad_norm": 4.125414082165563, + "learning_rate": 1.8695605157330398e-05, + "loss": 1.1898, "step": 4026 }, { - "epoch": 0.8444118263786957, - "grad_norm": 6.160319056442851, - "learning_rate": 1.6863636052547777e-05, - "loss": 1.6808, + "epoch": 0.5686246822931376, + "grad_norm": 3.2006671030063605, + "learning_rate": 1.869485224118254e-05, + "loss": 1.067, "step": 4027 }, { - "epoch": 0.8446215139442231, - "grad_norm": 6.671515785380993, - "learning_rate": 1.6861989447242292e-05, - "loss": 1.9519, + "epoch": 0.5687658853431234, + "grad_norm": 4.089174053233287, + "learning_rate": 1.869409912297007e-05, + "loss": 1.0943, "step": 4028 }, { - "epoch": 0.8448312015097504, - "grad_norm": 5.624884097408585, - "learning_rate": 1.6860342490247955e-05, - "loss": 1.3278, + "epoch": 0.5689070883931093, + "grad_norm": 3.4092930254541267, + "learning_rate": 1.869334580271049e-05, + "loss": 0.9824, "step": 4029 }, { - "epoch": 0.8450408890752779, - "grad_norm": 6.1733010812894324, - "learning_rate": 1.685869518164916e-05, - "loss": 1.9412, + "epoch": 0.5690482914430952, + "grad_norm": 4.35356074344431, + "learning_rate": 1.8692592280421305e-05, + "loss": 1.212, "step": 4030 }, { - "epoch": 0.8452505766408052, - "grad_norm": 5.765314732838086, - "learning_rate": 1.6857047521530346e-05, - "loss": 1.6069, + "epoch": 0.5691894944930811, + "grad_norm": 3.489860911727647, + "learning_rate": 1.8691838556120034e-05, + "loss": 1.1868, "step": 4031 }, { - "epoch": 0.8454602642063326, - "grad_norm": 6.374809020935715, - "learning_rate": 1.685539950997595e-05, - "loss": 1.7501, + "epoch": 0.569330697543067, + "grad_norm": 3.361720575529464, + "learning_rate": 1.8691084629824186e-05, + "loss": 0.9423, "step": 4032 }, { - "epoch": 0.8456699517718599, - "grad_norm": 5.9228495312441165, - "learning_rate": 1.685375114707044e-05, - "loss": 1.7501, + "epoch": 0.5694719005930529, + "grad_norm": 3.2236889991097093, + "learning_rate": 1.8690330501551286e-05, + "loss": 0.8294, "step": 4033 }, { - "epoch": 0.8458796393373873, - "grad_norm": 7.218462249616903, - "learning_rate": 1.6852102432898298e-05, - "loss": 2.0852, + "epoch": 0.5696131036430387, + "grad_norm": 3.556290011979793, + "learning_rate": 1.868957617131886e-05, + "loss": 0.8222, "step": 4034 }, { - "epoch": 0.8460893269029146, - "grad_norm": 7.838262731900389, - "learning_rate": 1.685045336754402e-05, - "loss": 2.3111, + "epoch": 0.5697543066930245, + "grad_norm": 3.7321653029750155, + "learning_rate": 1.8688821639144432e-05, + "loss": 1.0997, "step": 4035 }, { - "epoch": 0.846299014468442, - "grad_norm": 6.747390285628549, - "learning_rate": 1.684880395109213e-05, - "loss": 1.8842, + "epoch": 0.5698955097430104, + "grad_norm": 3.8130805488870547, + "learning_rate": 1.8688066905045545e-05, + "loss": 1.015, "step": 4036 }, { - "epoch": 0.8465087020339694, - "grad_norm": 5.644998930329347, - "learning_rate": 1.684715418362715e-05, - "loss": 1.7946, + "epoch": 0.5700367127929963, + "grad_norm": 3.8600953399708007, + "learning_rate": 1.8687311969039735e-05, + "loss": 1.0199, "step": 4037 }, { - "epoch": 0.8467183895994967, - "grad_norm": 6.49390528388707, - "learning_rate": 1.684550406523365e-05, - "loss": 1.8395, + "epoch": 0.5701779158429822, + "grad_norm": 4.134047799246088, + "learning_rate": 1.8686556831144545e-05, + "loss": 1.1477, "step": 4038 }, { - "epoch": 0.8469280771650242, - "grad_norm": 6.15223090537594, - "learning_rate": 1.6843853595996188e-05, - "loss": 1.7943, + "epoch": 0.570319118892968, + "grad_norm": 3.9964222690492197, + "learning_rate": 1.8685801491377527e-05, + "loss": 1.1441, "step": 4039 }, { - "epoch": 0.8471377647305515, - "grad_norm": 6.118079923338906, - "learning_rate": 1.6842202775999363e-05, - "loss": 1.6471, + "epoch": 0.5704603219429539, + "grad_norm": 3.407155980585171, + "learning_rate": 1.8685045949756232e-05, + "loss": 1.1131, "step": 4040 }, { - "epoch": 0.8473474522960789, - "grad_norm": 6.45871309394736, - "learning_rate": 1.6840551605327778e-05, - "loss": 1.7374, + "epoch": 0.5706015249929398, + "grad_norm": 3.6004508519496996, + "learning_rate": 1.868429020629822e-05, + "loss": 1.1093, "step": 4041 }, { - "epoch": 0.8475571398616062, - "grad_norm": 5.340383660209119, - "learning_rate": 1.6838900084066057e-05, - "loss": 1.5416, + "epoch": 0.5707427280429257, + "grad_norm": 3.294933598654903, + "learning_rate": 1.8683534261021058e-05, + "loss": 0.9021, "step": 4042 }, { - "epoch": 0.8477668274271336, - "grad_norm": 5.431163753877188, - "learning_rate": 1.683724821229885e-05, - "loss": 2.0014, + "epoch": 0.5708839310929116, + "grad_norm": 3.607231252704722, + "learning_rate": 1.8682778113942306e-05, + "loss": 0.9316, "step": 4043 }, { - "epoch": 0.8479765149926609, - "grad_norm": 5.786637531431312, - "learning_rate": 1.683559599011081e-05, - "loss": 1.7262, + "epoch": 0.5710251341428975, + "grad_norm": 3.310538084518935, + "learning_rate": 1.8682021765079537e-05, + "loss": 0.9706, "step": 4044 }, { - "epoch": 0.8481862025581883, - "grad_norm": 5.728800571441821, - "learning_rate": 1.683394341758662e-05, - "loss": 1.6645, + "epoch": 0.5711663371928833, + "grad_norm": 4.548667763013974, + "learning_rate": 1.868126521445034e-05, + "loss": 1.3445, "step": 4045 }, { - "epoch": 0.8483958901237156, - "grad_norm": 5.84242924625343, - "learning_rate": 1.6832290494810973e-05, - "loss": 1.6721, + "epoch": 0.5713075402428692, + "grad_norm": 4.09011477774083, + "learning_rate": 1.8680508462072282e-05, + "loss": 1.047, "step": 4046 }, { - "epoch": 0.848605577689243, - "grad_norm": 5.762757560058926, - "learning_rate": 1.6830637221868594e-05, - "loss": 1.5264, + "epoch": 0.5714487432928551, + "grad_norm": 3.661998994232539, + "learning_rate": 1.867975150796296e-05, + "loss": 1.0011, "step": 4047 }, { - "epoch": 0.8488152652547704, - "grad_norm": 6.494943925464844, - "learning_rate": 1.682898359884421e-05, - "loss": 1.4834, + "epoch": 0.571589946342841, + "grad_norm": 3.3706059459380215, + "learning_rate": 1.867899435213996e-05, + "loss": 1.0994, "step": 4048 }, { - "epoch": 0.8490249528202978, - "grad_norm": 6.178539836281641, - "learning_rate": 1.682732962582257e-05, - "loss": 2.0168, + "epoch": 0.5717311493928269, + "grad_norm": 3.7157762270720895, + "learning_rate": 1.8678236994620878e-05, + "loss": 0.9293, "step": 4049 }, { - "epoch": 0.8492346403858251, - "grad_norm": 7.5972464468103595, - "learning_rate": 1.6825675302888447e-05, - "loss": 1.8575, + "epoch": 0.5718723524428128, + "grad_norm": 4.124502626145871, + "learning_rate": 1.867747943542332e-05, + "loss": 1.0396, "step": 4050 }, { - "epoch": 0.8494443279513525, - "grad_norm": 5.69115397775228, - "learning_rate": 1.6824020630126625e-05, - "loss": 1.5521, + "epoch": 0.5720135554927986, + "grad_norm": 3.6139623445028577, + "learning_rate": 1.8676721674564884e-05, + "loss": 0.9247, "step": 4051 }, { - "epoch": 0.8496540155168798, - "grad_norm": 7.801675329561413, - "learning_rate": 1.682236560762191e-05, - "loss": 1.9698, + "epoch": 0.5721547585427845, + "grad_norm": 4.076364476169935, + "learning_rate": 1.8675963712063184e-05, + "loss": 1.2324, "step": 4052 }, { - "epoch": 0.8498637030824072, - "grad_norm": 5.769053570789696, - "learning_rate": 1.6820710235459127e-05, - "loss": 1.5013, + "epoch": 0.5722959615927704, + "grad_norm": 3.4124404132729564, + "learning_rate": 1.8675205547935836e-05, + "loss": 0.991, "step": 4053 }, { - "epoch": 0.8500733906479345, - "grad_norm": 6.546824269564784, - "learning_rate": 1.681905451372311e-05, - "loss": 1.8828, + "epoch": 0.5724371646427563, + "grad_norm": 3.1382017917374117, + "learning_rate": 1.8674447182200457e-05, + "loss": 0.7637, "step": 4054 }, { - "epoch": 0.8502830782134619, - "grad_norm": 6.04430494919629, - "learning_rate": 1.6817398442498723e-05, - "loss": 1.6741, + "epoch": 0.5725783676927422, + "grad_norm": 4.066103826686601, + "learning_rate": 1.8673688614874668e-05, + "loss": 1.1621, "step": 4055 }, { - "epoch": 0.8504927657789894, - "grad_norm": 6.193996276395604, - "learning_rate": 1.6815742021870845e-05, - "loss": 1.6034, + "epoch": 0.5727195707427281, + "grad_norm": 3.5593029798153073, + "learning_rate": 1.8672929845976106e-05, + "loss": 1.0452, "step": 4056 }, { - "epoch": 0.8507024533445167, - "grad_norm": 6.5374210177670955, - "learning_rate": 1.6814085251924364e-05, - "loss": 1.5482, + "epoch": 0.5728607737927139, + "grad_norm": 3.483338870226903, + "learning_rate": 1.86721708755224e-05, + "loss": 0.9948, "step": 4057 }, { - "epoch": 0.8509121409100441, - "grad_norm": 6.514336917218364, - "learning_rate": 1.68124281327442e-05, - "loss": 1.6008, + "epoch": 0.5730019768426998, + "grad_norm": 4.148003201983862, + "learning_rate": 1.8671411703531186e-05, + "loss": 1.1782, "step": 4058 }, { - "epoch": 0.8511218284755714, - "grad_norm": 6.7995146920074685, - "learning_rate": 1.6810770664415276e-05, - "loss": 1.8645, + "epoch": 0.5731431798926857, + "grad_norm": 6.3901148085079935, + "learning_rate": 1.867065233002011e-05, + "loss": 1.1194, "step": 4059 }, { - "epoch": 0.8513315160410988, - "grad_norm": 6.312212077918305, - "learning_rate": 1.680911284702254e-05, - "loss": 2.003, + "epoch": 0.5732843829426716, + "grad_norm": 4.240732582520416, + "learning_rate": 1.8669892755006816e-05, + "loss": 1.1402, "step": 4060 }, { - "epoch": 0.8515412036066261, - "grad_norm": 6.688409603756761, - "learning_rate": 1.6807454680650966e-05, - "loss": 1.8913, + "epoch": 0.5734255859926575, + "grad_norm": 4.671011318299797, + "learning_rate": 1.866913297850896e-05, + "loss": 1.1747, "step": 4061 }, { - "epoch": 0.8517508911721535, - "grad_norm": 6.369184361389251, - "learning_rate": 1.6805796165385528e-05, - "loss": 1.8479, + "epoch": 0.5735667890426434, + "grad_norm": 3.433875078800676, + "learning_rate": 1.8668373000544197e-05, + "loss": 0.955, "step": 4062 }, { - "epoch": 0.8519605787376808, - "grad_norm": 7.5700002444483285, - "learning_rate": 1.6804137301311234e-05, - "loss": 2.1044, + "epoch": 0.5737079920926292, + "grad_norm": 3.9755119151099567, + "learning_rate": 1.8667612821130192e-05, + "loss": 0.855, "step": 4063 }, { - "epoch": 0.8521702663032082, - "grad_norm": 6.817948679081982, - "learning_rate": 1.68024780885131e-05, - "loss": 1.5958, + "epoch": 0.5738491951426151, + "grad_norm": 3.66702021349931, + "learning_rate": 1.8666852440284607e-05, + "loss": 1.1501, "step": 4064 }, { - "epoch": 0.8523799538687356, - "grad_norm": 7.506265727778259, - "learning_rate": 1.680081852707617e-05, - "loss": 1.727, + "epoch": 0.573990398192601, + "grad_norm": 3.833889190963788, + "learning_rate": 1.8666091858025113e-05, + "loss": 0.9233, "step": 4065 }, { - "epoch": 0.852589641434263, - "grad_norm": 7.280909309786979, - "learning_rate": 1.6799158617085493e-05, - "loss": 2.0798, + "epoch": 0.5741316012425869, + "grad_norm": 3.6927183979369844, + "learning_rate": 1.8665331074369385e-05, + "loss": 0.9356, "step": 4066 }, { - "epoch": 0.8527993289997903, - "grad_norm": 6.67376062868688, - "learning_rate": 1.6797498358626144e-05, - "loss": 1.3141, + "epoch": 0.5742728042925728, + "grad_norm": 3.563584328804171, + "learning_rate": 1.8664570089335107e-05, + "loss": 1.0223, "step": 4067 }, { - "epoch": 0.8530090165653177, - "grad_norm": 6.738049115867896, - "learning_rate": 1.6795837751783215e-05, - "loss": 1.7618, + "epoch": 0.5744140073425587, + "grad_norm": 3.827552698585337, + "learning_rate": 1.8663808902939965e-05, + "loss": 0.9718, "step": 4068 }, { - "epoch": 0.853218704130845, - "grad_norm": 5.461867226881196, - "learning_rate": 1.6794176796641816e-05, - "loss": 1.3658, + "epoch": 0.5745552103925444, + "grad_norm": 3.0354040275615426, + "learning_rate": 1.866304751520164e-05, + "loss": 0.757, "step": 4069 }, { - "epoch": 0.8534283916963724, - "grad_norm": 6.064585966998766, - "learning_rate": 1.679251549328707e-05, - "loss": 1.2837, + "epoch": 0.5746964134425303, + "grad_norm": 4.124308887238903, + "learning_rate": 1.8662285926137837e-05, + "loss": 0.9576, "step": 4070 }, { - "epoch": 0.8536380792618997, - "grad_norm": 5.288320535297466, - "learning_rate": 1.679085384180412e-05, - "loss": 1.7139, + "epoch": 0.5748376164925162, + "grad_norm": 3.748082007878318, + "learning_rate": 1.8661524135766245e-05, + "loss": 1.0634, "step": 4071 }, { - "epoch": 0.8538477668274271, - "grad_norm": 7.042558412663411, - "learning_rate": 1.678919184227814e-05, - "loss": 1.6952, + "epoch": 0.5749788195425021, + "grad_norm": 3.9242736955462334, + "learning_rate": 1.866076214410458e-05, + "loss": 1.1096, "step": 4072 }, { - "epoch": 0.8540574543929546, - "grad_norm": 5.768677494000512, - "learning_rate": 1.67875294947943e-05, - "loss": 1.7048, + "epoch": 0.575120022592488, + "grad_norm": 3.5970199025641625, + "learning_rate": 1.8659999951170537e-05, + "loss": 0.9973, "step": 4073 }, { - "epoch": 0.8542671419584819, - "grad_norm": 6.605477401303626, - "learning_rate": 1.6785866799437802e-05, - "loss": 1.8234, + "epoch": 0.5752612256424738, + "grad_norm": 3.351985571872935, + "learning_rate": 1.8659237556981836e-05, + "loss": 0.9583, "step": 4074 }, { - "epoch": 0.8544768295240093, - "grad_norm": 6.333116552791403, - "learning_rate": 1.6784203756293858e-05, - "loss": 2.0078, + "epoch": 0.5754024286924597, + "grad_norm": 3.7299514533190616, + "learning_rate": 1.8658474961556196e-05, + "loss": 1.0378, "step": 4075 }, { - "epoch": 0.8546865170895366, - "grad_norm": 6.347038795224373, - "learning_rate": 1.6782540365447706e-05, - "loss": 1.9784, + "epoch": 0.5755436317424456, + "grad_norm": 3.3691051032372332, + "learning_rate": 1.8657712164911337e-05, + "loss": 0.9757, "step": 4076 }, { - "epoch": 0.854896204655064, - "grad_norm": 5.442063158279843, - "learning_rate": 1.67808766269846e-05, - "loss": 1.5329, + "epoch": 0.5756848347924315, + "grad_norm": 4.269007724904411, + "learning_rate": 1.8656949167064985e-05, + "loss": 1.2039, "step": 4077 }, { - "epoch": 0.8551058922205913, - "grad_norm": 6.584376713015203, - "learning_rate": 1.6779212540989802e-05, - "loss": 1.8931, + "epoch": 0.5758260378424174, + "grad_norm": 3.9688942324797383, + "learning_rate": 1.865618596803487e-05, + "loss": 1.0074, "step": 4078 }, { - "epoch": 0.8553155797861187, - "grad_norm": 6.132185835620608, - "learning_rate": 1.6777548107548605e-05, - "loss": 1.3404, + "epoch": 0.5759672408924033, + "grad_norm": 3.8733222221692354, + "learning_rate": 1.865542256783874e-05, + "loss": 1.0798, "step": 4079 }, { - "epoch": 0.855525267351646, - "grad_norm": 5.875433445790861, - "learning_rate": 1.677588332674631e-05, - "loss": 1.702, + "epoch": 0.5761084439423891, + "grad_norm": 3.5535190444316527, + "learning_rate": 1.865465896649432e-05, + "loss": 1.0735, "step": 4080 }, { - "epoch": 0.8557349549171734, - "grad_norm": 6.516010102519589, - "learning_rate": 1.6774218198668246e-05, - "loss": 1.7293, + "epoch": 0.576249646992375, + "grad_norm": 3.986588276414098, + "learning_rate": 1.8653895164019366e-05, + "loss": 1.0561, "step": 4081 }, { - "epoch": 0.8559446424827007, - "grad_norm": 7.416113122299674, - "learning_rate": 1.677255272339975e-05, - "loss": 1.6949, + "epoch": 0.5763908500423609, + "grad_norm": 3.7592889064791946, + "learning_rate": 1.8653131160431622e-05, + "loss": 1.1176, "step": 4082 }, { - "epoch": 0.8561543300482282, - "grad_norm": 6.27239336850154, - "learning_rate": 1.6770886901026175e-05, - "loss": 1.7851, + "epoch": 0.5765320530923468, + "grad_norm": 3.925079357411468, + "learning_rate": 1.865236695574885e-05, + "loss": 1.1441, "step": 4083 }, { - "epoch": 0.8563640176137555, - "grad_norm": 6.645256940827824, - "learning_rate": 1.676922073163291e-05, - "loss": 1.8578, + "epoch": 0.5766732561423327, + "grad_norm": 3.6549853293507364, + "learning_rate": 1.865160254998881e-05, + "loss": 1.0528, "step": 4084 }, { - "epoch": 0.8565737051792829, - "grad_norm": 8.519694567238316, - "learning_rate": 1.6767554215305336e-05, - "loss": 1.9412, + "epoch": 0.5768144591923186, + "grad_norm": 4.72273554505084, + "learning_rate": 1.865083794316926e-05, + "loss": 1.3406, "step": 4085 }, { - "epoch": 0.8567833927448102, - "grad_norm": 8.558611013620533, - "learning_rate": 1.6765887352128875e-05, - "loss": 1.7994, + "epoch": 0.5769556622423044, + "grad_norm": 3.758135342394704, + "learning_rate": 1.8650073135307973e-05, + "loss": 1.1384, "step": 4086 }, { - "epoch": 0.8569930803103376, - "grad_norm": 7.269436595798622, - "learning_rate": 1.676422014218895e-05, - "loss": 1.6119, + "epoch": 0.5770968652922903, + "grad_norm": 3.9927339524153296, + "learning_rate": 1.864930812642272e-05, + "loss": 1.1253, "step": 4087 }, { - "epoch": 0.8572027678758649, - "grad_norm": 8.358283549429638, - "learning_rate": 1.6762552585571013e-05, - "loss": 1.8244, + "epoch": 0.5772380683422762, + "grad_norm": 3.9828418644034222, + "learning_rate": 1.8648542916531283e-05, + "loss": 0.9463, "step": 4088 }, { - "epoch": 0.8574124554413923, - "grad_norm": 7.274918469975, - "learning_rate": 1.6760884682360524e-05, - "loss": 1.6623, + "epoch": 0.5773792713922621, + "grad_norm": 4.477233367179409, + "learning_rate": 1.864777750565144e-05, + "loss": 1.0366, "step": 4089 }, { - "epoch": 0.8576221430069196, - "grad_norm": 6.267100562341706, - "learning_rate": 1.675921643264297e-05, - "loss": 1.5391, + "epoch": 0.577520474442248, + "grad_norm": 4.6330626747561485, + "learning_rate": 1.8647011893800987e-05, + "loss": 1.3283, "step": 4090 }, { - "epoch": 0.857831830572447, - "grad_norm": 7.294849565446345, - "learning_rate": 1.6757547836503852e-05, - "loss": 1.6781, + "epoch": 0.5776616774922338, + "grad_norm": 4.523402042030172, + "learning_rate": 1.8646246080997712e-05, + "loss": 1.3327, "step": 4091 }, { - "epoch": 0.8580415181379745, - "grad_norm": 6.300140674071617, - "learning_rate": 1.6755878894028686e-05, - "loss": 1.758, + "epoch": 0.5778028805422197, + "grad_norm": 3.1236032032378027, + "learning_rate": 1.864548006725941e-05, + "loss": 0.8357, "step": 4092 }, { - "epoch": 0.8582512057035018, - "grad_norm": 6.416570693730129, - "learning_rate": 1.675420960530301e-05, - "loss": 1.7945, + "epoch": 0.5779440835922056, + "grad_norm": 3.721403427530213, + "learning_rate": 1.8644713852603886e-05, + "loss": 1.0216, "step": 4093 }, { - "epoch": 0.8584608932690292, - "grad_norm": 7.767231207369511, - "learning_rate": 1.6752539970412372e-05, - "loss": 1.684, + "epoch": 0.5780852866421915, + "grad_norm": 3.7577475616156506, + "learning_rate": 1.8643947437048944e-05, + "loss": 0.9567, "step": 4094 }, { - "epoch": 0.8586705808345565, - "grad_norm": 8.243118646417733, - "learning_rate": 1.6750869989442355e-05, - "loss": 2.0537, + "epoch": 0.5782264896921774, + "grad_norm": 4.339273507854912, + "learning_rate": 1.8643180820612397e-05, + "loss": 0.8988, "step": 4095 }, { - "epoch": 0.8588802684000839, - "grad_norm": 6.190850573492643, - "learning_rate": 1.674919966247854e-05, - "loss": 1.8319, + "epoch": 0.5783676927421633, + "grad_norm": 4.325405401469997, + "learning_rate": 1.8642414003312063e-05, + "loss": 1.0157, "step": 4096 }, { - "epoch": 0.8590899559656112, - "grad_norm": 6.042247186646617, - "learning_rate": 1.6747528989606538e-05, - "loss": 1.7628, + "epoch": 0.5785088957921491, + "grad_norm": 3.284602683304248, + "learning_rate": 1.864164698516576e-05, + "loss": 0.7211, "step": 4097 }, { - "epoch": 0.8592996435311386, - "grad_norm": 6.424365228216792, - "learning_rate": 1.674585797091197e-05, - "loss": 1.6779, + "epoch": 0.578650098842135, + "grad_norm": 3.638482015522909, + "learning_rate": 1.8640879766191305e-05, + "loss": 1.0067, "step": 4098 }, { - "epoch": 0.8595093310966659, - "grad_norm": 6.080976025170756, - "learning_rate": 1.6744186606480482e-05, - "loss": 2.0233, + "epoch": 0.5787913018921209, + "grad_norm": 4.296071967987287, + "learning_rate": 1.8640112346406545e-05, + "loss": 1.1891, "step": 4099 }, { - "epoch": 0.8597190186621934, - "grad_norm": 5.227385829929483, - "learning_rate": 1.6742514896397732e-05, - "loss": 1.3561, + "epoch": 0.5789325049421068, + "grad_norm": 4.901869242405439, + "learning_rate": 1.8639344725829302e-05, + "loss": 1.4346, "step": 4100 }, { - "epoch": 0.8599287062277207, - "grad_norm": 6.460993572808233, - "learning_rate": 1.67408428407494e-05, - "loss": 2.0457, + "epoch": 0.5790737079920927, + "grad_norm": 4.1041779440755315, + "learning_rate": 1.863857690447742e-05, + "loss": 1.1322, "step": 4101 }, { - "epoch": 0.8601383937932481, - "grad_norm": 7.001027589011534, - "learning_rate": 1.6739170439621184e-05, - "loss": 1.5379, + "epoch": 0.5792149110420786, + "grad_norm": 3.320669993704349, + "learning_rate": 1.863780888236874e-05, + "loss": 1.0015, "step": 4102 }, { - "epoch": 0.8603480813587754, - "grad_norm": 5.66578736761006, - "learning_rate": 1.6737497693098792e-05, - "loss": 1.764, + "epoch": 0.5793561140920643, + "grad_norm": 3.481158138883747, + "learning_rate": 1.8637040659521114e-05, + "loss": 0.9114, "step": 4103 }, { - "epoch": 0.8605577689243028, - "grad_norm": 6.535439473448954, - "learning_rate": 1.6735824601267956e-05, - "loss": 2.085, + "epoch": 0.5794973171420502, + "grad_norm": 3.2296475287976234, + "learning_rate": 1.863627223595239e-05, + "loss": 0.949, "step": 4104 }, { - "epoch": 0.8607674564898301, - "grad_norm": 5.602150166482043, - "learning_rate": 1.6734151164214426e-05, - "loss": 1.5367, + "epoch": 0.5796385201920361, + "grad_norm": 3.544018410815906, + "learning_rate": 1.863550361168043e-05, + "loss": 1.1104, "step": 4105 }, { - "epoch": 0.8609771440553575, - "grad_norm": 6.345825360859426, - "learning_rate": 1.6732477382023975e-05, - "loss": 1.5763, + "epoch": 0.579779723242022, + "grad_norm": 3.70059130708354, + "learning_rate": 1.8634734786723096e-05, + "loss": 1.036, "step": 4106 }, { - "epoch": 0.8611868316208848, - "grad_norm": 6.197337714905158, - "learning_rate": 1.6730803254782375e-05, - "loss": 1.9616, + "epoch": 0.5799209262920079, + "grad_norm": 3.9310511314850864, + "learning_rate": 1.8633965761098255e-05, + "loss": 1.1319, "step": 4107 }, { - "epoch": 0.8613965191864122, - "grad_norm": 6.704681234172346, - "learning_rate": 1.672912878257544e-05, - "loss": 1.7714, + "epoch": 0.5800621293419937, + "grad_norm": 4.1680156749198085, + "learning_rate": 1.8633196534823778e-05, + "loss": 1.3732, "step": 4108 }, { - "epoch": 0.8616062067519397, - "grad_norm": 6.84404509215181, - "learning_rate": 1.6727453965488977e-05, - "loss": 1.8884, + "epoch": 0.5802033323919796, + "grad_norm": 3.547009320086699, + "learning_rate": 1.8632427107917542e-05, + "loss": 1.0682, "step": 4109 }, { - "epoch": 0.861815894317467, - "grad_norm": 5.759213560494223, - "learning_rate": 1.6725778803608836e-05, - "loss": 1.6507, + "epoch": 0.5803445354419655, + "grad_norm": 3.2804669783562694, + "learning_rate": 1.863165748039743e-05, + "loss": 0.8838, "step": 4110 }, { - "epoch": 0.8620255818829944, - "grad_norm": 6.43857197204098, - "learning_rate": 1.672410329702086e-05, - "loss": 1.8624, + "epoch": 0.5804857384919514, + "grad_norm": 3.7086539230506337, + "learning_rate": 1.8630887652281325e-05, + "loss": 1.0536, "step": 4111 }, { - "epoch": 0.8622352694485217, - "grad_norm": 6.617445157629611, - "learning_rate": 1.6722427445810935e-05, - "loss": 1.9023, + "epoch": 0.5806269415419373, + "grad_norm": 3.9473233903276848, + "learning_rate": 1.8630117623587116e-05, + "loss": 1.2209, "step": 4112 }, { - "epoch": 0.8624449570140491, - "grad_norm": 6.347460036252912, - "learning_rate": 1.6720751250064936e-05, - "loss": 2.1338, + "epoch": 0.5807681445919232, + "grad_norm": 3.1857044452577297, + "learning_rate": 1.86293473943327e-05, + "loss": 1.0196, "step": 4113 }, { - "epoch": 0.8626546445795764, - "grad_norm": 5.932569865442243, - "learning_rate": 1.6719074709868785e-05, - "loss": 1.6801, + "epoch": 0.580909347641909, + "grad_norm": 3.0909825332315592, + "learning_rate": 1.862857696453598e-05, + "loss": 0.9297, "step": 4114 }, { - "epoch": 0.8628643321451038, - "grad_norm": 6.403342864378055, - "learning_rate": 1.67173978253084e-05, - "loss": 1.4328, + "epoch": 0.5810505506918949, + "grad_norm": 3.9863603334242033, + "learning_rate": 1.8627806334214855e-05, + "loss": 1.0411, "step": 4115 }, { - "epoch": 0.8630740197106311, - "grad_norm": 5.670422689912964, - "learning_rate": 1.6715720596469723e-05, - "loss": 1.6441, + "epoch": 0.5811917537418808, + "grad_norm": 4.262376141816896, + "learning_rate": 1.862703550338724e-05, + "loss": 1.0171, "step": 4116 }, { - "epoch": 0.8632837072761586, - "grad_norm": 6.065047645298659, - "learning_rate": 1.6714043023438724e-05, - "loss": 2.0748, + "epoch": 0.5813329567918667, + "grad_norm": 3.9985089115793975, + "learning_rate": 1.8626264472071045e-05, + "loss": 1.0034, "step": 4117 }, { - "epoch": 0.8634933948416859, - "grad_norm": 5.580019512609728, - "learning_rate": 1.6712365106301368e-05, - "loss": 1.831, + "epoch": 0.5814741598418526, + "grad_norm": 4.583967078420294, + "learning_rate": 1.862549324028419e-05, + "loss": 1.0899, "step": 4118 }, { - "epoch": 0.8637030824072133, - "grad_norm": 6.417617044226933, - "learning_rate": 1.671068684514366e-05, - "loss": 1.8771, + "epoch": 0.5816153628918385, + "grad_norm": 3.8435110755123376, + "learning_rate": 1.8624721808044596e-05, + "loss": 1.0668, "step": 4119 }, { - "epoch": 0.8639127699727406, - "grad_norm": 5.883432892905316, - "learning_rate": 1.6709008240051615e-05, - "loss": 1.73, + "epoch": 0.5817565659418243, + "grad_norm": 4.50179586566256, + "learning_rate": 1.862395017537019e-05, + "loss": 1.1168, "step": 4120 }, { - "epoch": 0.864122457538268, - "grad_norm": 5.939082319613713, - "learning_rate": 1.670732929111126e-05, - "loss": 1.8674, + "epoch": 0.5818977689918102, + "grad_norm": 3.711670697973765, + "learning_rate": 1.8623178342278907e-05, + "loss": 0.86, "step": 4121 }, { - "epoch": 0.8643321451037953, - "grad_norm": 5.769408061985854, - "learning_rate": 1.6705649998408644e-05, - "loss": 1.8751, + "epoch": 0.5820389720417961, + "grad_norm": 3.3557356490790973, + "learning_rate": 1.8622406308788683e-05, + "loss": 0.9903, "step": 4122 }, { - "epoch": 0.8645418326693227, - "grad_norm": 5.939437567638129, - "learning_rate": 1.6703970362029836e-05, - "loss": 1.7763, + "epoch": 0.582180175091782, + "grad_norm": 3.5910738672981632, + "learning_rate": 1.862163407491746e-05, + "loss": 1.0231, "step": 4123 }, { - "epoch": 0.86475152023485, - "grad_norm": 6.864290094889894, - "learning_rate": 1.6702290382060923e-05, - "loss": 1.5722, + "epoch": 0.5823213781417679, + "grad_norm": 3.268798707228626, + "learning_rate": 1.862086164068319e-05, + "loss": 1.0015, "step": 4124 }, { - "epoch": 0.8649612078003774, - "grad_norm": 5.900771778414244, - "learning_rate": 1.6700610058588e-05, - "loss": 1.6329, + "epoch": 0.5824625811917538, + "grad_norm": 3.860641764477037, + "learning_rate": 1.8620089006103812e-05, + "loss": 1.0093, "step": 4125 }, { - "epoch": 0.8651708953659049, - "grad_norm": 6.397590505603625, - "learning_rate": 1.669892939169719e-05, - "loss": 1.5469, + "epoch": 0.5826037842417396, + "grad_norm": 4.298604029423862, + "learning_rate": 1.8619316171197292e-05, + "loss": 1.417, "step": 4126 }, { - "epoch": 0.8653805829314322, - "grad_norm": 6.4875939528809115, - "learning_rate": 1.6697248381474628e-05, - "loss": 1.8049, + "epoch": 0.5827449872917255, + "grad_norm": 3.471171173962099, + "learning_rate": 1.8618543135981584e-05, + "loss": 0.9174, "step": 4127 }, { - "epoch": 0.8655902704969596, - "grad_norm": 6.224547445409898, - "learning_rate": 1.6695567028006473e-05, - "loss": 1.7051, + "epoch": 0.5828861903417114, + "grad_norm": 4.841094139622842, + "learning_rate": 1.861776990047466e-05, + "loss": 0.896, "step": 4128 }, { - "epoch": 0.8657999580624869, - "grad_norm": 7.289401450254317, - "learning_rate": 1.6693885331378895e-05, - "loss": 1.9078, + "epoch": 0.5830273933916973, + "grad_norm": 3.5639196508723607, + "learning_rate": 1.861699646469448e-05, + "loss": 1.1188, "step": 4129 }, { - "epoch": 0.8660096456280143, - "grad_norm": 10.925646727562006, - "learning_rate": 1.6692203291678083e-05, - "loss": 1.874, + "epoch": 0.5831685964416832, + "grad_norm": 3.62673367153281, + "learning_rate": 1.8616222828659024e-05, + "loss": 1.0508, "step": 4130 }, { - "epoch": 0.8662193331935416, - "grad_norm": 5.957501322225471, - "learning_rate": 1.6690520908990246e-05, - "loss": 1.775, + "epoch": 0.5833097994916691, + "grad_norm": 4.39419136431228, + "learning_rate": 1.861544899238627e-05, + "loss": 1.4008, "step": 4131 }, { - "epoch": 0.866429020759069, - "grad_norm": 5.8350814328062155, - "learning_rate": 1.668883818340161e-05, - "loss": 1.4979, + "epoch": 0.5834510025416549, + "grad_norm": 3.496167935892804, + "learning_rate": 1.8614674955894206e-05, + "loss": 0.8996, "step": 4132 }, { - "epoch": 0.8666387083245963, - "grad_norm": 5.83128349361276, - "learning_rate": 1.6687155114998416e-05, - "loss": 1.8158, + "epoch": 0.5835922055916408, + "grad_norm": 3.2883180045074263, + "learning_rate": 1.8613900719200813e-05, + "loss": 1.1551, "step": 4133 }, { - "epoch": 0.8668483958901237, - "grad_norm": 6.214088500148104, - "learning_rate": 1.668547170386692e-05, - "loss": 1.6123, + "epoch": 0.5837334086416267, + "grad_norm": 3.6695319780858475, + "learning_rate": 1.8613126282324092e-05, + "loss": 0.9432, "step": 4134 }, { - "epoch": 0.867058083455651, - "grad_norm": 5.345833789676633, - "learning_rate": 1.6683787950093407e-05, - "loss": 1.6547, + "epoch": 0.5838746116916126, + "grad_norm": 4.0264931087430265, + "learning_rate": 1.8612351645282033e-05, + "loss": 1.0691, "step": 4135 }, { - "epoch": 0.8672677710211785, - "grad_norm": 6.295755869637431, - "learning_rate": 1.6682103853764168e-05, - "loss": 1.5095, + "epoch": 0.5840158147415985, + "grad_norm": 3.69807199678644, + "learning_rate": 1.861157680809264e-05, + "loss": 0.988, "step": 4136 }, { - "epoch": 0.8674774585867058, - "grad_norm": 6.917409491913672, - "learning_rate": 1.6680419414965516e-05, - "loss": 1.9964, + "epoch": 0.5841570177915842, + "grad_norm": 3.8498733605858204, + "learning_rate": 1.861080177077392e-05, + "loss": 1.044, "step": 4137 }, { - "epoch": 0.8676871461522332, - "grad_norm": 7.6354402584142544, - "learning_rate": 1.6678734633783783e-05, - "loss": 1.7531, + "epoch": 0.5842982208415701, + "grad_norm": 4.068201377829482, + "learning_rate": 1.861002653334389e-05, + "loss": 0.8888, "step": 4138 }, { - "epoch": 0.8678968337177605, - "grad_norm": 6.232529202633637, - "learning_rate": 1.667704951030532e-05, - "loss": 1.7983, + "epoch": 0.584439423891556, + "grad_norm": 3.772110771026714, + "learning_rate": 1.860925109582056e-05, + "loss": 0.8924, "step": 4139 }, { - "epoch": 0.8681065212832879, - "grad_norm": 6.085741507743412, - "learning_rate": 1.6675364044616486e-05, - "loss": 1.7778, + "epoch": 0.5845806269415419, + "grad_norm": 3.2289979354102494, + "learning_rate": 1.860847545822195e-05, + "loss": 0.7755, "step": 4140 }, { - "epoch": 0.8683162088488152, - "grad_norm": 7.259996606017139, - "learning_rate": 1.6673678236803662e-05, - "loss": 1.9203, + "epoch": 0.5847218299915278, + "grad_norm": 3.775070156987605, + "learning_rate": 1.860769962056609e-05, + "loss": 1.0759, "step": 4141 }, { - "epoch": 0.8685258964143426, - "grad_norm": 6.085718675555314, - "learning_rate": 1.667199208695326e-05, - "loss": 1.6597, + "epoch": 0.5848630330415137, + "grad_norm": 3.407625556966518, + "learning_rate": 1.8606923582871007e-05, + "loss": 0.8833, "step": 4142 }, { - "epoch": 0.8687355839798699, - "grad_norm": 6.70760805124773, - "learning_rate": 1.667030559515169e-05, - "loss": 1.7846, + "epoch": 0.5850042360914995, + "grad_norm": 3.9787719510572295, + "learning_rate": 1.8606147345154738e-05, + "loss": 1.0986, "step": 4143 }, { - "epoch": 0.8689452715453974, - "grad_norm": 7.066188953890235, - "learning_rate": 1.6668618761485385e-05, - "loss": 1.6989, + "epoch": 0.5851454391414854, + "grad_norm": 3.996893294690718, + "learning_rate": 1.8605370907435322e-05, + "loss": 1.0206, "step": 4144 }, { - "epoch": 0.8691549591109248, - "grad_norm": 6.444540020861225, - "learning_rate": 1.6666931586040808e-05, - "loss": 2.0588, + "epoch": 0.5852866421914713, + "grad_norm": 4.154648656553519, + "learning_rate": 1.8604594269730803e-05, + "loss": 0.9879, "step": 4145 }, { - "epoch": 0.8693646466764521, - "grad_norm": 6.910348421867361, - "learning_rate": 1.6665244068904424e-05, - "loss": 1.5115, + "epoch": 0.5854278452414572, + "grad_norm": 3.2572562237746685, + "learning_rate": 1.860381743205923e-05, + "loss": 0.9381, "step": 4146 }, { - "epoch": 0.8695743342419795, - "grad_norm": 6.370747231989148, - "learning_rate": 1.6663556210162715e-05, - "loss": 1.5102, + "epoch": 0.5855690482914431, + "grad_norm": 4.43992863158585, + "learning_rate": 1.8603040394438653e-05, + "loss": 1.2913, "step": 4147 }, { - "epoch": 0.8697840218075068, - "grad_norm": 7.13844079649498, - "learning_rate": 1.66618680099022e-05, - "loss": 2.0964, + "epoch": 0.585710251341429, + "grad_norm": 3.5999277703032075, + "learning_rate": 1.8602263156887136e-05, + "loss": 1.0901, "step": 4148 }, { - "epoch": 0.8699937093730342, - "grad_norm": 7.20659292614585, - "learning_rate": 1.666017946820939e-05, - "loss": 1.5944, + "epoch": 0.5858514543914148, + "grad_norm": 3.794153402856877, + "learning_rate": 1.8601485719422735e-05, + "loss": 0.9834, "step": 4149 }, { - "epoch": 0.8702033969385615, - "grad_norm": 6.446851760976773, - "learning_rate": 1.6658490585170834e-05, - "loss": 1.7971, + "epoch": 0.5859926574414007, + "grad_norm": 5.436880412803433, + "learning_rate": 1.8600708082063518e-05, + "loss": 1.06, "step": 4150 }, { - "epoch": 0.8704130845040889, - "grad_norm": 6.933176390484422, - "learning_rate": 1.6656801360873087e-05, - "loss": 1.985, + "epoch": 0.5861338604913866, + "grad_norm": 3.608668652080738, + "learning_rate": 1.8599930244827565e-05, + "loss": 1.0059, "step": 4151 }, { - "epoch": 0.8706227720696162, - "grad_norm": 7.044346910190448, - "learning_rate": 1.6655111795402724e-05, - "loss": 1.9448, + "epoch": 0.5862750635413725, + "grad_norm": 3.952991424870413, + "learning_rate": 1.8599152207732945e-05, + "loss": 0.9753, "step": 4152 }, { - "epoch": 0.8708324596351437, - "grad_norm": 6.666922888668738, - "learning_rate": 1.665342188884634e-05, - "loss": 1.8658, + "epoch": 0.5864162665913584, + "grad_norm": 3.561323589778944, + "learning_rate": 1.859837397079774e-05, + "loss": 1.1503, "step": 4153 }, { - "epoch": 0.871042147200671, - "grad_norm": 5.815692865868926, - "learning_rate": 1.665173164129054e-05, - "loss": 1.6452, + "epoch": 0.5865574696413443, + "grad_norm": 3.8801330679186115, + "learning_rate": 1.859759553404004e-05, + "loss": 0.9689, "step": 4154 }, { - "epoch": 0.8712518347661984, - "grad_norm": 6.616894657237418, - "learning_rate": 1.665004105282196e-05, - "loss": 1.7552, + "epoch": 0.5866986726913301, + "grad_norm": 3.9749590759581923, + "learning_rate": 1.859681689747793e-05, + "loss": 1.1224, "step": 4155 }, { - "epoch": 0.8714615223317257, - "grad_norm": 5.943510227564201, - "learning_rate": 1.664835012352724e-05, - "loss": 1.658, + "epoch": 0.586839875741316, + "grad_norm": 3.897480694732494, + "learning_rate": 1.859603806112951e-05, + "loss": 1.2696, "step": 4156 }, { - "epoch": 0.8716712098972531, - "grad_norm": 6.728526701869622, - "learning_rate": 1.664665885349305e-05, - "loss": 1.6258, + "epoch": 0.5869810787913019, + "grad_norm": 3.364759248885293, + "learning_rate": 1.859525902501288e-05, + "loss": 0.9327, "step": 4157 }, { - "epoch": 0.8718808974627804, - "grad_norm": 6.927190531965406, - "learning_rate": 1.664496724280606e-05, - "loss": 2.2536, + "epoch": 0.5871222818412878, + "grad_norm": 3.8458697977703795, + "learning_rate": 1.859447978914614e-05, + "loss": 1.2428, "step": 4158 }, { - "epoch": 0.8720905850283078, - "grad_norm": 6.256615811841955, - "learning_rate": 1.6643275291552978e-05, - "loss": 1.7736, + "epoch": 0.5872634848912737, + "grad_norm": 4.4123686120616235, + "learning_rate": 1.85937003535474e-05, + "loss": 1.1181, "step": 4159 }, { - "epoch": 0.8723002725938351, - "grad_norm": 7.404477606781132, - "learning_rate": 1.6641582999820514e-05, - "loss": 1.8992, + "epoch": 0.5874046879412596, + "grad_norm": 4.076397130983413, + "learning_rate": 1.8592920718234775e-05, + "loss": 1.0772, "step": 4160 }, { - "epoch": 0.8725099601593626, - "grad_norm": 6.600040278912407, - "learning_rate": 1.66398903676954e-05, - "loss": 1.4958, + "epoch": 0.5875458909912454, + "grad_norm": 3.8956444640232397, + "learning_rate": 1.8592140883226384e-05, + "loss": 1.1652, "step": 4161 }, { - "epoch": 0.87271964772489, - "grad_norm": 6.533917000318811, - "learning_rate": 1.663819739526439e-05, - "loss": 1.6696, + "epoch": 0.5876870940412313, + "grad_norm": 3.4993230281651857, + "learning_rate": 1.859136084854035e-05, + "loss": 1.1086, "step": 4162 }, { - "epoch": 0.8729293352904173, - "grad_norm": 6.33142490413681, - "learning_rate": 1.6636504082614247e-05, - "loss": 1.9349, + "epoch": 0.5878282970912172, + "grad_norm": 3.2460513650642127, + "learning_rate": 1.85905806141948e-05, + "loss": 0.8666, "step": 4163 }, { - "epoch": 0.8731390228559447, - "grad_norm": 5.912891503656744, - "learning_rate": 1.6634810429831764e-05, - "loss": 1.5889, + "epoch": 0.5879695001412031, + "grad_norm": 3.59038934578245, + "learning_rate": 1.8589800180207865e-05, + "loss": 1.0769, "step": 4164 }, { - "epoch": 0.873348710421472, - "grad_norm": 5.8095362101669865, - "learning_rate": 1.6633116437003737e-05, - "loss": 1.9217, + "epoch": 0.588110703191189, + "grad_norm": 4.346430640078812, + "learning_rate": 1.8589019546597686e-05, + "loss": 1.2488, "step": 4165 }, { - "epoch": 0.8735583979869994, - "grad_norm": 6.691066716627281, - "learning_rate": 1.663142210421699e-05, - "loss": 1.7573, + "epoch": 0.5882519062411748, + "grad_norm": 4.067375155288011, + "learning_rate": 1.85882387133824e-05, + "loss": 1.1368, "step": 4166 }, { - "epoch": 0.8737680855525267, - "grad_norm": 6.795445764657608, - "learning_rate": 1.6629727431558355e-05, - "loss": 1.9464, + "epoch": 0.5883931092911607, + "grad_norm": 3.2877716655822526, + "learning_rate": 1.858745768058016e-05, + "loss": 0.804, "step": 4167 }, { - "epoch": 0.8739777731180541, - "grad_norm": 7.396434034826449, - "learning_rate": 1.662803241911469e-05, - "loss": 1.4333, + "epoch": 0.5885343123411466, + "grad_norm": 3.4526663372858306, + "learning_rate": 1.8586676448209107e-05, + "loss": 1.2041, "step": 4168 }, { - "epoch": 0.8741874606835814, - "grad_norm": 6.144683077864973, - "learning_rate": 1.662633706697287e-05, - "loss": 1.8239, + "epoch": 0.5886755153911325, + "grad_norm": 3.579941098848215, + "learning_rate": 1.8585895016287404e-05, + "loss": 1.0065, "step": 4169 }, { - "epoch": 0.8743971482491089, - "grad_norm": 5.607598988224902, - "learning_rate": 1.6624641375219782e-05, - "loss": 1.5463, + "epoch": 0.5888167184411184, + "grad_norm": 4.2726062913530045, + "learning_rate": 1.8585113384833206e-05, + "loss": 1.0268, "step": 4170 }, { - "epoch": 0.8746068358146362, - "grad_norm": 6.436820754770758, - "learning_rate": 1.6622945343942336e-05, - "loss": 1.7603, + "epoch": 0.5889579214911042, + "grad_norm": 3.1946690958879755, + "learning_rate": 1.8584331553864686e-05, + "loss": 0.8639, "step": 4171 }, { - "epoch": 0.8748165233801636, - "grad_norm": 6.193795991470847, - "learning_rate": 1.6621248973227454e-05, - "loss": 1.5974, + "epoch": 0.58909912454109, + "grad_norm": 3.7377133572636487, + "learning_rate": 1.85835495234e-05, + "loss": 1.0931, "step": 4172 }, { - "epoch": 0.8750262109456909, - "grad_norm": 6.503986743050873, - "learning_rate": 1.6619552263162076e-05, - "loss": 1.9723, + "epoch": 0.5892403275910759, + "grad_norm": 4.032285210083998, + "learning_rate": 1.8582767293457337e-05, + "loss": 1.2589, "step": 4173 }, { - "epoch": 0.8752358985112183, - "grad_norm": 8.326753326907651, - "learning_rate": 1.6617855213833162e-05, - "loss": 1.9234, + "epoch": 0.5893815306410618, + "grad_norm": 3.740966082866538, + "learning_rate": 1.8581984864054866e-05, + "loss": 0.9042, "step": 4174 }, { - "epoch": 0.8754455860767456, - "grad_norm": 6.146816444851391, - "learning_rate": 1.661615782532769e-05, - "loss": 1.7026, + "epoch": 0.5895227336910477, + "grad_norm": 3.295802433210191, + "learning_rate": 1.8581202235210773e-05, + "loss": 1.041, "step": 4175 }, { - "epoch": 0.875655273642273, - "grad_norm": 6.895539325954905, - "learning_rate": 1.661446009773266e-05, - "loss": 1.6319, + "epoch": 0.5896639367410336, + "grad_norm": 3.414815003774926, + "learning_rate": 1.858041940694325e-05, + "loss": 1.025, "step": 4176 }, { - "epoch": 0.8758649612078003, - "grad_norm": 6.481939533611673, - "learning_rate": 1.6612762031135075e-05, - "loss": 1.953, + "epoch": 0.5898051397910195, + "grad_norm": 4.1443396562225, + "learning_rate": 1.857963637927048e-05, + "loss": 1.2908, "step": 4177 }, { - "epoch": 0.8760746487733277, - "grad_norm": 6.156340351809294, - "learning_rate": 1.6611063625621963e-05, - "loss": 1.3428, + "epoch": 0.5899463428410053, + "grad_norm": 3.7999804497848286, + "learning_rate": 1.8578853152210666e-05, + "loss": 1.1903, "step": 4178 }, { - "epoch": 0.876284336338855, - "grad_norm": 6.244744926077012, - "learning_rate": 1.6609364881280375e-05, - "loss": 1.4629, + "epoch": 0.5900875458909912, + "grad_norm": 3.363521081523304, + "learning_rate": 1.8578069725782013e-05, + "loss": 0.8813, "step": 4179 }, { - "epoch": 0.8764940239043825, - "grad_norm": 6.177259750176226, - "learning_rate": 1.6607665798197376e-05, - "loss": 1.4052, + "epoch": 0.5902287489409771, + "grad_norm": 3.7934736403695335, + "learning_rate": 1.8577286100002723e-05, + "loss": 1.2552, "step": 4180 }, { - "epoch": 0.8767037114699099, - "grad_norm": 6.388589274637202, - "learning_rate": 1.6605966376460046e-05, - "loss": 1.8215, + "epoch": 0.590369951990963, + "grad_norm": 3.4828122939862243, + "learning_rate": 1.8576502274891007e-05, + "loss": 1.0573, "step": 4181 }, { - "epoch": 0.8769133990354372, - "grad_norm": 6.286666901060453, - "learning_rate": 1.6604266616155476e-05, - "loss": 1.607, + "epoch": 0.5905111550409489, + "grad_norm": 4.175525563806178, + "learning_rate": 1.8575718250465083e-05, + "loss": 1.2271, "step": 4182 }, { - "epoch": 0.8771230866009646, - "grad_norm": 6.310579091458862, - "learning_rate": 1.6602566517370793e-05, - "loss": 1.5558, + "epoch": 0.5906523580909347, + "grad_norm": 3.5786096602089206, + "learning_rate": 1.8574934026743168e-05, + "loss": 0.8729, "step": 4183 }, { - "epoch": 0.8773327741664919, - "grad_norm": 6.313980374218684, - "learning_rate": 1.660086608019312e-05, - "loss": 1.8328, + "epoch": 0.5907935611409206, + "grad_norm": 4.089116699523889, + "learning_rate": 1.8574149603743495e-05, + "loss": 1.176, "step": 4184 }, { - "epoch": 0.8775424617320193, - "grad_norm": 6.595329085417071, - "learning_rate": 1.6599165304709616e-05, - "loss": 1.6355, + "epoch": 0.5909347641909065, + "grad_norm": 3.8663664284317125, + "learning_rate": 1.8573364981484285e-05, + "loss": 1.0262, "step": 4185 }, { - "epoch": 0.8777521492975466, - "grad_norm": 7.286760683543323, - "learning_rate": 1.659746419100744e-05, - "loss": 1.6846, + "epoch": 0.5910759672408924, + "grad_norm": 3.741639408603783, + "learning_rate": 1.8572580159983772e-05, + "loss": 1.1781, "step": 4186 }, { - "epoch": 0.877961836863074, - "grad_norm": 6.035817162734676, - "learning_rate": 1.6595762739173785e-05, - "loss": 1.7408, + "epoch": 0.5912171702908783, + "grad_norm": 4.074612844350278, + "learning_rate": 1.8571795139260206e-05, + "loss": 1.2962, "step": 4187 }, { - "epoch": 0.8781715244286014, - "grad_norm": 5.5316037705403724, - "learning_rate": 1.659406094929585e-05, - "loss": 1.4016, + "epoch": 0.5913583733408642, + "grad_norm": 3.385148177459225, + "learning_rate": 1.8571009919331817e-05, + "loss": 1.1209, "step": 4188 }, { - "epoch": 0.8783812119941288, - "grad_norm": 5.674474812781397, - "learning_rate": 1.6592358821460856e-05, - "loss": 1.4591, + "epoch": 0.59149957639085, + "grad_norm": 3.699640781667295, + "learning_rate": 1.857022450021686e-05, + "loss": 1.1956, "step": 4189 }, { - "epoch": 0.8785908995596561, - "grad_norm": 8.007116948368038, - "learning_rate": 1.6590656355756033e-05, - "loss": 2.2284, + "epoch": 0.5916407794408359, + "grad_norm": 3.4254288626768834, + "learning_rate": 1.8569438881933587e-05, + "loss": 0.8202, "step": 4190 }, { - "epoch": 0.8788005871251835, - "grad_norm": 6.299430611650503, - "learning_rate": 1.6588953552268644e-05, - "loss": 1.792, + "epoch": 0.5917819824908218, + "grad_norm": 3.1517625023060694, + "learning_rate": 1.8568653064500257e-05, + "loss": 0.8292, "step": 4191 }, { - "epoch": 0.8790102746907108, - "grad_norm": 6.975303432966199, - "learning_rate": 1.6587250411085956e-05, - "loss": 1.7971, + "epoch": 0.5919231855408077, + "grad_norm": 3.5690309824726576, + "learning_rate": 1.8567867047935128e-05, + "loss": 0.978, "step": 4192 }, { - "epoch": 0.8792199622562382, - "grad_norm": 5.836448985337462, - "learning_rate": 1.658554693229526e-05, - "loss": 1.4385, + "epoch": 0.5920643885907936, + "grad_norm": 4.598491553393195, + "learning_rate": 1.8567080832256467e-05, + "loss": 1.1733, "step": 4193 }, { - "epoch": 0.8794296498217655, - "grad_norm": 5.998244069465465, - "learning_rate": 1.6583843115983862e-05, - "loss": 1.7741, + "epoch": 0.5922055916407795, + "grad_norm": 3.5887122414354784, + "learning_rate": 1.8566294417482552e-05, + "loss": 1.0617, "step": 4194 }, { - "epoch": 0.8796393373872929, - "grad_norm": 5.695771881860834, - "learning_rate": 1.6582138962239087e-05, - "loss": 1.6615, + "epoch": 0.5923467946907653, + "grad_norm": 3.419623106137663, + "learning_rate": 1.8565507803631653e-05, + "loss": 0.9393, "step": 4195 }, { - "epoch": 0.8798490249528202, - "grad_norm": 6.657903649702837, - "learning_rate": 1.658043447114827e-05, - "loss": 1.731, + "epoch": 0.5924879977407512, + "grad_norm": 3.8740404633552097, + "learning_rate": 1.8564720990722048e-05, + "loss": 0.9876, "step": 4196 }, { - "epoch": 0.8800587125183477, - "grad_norm": 6.231902850585657, - "learning_rate": 1.6578729642798776e-05, - "loss": 1.7567, + "epoch": 0.5926292007907371, + "grad_norm": 3.5426007129037513, + "learning_rate": 1.856393397877203e-05, + "loss": 1.1348, "step": 4197 }, { - "epoch": 0.8802684000838751, - "grad_norm": 6.241851435191944, - "learning_rate": 1.657702447727798e-05, - "loss": 2.1111, + "epoch": 0.592770403840723, + "grad_norm": 3.807543033676919, + "learning_rate": 1.8563146767799884e-05, + "loss": 0.9695, "step": 4198 }, { - "epoch": 0.8804780876494024, - "grad_norm": 6.194869427874186, - "learning_rate": 1.657531897467327e-05, - "loss": 1.8413, + "epoch": 0.5929116068907089, + "grad_norm": 4.016723350460209, + "learning_rate": 1.8562359357823907e-05, + "loss": 0.9035, "step": 4199 }, { - "epoch": 0.8806877752149298, - "grad_norm": 8.090924345094702, - "learning_rate": 1.6573613135072054e-05, - "loss": 2.0217, + "epoch": 0.5930528099406948, + "grad_norm": 3.562927018738239, + "learning_rate": 1.8561571748862394e-05, + "loss": 1.1548, "step": 4200 }, { - "epoch": 0.8808974627804571, - "grad_norm": 7.145242538444357, - "learning_rate": 1.657190695856177e-05, - "loss": 1.6433, + "epoch": 0.5931940129906806, + "grad_norm": 3.3446241065301168, + "learning_rate": 1.8560783940933655e-05, + "loss": 0.8988, "step": 4201 }, { - "epoch": 0.8811071503459845, - "grad_norm": 6.783958492129735, - "learning_rate": 1.657020044522985e-05, - "loss": 1.8909, + "epoch": 0.5933352160406665, + "grad_norm": 3.8292064786297253, + "learning_rate": 1.855999593405599e-05, + "loss": 1.1284, "step": 4202 }, { - "epoch": 0.8813168379115118, - "grad_norm": 7.084025728111844, - "learning_rate": 1.6568493595163767e-05, - "loss": 1.8949, + "epoch": 0.5934764190906524, + "grad_norm": 3.561859513307444, + "learning_rate": 1.8559207728247716e-05, + "loss": 0.9537, "step": 4203 }, { - "epoch": 0.8815265254770392, - "grad_norm": 5.511609751078453, - "learning_rate": 1.6566786408450994e-05, - "loss": 1.6822, + "epoch": 0.5936176221406383, + "grad_norm": 3.518081777293403, + "learning_rate": 1.8558419323527152e-05, + "loss": 1.1501, "step": 4204 }, { - "epoch": 0.8817362130425666, - "grad_norm": 5.861108809171588, - "learning_rate": 1.6565078885179023e-05, - "loss": 1.6471, + "epoch": 0.5937588251906241, + "grad_norm": 3.501337791651096, + "learning_rate": 1.855763071991262e-05, + "loss": 0.8717, "step": 4205 }, { - "epoch": 0.881945900608094, - "grad_norm": 5.9138373410186, - "learning_rate": 1.6563371025435378e-05, - "loss": 1.8203, + "epoch": 0.59390002824061, + "grad_norm": 4.265705061661282, + "learning_rate": 1.8556841917422443e-05, + "loss": 1.2586, "step": 4206 }, { - "epoch": 0.8821555881736213, - "grad_norm": 7.5940866238742375, - "learning_rate": 1.6561662829307584e-05, - "loss": 1.8631, + "epoch": 0.5940412312905958, + "grad_norm": 3.9160703937076953, + "learning_rate": 1.8556052916074957e-05, + "loss": 1.0242, "step": 4207 }, { - "epoch": 0.8823652757391487, - "grad_norm": 5.678065033979423, - "learning_rate": 1.6559954296883186e-05, - "loss": 1.259, + "epoch": 0.5941824343405817, + "grad_norm": 4.049207023650699, + "learning_rate": 1.8555263715888493e-05, + "loss": 1.1262, "step": 4208 }, { - "epoch": 0.882574963304676, - "grad_norm": 6.132008966117958, - "learning_rate": 1.6558245428249755e-05, - "loss": 1.7613, + "epoch": 0.5943236373905676, + "grad_norm": 3.1461353978224307, + "learning_rate": 1.85544743168814e-05, + "loss": 0.8149, "step": 4209 }, { - "epoch": 0.8827846508702034, - "grad_norm": 6.026407606866815, - "learning_rate": 1.6556536223494873e-05, - "loss": 1.8227, + "epoch": 0.5944648404405535, + "grad_norm": 4.392253975893292, + "learning_rate": 1.8553684719072017e-05, + "loss": 1.2961, "step": 4210 }, { - "epoch": 0.8829943384357307, - "grad_norm": 6.479886305033372, - "learning_rate": 1.6554826682706134e-05, - "loss": 1.7815, + "epoch": 0.5946060434905394, + "grad_norm": 4.2499285472583495, + "learning_rate": 1.8552894922478697e-05, + "loss": 0.9916, "step": 4211 }, { - "epoch": 0.8832040260012581, - "grad_norm": 6.272687322304673, - "learning_rate": 1.6553116805971158e-05, - "loss": 1.1637, + "epoch": 0.5947472465405252, + "grad_norm": 4.094266899473215, + "learning_rate": 1.855210492711979e-05, + "loss": 1.0899, "step": 4212 }, { - "epoch": 0.8834137135667854, - "grad_norm": 7.010320419088454, - "learning_rate": 1.6551406593377586e-05, - "loss": 2.0757, + "epoch": 0.5948884495905111, + "grad_norm": 3.8097712050021024, + "learning_rate": 1.8551314733013657e-05, + "loss": 1.096, "step": 4213 }, { - "epoch": 0.8836234011323129, - "grad_norm": 5.607070268571226, - "learning_rate": 1.6549696045013056e-05, - "loss": 1.4201, + "epoch": 0.595029652640497, + "grad_norm": 3.7426982491451244, + "learning_rate": 1.8550524340178664e-05, + "loss": 1.0881, "step": 4214 }, { - "epoch": 0.8838330886978402, - "grad_norm": 6.465862970307962, - "learning_rate": 1.6547985160965246e-05, - "loss": 1.7698, + "epoch": 0.5951708556904829, + "grad_norm": 3.7159524848947045, + "learning_rate": 1.8549733748633177e-05, + "loss": 1.1563, "step": 4215 }, { - "epoch": 0.8840427762633676, - "grad_norm": 8.046636789694077, - "learning_rate": 1.6546273941321843e-05, - "loss": 1.7464, + "epoch": 0.5953120587404688, + "grad_norm": 3.628120590810224, + "learning_rate": 1.8548942958395573e-05, + "loss": 0.9457, "step": 4216 }, { - "epoch": 0.884252463828895, - "grad_norm": 6.871983865045377, - "learning_rate": 1.6544562386170544e-05, - "loss": 1.8556, + "epoch": 0.5954532617904547, + "grad_norm": 3.9646476483645956, + "learning_rate": 1.8548151969484224e-05, + "loss": 1.0168, "step": 4217 }, { - "epoch": 0.8844621513944223, - "grad_norm": 7.02663216876567, - "learning_rate": 1.6542850495599073e-05, - "loss": 1.7666, + "epoch": 0.5955944648404405, + "grad_norm": 3.976566449011886, + "learning_rate": 1.8547360781917515e-05, + "loss": 1.2408, "step": 4218 }, { - "epoch": 0.8846718389599497, - "grad_norm": 6.364714001485779, - "learning_rate": 1.6541138269695166e-05, - "loss": 1.7579, + "epoch": 0.5957356678904264, + "grad_norm": 3.558609644510249, + "learning_rate": 1.8546569395713835e-05, + "loss": 0.9833, "step": 4219 }, { - "epoch": 0.884881526525477, - "grad_norm": 5.744510311875562, - "learning_rate": 1.653942570854658e-05, - "loss": 1.5383, + "epoch": 0.5958768709404123, + "grad_norm": 3.395662811036489, + "learning_rate": 1.854577781089157e-05, + "loss": 1.0397, "step": 4220 }, { - "epoch": 0.8850912140910044, - "grad_norm": 6.202910582922797, - "learning_rate": 1.6537712812241078e-05, - "loss": 1.7455, + "epoch": 0.5960180739903982, + "grad_norm": 3.6616115074976676, + "learning_rate": 1.854498602746912e-05, + "loss": 0.9314, "step": 4221 }, { - "epoch": 0.8853009016565317, - "grad_norm": 6.274524909498673, - "learning_rate": 1.653599958086646e-05, - "loss": 1.503, + "epoch": 0.5961592770403841, + "grad_norm": 3.769337826716327, + "learning_rate": 1.8544194045464888e-05, + "loss": 1.1068, "step": 4222 }, { - "epoch": 0.8855105892220592, - "grad_norm": 8.240448855122168, - "learning_rate": 1.6534286014510525e-05, - "loss": 1.6262, + "epoch": 0.59630048009037, + "grad_norm": 3.9955321243283195, + "learning_rate": 1.8543401864897274e-05, + "loss": 1.0406, "step": 4223 }, { - "epoch": 0.8857202767875865, - "grad_norm": 5.3901477754515605, - "learning_rate": 1.65325721132611e-05, - "loss": 1.6222, + "epoch": 0.5964416831403558, + "grad_norm": 4.216055356561233, + "learning_rate": 1.854260948578469e-05, + "loss": 1.275, "step": 4224 }, { - "epoch": 0.8859299643531139, - "grad_norm": 5.496054191013193, - "learning_rate": 1.6530857877206023e-05, - "loss": 1.8457, + "epoch": 0.5965828861903417, + "grad_norm": 4.231304418914608, + "learning_rate": 1.854181690814555e-05, + "loss": 1.182, "step": 4225 }, { - "epoch": 0.8861396519186412, - "grad_norm": 6.721973920138175, - "learning_rate": 1.6529143306433154e-05, - "loss": 1.8193, + "epoch": 0.5967240892403276, + "grad_norm": 3.7619344944707747, + "learning_rate": 1.8541024131998277e-05, + "loss": 1.1379, "step": 4226 }, { - "epoch": 0.8863493394841686, - "grad_norm": 6.789185424653191, - "learning_rate": 1.6527428401030365e-05, - "loss": 1.9454, + "epoch": 0.5968652922903135, + "grad_norm": 3.5626609559296023, + "learning_rate": 1.854023115736129e-05, + "loss": 1.0559, "step": 4227 }, { - "epoch": 0.8865590270496959, - "grad_norm": 7.414278783949691, - "learning_rate": 1.652571316108555e-05, - "loss": 2.1373, + "epoch": 0.5970064953402994, + "grad_norm": 3.2219965946883273, + "learning_rate": 1.853943798425302e-05, + "loss": 0.8866, "step": 4228 }, { - "epoch": 0.8867687146152233, - "grad_norm": 6.785899924502074, - "learning_rate": 1.6523997586686615e-05, - "loss": 1.7761, + "epoch": 0.5971476983902853, + "grad_norm": 3.996811555249157, + "learning_rate": 1.85386446126919e-05, + "loss": 1.1395, "step": 4229 }, { - "epoch": 0.8869784021807506, - "grad_norm": 7.069587241695219, - "learning_rate": 1.6522281677921492e-05, - "loss": 1.9637, + "epoch": 0.5972889014402711, + "grad_norm": 3.5233814852189247, + "learning_rate": 1.8537851042696363e-05, + "loss": 1.0329, "step": 4230 }, { - "epoch": 0.887188089746278, - "grad_norm": 6.102843097255973, - "learning_rate": 1.6520565434878116e-05, - "loss": 1.6843, + "epoch": 0.597430104490257, + "grad_norm": 3.443132049818685, + "learning_rate": 1.853705727428486e-05, + "loss": 1.2396, "step": 4231 }, { - "epoch": 0.8873977773118054, - "grad_norm": 6.775947982101723, - "learning_rate": 1.6518848857644457e-05, - "loss": 1.9318, + "epoch": 0.5975713075402429, + "grad_norm": 3.5988634046430628, + "learning_rate": 1.8536263307475826e-05, + "loss": 1.088, "step": 4232 }, { - "epoch": 0.8876074648773328, - "grad_norm": 6.376389146725458, - "learning_rate": 1.6517131946308485e-05, - "loss": 1.6781, + "epoch": 0.5977125105902288, + "grad_norm": 3.765297979851038, + "learning_rate": 1.8535469142287723e-05, + "loss": 0.9556, "step": 4233 }, { - "epoch": 0.8878171524428602, - "grad_norm": 6.307086498055612, - "learning_rate": 1.65154147009582e-05, - "loss": 1.5653, + "epoch": 0.5978537136402147, + "grad_norm": 3.3080528605880883, + "learning_rate": 1.8534674778739004e-05, + "loss": 0.9631, "step": 4234 }, { - "epoch": 0.8880268400083875, - "grad_norm": 6.903887930849312, - "learning_rate": 1.6513697121681608e-05, - "loss": 1.8538, + "epoch": 0.5979949166902006, + "grad_norm": 4.460455095885312, + "learning_rate": 1.8533880216848126e-05, + "loss": 1.3145, "step": 4235 }, { - "epoch": 0.8882365275739149, - "grad_norm": 6.391011103727617, - "learning_rate": 1.6511979208566745e-05, - "loss": 1.8071, + "epoch": 0.5981361197401864, + "grad_norm": 3.9652244607295044, + "learning_rate": 1.853308545663356e-05, + "loss": 1.2007, "step": 4236 }, { - "epoch": 0.8884462151394422, - "grad_norm": 5.746264749818297, - "learning_rate": 1.6510260961701646e-05, - "loss": 1.9037, + "epoch": 0.5982773227901723, + "grad_norm": 4.05360503758188, + "learning_rate": 1.8532290498113776e-05, + "loss": 1.1863, "step": 4237 }, { - "epoch": 0.8886559027049696, - "grad_norm": 6.784776582153941, - "learning_rate": 1.6508542381174385e-05, - "loss": 2.0323, + "epoch": 0.5984185258401582, + "grad_norm": 4.4161962522304945, + "learning_rate": 1.853149534130724e-05, + "loss": 1.271, "step": 4238 }, { - "epoch": 0.8888655902704969, - "grad_norm": 7.3612835810126604, - "learning_rate": 1.650682346707304e-05, - "loss": 1.9431, + "epoch": 0.598559728890144, + "grad_norm": 3.683654874400558, + "learning_rate": 1.853069998623244e-05, + "loss": 1.1423, "step": 4239 }, { - "epoch": 0.8890752778360244, - "grad_norm": 6.504269313988687, - "learning_rate": 1.6505104219485705e-05, - "loss": 1.918, + "epoch": 0.5987009319401299, + "grad_norm": 4.193599015810658, + "learning_rate": 1.8529904432907858e-05, + "loss": 1.0244, "step": 4240 }, { - "epoch": 0.8892849654015517, - "grad_norm": 5.167449973790571, - "learning_rate": 1.65033846385005e-05, - "loss": 1.4456, + "epoch": 0.5988421349901157, + "grad_norm": 3.6488400158111562, + "learning_rate": 1.852910868135198e-05, + "loss": 1.1112, "step": 4241 }, { - "epoch": 0.8894946529670791, - "grad_norm": 6.397935277898472, - "learning_rate": 1.650166472420555e-05, - "loss": 1.6406, + "epoch": 0.5989833380401016, + "grad_norm": 3.5124736225468487, + "learning_rate": 1.8528312731583295e-05, + "loss": 0.838, "step": 4242 }, { - "epoch": 0.8897043405326064, - "grad_norm": 6.162533108459571, - "learning_rate": 1.6499944476689006e-05, - "loss": 1.7019, + "epoch": 0.5991245410900875, + "grad_norm": 3.5154409822658867, + "learning_rate": 1.852751658362031e-05, + "loss": 0.8923, "step": 4243 }, { - "epoch": 0.8899140280981338, - "grad_norm": 7.077215973426679, - "learning_rate": 1.6498223896039035e-05, - "loss": 1.4343, + "epoch": 0.5992657441400734, + "grad_norm": 3.6414373337053596, + "learning_rate": 1.8526720237481518e-05, + "loss": 0.8413, "step": 4244 }, { - "epoch": 0.8901237156636611, - "grad_norm": 6.474706000905946, - "learning_rate": 1.6496502982343817e-05, - "loss": 1.5677, + "epoch": 0.5994069471900593, + "grad_norm": 3.7847290395824844, + "learning_rate": 1.8525923693185436e-05, + "loss": 1.1494, "step": 4245 }, { - "epoch": 0.8903334032291885, - "grad_norm": 6.030284493007642, - "learning_rate": 1.6494781735691555e-05, - "loss": 1.6542, + "epoch": 0.5995481502400452, + "grad_norm": 3.190683007903166, + "learning_rate": 1.8525126950750564e-05, + "loss": 0.8344, "step": 4246 }, { - "epoch": 0.8905430907947158, - "grad_norm": 7.082140868043991, - "learning_rate": 1.6493060156170463e-05, - "loss": 1.8232, + "epoch": 0.599689353290031, + "grad_norm": 3.556293311070704, + "learning_rate": 1.8524330010195427e-05, + "loss": 1.1617, "step": 4247 }, { - "epoch": 0.8907527783602432, - "grad_norm": 6.145336002177126, - "learning_rate": 1.649133824386878e-05, - "loss": 1.5695, + "epoch": 0.5998305563400169, + "grad_norm": 3.177177563934956, + "learning_rate": 1.852353287153854e-05, + "loss": 0.8884, "step": 4248 }, { - "epoch": 0.8909624659257706, - "grad_norm": 6.770046406997616, - "learning_rate": 1.648961599887475e-05, - "loss": 1.518, + "epoch": 0.5999717593900028, + "grad_norm": 4.4153313057268555, + "learning_rate": 1.852273553479843e-05, + "loss": 1.1623, "step": 4249 }, { - "epoch": 0.891172153491298, - "grad_norm": 6.376799723545786, - "learning_rate": 1.6487893421276643e-05, - "loss": 1.6271, + "epoch": 0.6001129624399887, + "grad_norm": 5.493923319127232, + "learning_rate": 1.8521937999993627e-05, + "loss": 1.2422, "step": 4250 }, { - "epoch": 0.8913818410568253, - "grad_norm": 7.292859050577482, - "learning_rate": 1.648617051116275e-05, - "loss": 1.7005, + "epoch": 0.6002541654899746, + "grad_norm": 3.7266683297269165, + "learning_rate": 1.852114026714267e-05, + "loss": 1.1787, "step": 4251 }, { - "epoch": 0.8915915286223527, - "grad_norm": 6.668519049889091, - "learning_rate": 1.6484447268621364e-05, - "loss": 1.5505, + "epoch": 0.6003953685399605, + "grad_norm": 4.218267163153965, + "learning_rate": 1.8520342336264082e-05, + "loss": 1.1242, "step": 4252 }, { - "epoch": 0.8918012161878801, - "grad_norm": 7.278232045203354, - "learning_rate": 1.648272369374081e-05, - "loss": 1.816, + "epoch": 0.6005365715899463, + "grad_norm": 3.6243595320272783, + "learning_rate": 1.851954420737643e-05, + "loss": 1.0289, "step": 4253 }, { - "epoch": 0.8920109037534074, - "grad_norm": 7.177546690738252, - "learning_rate": 1.6480999786609423e-05, - "loss": 1.8321, + "epoch": 0.6006777746399322, + "grad_norm": 4.289339920288096, + "learning_rate": 1.8518745880498242e-05, + "loss": 1.1592, "step": 4254 }, { - "epoch": 0.8922205913189348, - "grad_norm": 7.95248473626415, - "learning_rate": 1.6479275547315558e-05, - "loss": 1.5383, + "epoch": 0.6008189776899181, + "grad_norm": 3.7300587142374866, + "learning_rate": 1.8517947355648084e-05, + "loss": 0.9113, "step": 4255 }, { - "epoch": 0.8924302788844621, - "grad_norm": 6.952452889383223, - "learning_rate": 1.6477550975947584e-05, - "loss": 1.4688, + "epoch": 0.600960180739904, + "grad_norm": 3.8710212539386974, + "learning_rate": 1.8517148632844502e-05, + "loss": 1.0612, "step": 4256 }, { - "epoch": 0.8926399664499896, - "grad_norm": 6.254976002686117, - "learning_rate": 1.6475826072593885e-05, - "loss": 1.5773, + "epoch": 0.6011013837898899, + "grad_norm": 3.3473262963679593, + "learning_rate": 1.851634971210607e-05, + "loss": 1.0003, "step": 4257 }, { - "epoch": 0.8928496540155169, - "grad_norm": 6.057883195296067, - "learning_rate": 1.647410083734287e-05, - "loss": 1.5676, + "epoch": 0.6012425868398757, + "grad_norm": 3.5941393630773066, + "learning_rate": 1.8515550593451347e-05, + "loss": 0.9572, "step": 4258 }, { - "epoch": 0.8930593415810443, - "grad_norm": 6.095931765122487, - "learning_rate": 1.647237527028296e-05, - "loss": 1.5402, + "epoch": 0.6013837898898616, + "grad_norm": 3.7935664549859673, + "learning_rate": 1.8514751276898903e-05, + "loss": 1.0241, "step": 4259 }, { - "epoch": 0.8932690291465716, - "grad_norm": 8.033310954860912, - "learning_rate": 1.6470649371502588e-05, - "loss": 1.8677, + "epoch": 0.6015249929398475, + "grad_norm": 4.366609812852916, + "learning_rate": 1.8513951762467318e-05, + "loss": 0.9878, "step": 4260 }, { - "epoch": 0.893478716712099, - "grad_norm": 6.165255631743311, - "learning_rate": 1.6468923141090214e-05, - "loss": 1.4886, + "epoch": 0.6016661959898334, + "grad_norm": 3.5354635410452815, + "learning_rate": 1.851315205017517e-05, + "loss": 1.0355, "step": 4261 }, { - "epoch": 0.8936884042776263, - "grad_norm": 7.286344461469252, - "learning_rate": 1.646719657913431e-05, - "loss": 1.9981, + "epoch": 0.6018073990398193, + "grad_norm": 3.200996948803274, + "learning_rate": 1.851235214004105e-05, + "loss": 1.1244, "step": 4262 }, { - "epoch": 0.8938980918431537, - "grad_norm": 7.196530703907088, - "learning_rate": 1.6465469685723365e-05, - "loss": 1.9035, + "epoch": 0.6019486020898052, + "grad_norm": 2.9081502393929854, + "learning_rate": 1.8511552032083534e-05, + "loss": 0.8222, "step": 4263 }, { - "epoch": 0.894107779408681, - "grad_norm": 7.4650745346189895, - "learning_rate": 1.646374246094588e-05, - "loss": 1.9181, + "epoch": 0.602089805139791, + "grad_norm": 3.4199189479816603, + "learning_rate": 1.8510751726321233e-05, + "loss": 1.1355, "step": 4264 }, { - "epoch": 0.8943174669742084, - "grad_norm": 7.725337489712534, - "learning_rate": 1.6462014904890387e-05, - "loss": 1.8381, + "epoch": 0.6022310081897769, + "grad_norm": 4.767391961866176, + "learning_rate": 1.850995122277273e-05, + "loss": 1.2989, "step": 4265 }, { - "epoch": 0.8945271545397357, - "grad_norm": 5.39072190055967, - "learning_rate": 1.6460287017645424e-05, - "loss": 1.4073, + "epoch": 0.6023722112397628, + "grad_norm": 3.929014871784442, + "learning_rate": 1.850915052145664e-05, + "loss": 1.1773, "step": 4266 }, { - "epoch": 0.8947368421052632, - "grad_norm": 6.030636488298617, - "learning_rate": 1.6458558799299544e-05, - "loss": 1.7902, + "epoch": 0.6025134142897487, + "grad_norm": 3.6256071754915244, + "learning_rate": 1.8508349622391567e-05, + "loss": 1.0806, "step": 4267 }, { - "epoch": 0.8949465296707905, - "grad_norm": 5.771952500052789, - "learning_rate": 1.6456830249941327e-05, - "loss": 1.5278, + "epoch": 0.6026546173397346, + "grad_norm": 4.151512387625766, + "learning_rate": 1.850754852559612e-05, + "loss": 1.298, "step": 4268 }, { - "epoch": 0.8951562172363179, - "grad_norm": 8.109291201178733, - "learning_rate": 1.6455101369659355e-05, - "loss": 2.0938, + "epoch": 0.6027958203897205, + "grad_norm": 3.9615881296394737, + "learning_rate": 1.8506747231088927e-05, + "loss": 1.0244, "step": 4269 }, { - "epoch": 0.8953659048018453, - "grad_norm": 6.619845440592631, - "learning_rate": 1.6453372158542248e-05, - "loss": 1.7231, + "epoch": 0.6029370234397063, + "grad_norm": 3.095863356698826, + "learning_rate": 1.8505945738888593e-05, + "loss": 0.9552, "step": 4270 }, { - "epoch": 0.8955755923673726, - "grad_norm": 7.555751792139053, - "learning_rate": 1.645164261667862e-05, - "loss": 1.6357, + "epoch": 0.6030782264896922, + "grad_norm": 4.167119209725287, + "learning_rate": 1.850514404901376e-05, + "loss": 1.2741, "step": 4271 }, { - "epoch": 0.8957852799329, - "grad_norm": 6.473899216348755, - "learning_rate": 1.6449912744157122e-05, - "loss": 1.6218, + "epoch": 0.6032194295396781, + "grad_norm": 3.845709141474949, + "learning_rate": 1.850434216148305e-05, + "loss": 0.9657, "step": 4272 }, { - "epoch": 0.8959949674984273, - "grad_norm": 8.22273344946025, - "learning_rate": 1.6448182541066405e-05, - "loss": 1.8389, + "epoch": 0.603360632589664, + "grad_norm": 3.233359135675125, + "learning_rate": 1.85035400763151e-05, + "loss": 0.8752, "step": 4273 }, { - "epoch": 0.8962046550639547, - "grad_norm": 6.256457764211807, - "learning_rate": 1.6446452007495155e-05, - "loss": 1.6211, + "epoch": 0.6035018356396498, + "grad_norm": 3.677522602664705, + "learning_rate": 1.8502737793528552e-05, + "loss": 1.1389, "step": 4274 }, { - "epoch": 0.896414342629482, - "grad_norm": 6.48400698732034, - "learning_rate": 1.6444721143532053e-05, - "loss": 1.8175, + "epoch": 0.6036430386896356, + "grad_norm": 2.8577663112988447, + "learning_rate": 1.850193531314205e-05, + "loss": 0.9664, "step": 4275 }, { - "epoch": 0.8966240301950095, - "grad_norm": 6.987088345497975, - "learning_rate": 1.6442989949265817e-05, - "loss": 1.919, + "epoch": 0.6037842417396215, + "grad_norm": 3.8460644676733753, + "learning_rate": 1.850113263517424e-05, + "loss": 1.2241, "step": 4276 }, { - "epoch": 0.8968337177605368, - "grad_norm": 6.462687716003618, - "learning_rate": 1.6441258424785173e-05, - "loss": 1.5565, + "epoch": 0.6039254447896074, + "grad_norm": 3.779842225333164, + "learning_rate": 1.8500329759643785e-05, + "loss": 1.0771, "step": 4277 }, { - "epoch": 0.8970434053260642, - "grad_norm": 5.79865585686706, - "learning_rate": 1.643952657017886e-05, - "loss": 1.7655, + "epoch": 0.6040666478395933, + "grad_norm": 3.2686051674757364, + "learning_rate": 1.849952668656933e-05, + "loss": 0.8744, "step": 4278 }, { - "epoch": 0.8972530928915915, - "grad_norm": 6.421857247942963, - "learning_rate": 1.6437794385535642e-05, - "loss": 1.7764, + "epoch": 0.6042078508895792, + "grad_norm": 3.7801485055639312, + "learning_rate": 1.849872341596955e-05, + "loss": 1.1913, "step": 4279 }, { - "epoch": 0.8974627804571189, - "grad_norm": 7.030308663351112, - "learning_rate": 1.64360618709443e-05, - "loss": 1.8292, + "epoch": 0.6043490539395651, + "grad_norm": 3.2090358461520716, + "learning_rate": 1.849791994786311e-05, + "loss": 0.9344, "step": 4280 }, { - "epoch": 0.8976724680226462, - "grad_norm": 6.993477496168257, - "learning_rate": 1.6434329026493617e-05, - "loss": 1.9691, + "epoch": 0.604490256989551, + "grad_norm": 4.241832249359513, + "learning_rate": 1.8497116282268678e-05, + "loss": 1.1513, "step": 4281 }, { - "epoch": 0.8978821555881736, - "grad_norm": 6.635039965067503, - "learning_rate": 1.643259585227242e-05, - "loss": 1.8039, + "epoch": 0.6046314600395368, + "grad_norm": 4.255957350041253, + "learning_rate": 1.849631241920493e-05, + "loss": 1.441, "step": 4282 }, { - "epoch": 0.8980918431537009, - "grad_norm": 7.292677118914848, - "learning_rate": 1.643086234836952e-05, - "loss": 1.9571, + "epoch": 0.6047726630895227, + "grad_norm": 4.289389383411926, + "learning_rate": 1.8495508358690553e-05, + "loss": 1.2956, "step": 4283 }, { - "epoch": 0.8983015307192284, - "grad_norm": 7.020340822127908, - "learning_rate": 1.6429128514873776e-05, - "loss": 1.7692, + "epoch": 0.6049138661395086, + "grad_norm": 4.742569549176212, + "learning_rate": 1.849470410074423e-05, + "loss": 1.1518, "step": 4284 }, { - "epoch": 0.8985112182847557, - "grad_norm": 6.617333002416124, - "learning_rate": 1.6427394351874046e-05, - "loss": 1.7467, + "epoch": 0.6050550691894945, + "grad_norm": 4.05783168612351, + "learning_rate": 1.8493899645384655e-05, + "loss": 1.0151, "step": 4285 }, { - "epoch": 0.8987209058502831, - "grad_norm": 6.045944329224551, - "learning_rate": 1.6425659859459204e-05, - "loss": 1.5847, + "epoch": 0.6051962722394804, + "grad_norm": 3.7282836227336755, + "learning_rate": 1.849309499263052e-05, + "loss": 0.8935, "step": 4286 }, { - "epoch": 0.8989305934158105, - "grad_norm": 6.015326204713119, - "learning_rate": 1.6423925037718152e-05, - "loss": 1.5621, + "epoch": 0.6053374752894662, + "grad_norm": 3.9518335745039996, + "learning_rate": 1.8492290142500525e-05, + "loss": 1.4142, "step": 4287 }, { - "epoch": 0.8991402809813378, - "grad_norm": 6.608819163596873, - "learning_rate": 1.64221898867398e-05, - "loss": 1.5137, + "epoch": 0.6054786783394521, + "grad_norm": 4.660699773767614, + "learning_rate": 1.8491485095013373e-05, + "loss": 1.3165, "step": 4288 }, { - "epoch": 0.8993499685468652, - "grad_norm": 7.098223131107678, - "learning_rate": 1.6420454406613075e-05, - "loss": 1.7714, + "epoch": 0.605619881389438, + "grad_norm": 3.6905204640720464, + "learning_rate": 1.8490679850187777e-05, + "loss": 0.8193, "step": 4289 }, { - "epoch": 0.8995596561123925, - "grad_norm": 8.486568921824515, - "learning_rate": 1.641871859742693e-05, - "loss": 2.1459, + "epoch": 0.6057610844394239, + "grad_norm": 3.3390000469663157, + "learning_rate": 1.8489874408042445e-05, + "loss": 0.9842, "step": 4290 }, { - "epoch": 0.8997693436779199, - "grad_norm": 6.34059069652641, - "learning_rate": 1.641698245927032e-05, - "loss": 1.608, + "epoch": 0.6059022874894098, + "grad_norm": 4.271465888664024, + "learning_rate": 1.8489068768596102e-05, + "loss": 1.2088, "step": 4291 }, { - "epoch": 0.8999790312434472, - "grad_norm": 6.233184821251443, - "learning_rate": 1.6415245992232234e-05, - "loss": 1.4338, + "epoch": 0.6060434905393957, + "grad_norm": 3.9510854025319784, + "learning_rate": 1.8488262931867464e-05, + "loss": 0.9423, "step": 4292 }, { - "epoch": 0.9001887188089747, - "grad_norm": 7.810188675632155, - "learning_rate": 1.641350919640166e-05, - "loss": 1.7228, + "epoch": 0.6061846935893815, + "grad_norm": 3.5102377307477304, + "learning_rate": 1.848745689787526e-05, + "loss": 0.9111, "step": 4293 }, { - "epoch": 0.900398406374502, - "grad_norm": 7.736446474571807, - "learning_rate": 1.6411772071867616e-05, - "loss": 1.8537, + "epoch": 0.6063258966393674, + "grad_norm": 3.828347108219323, + "learning_rate": 1.848665066663823e-05, + "loss": 1.1261, "step": 4294 }, { - "epoch": 0.9006080939400294, - "grad_norm": 5.750950421497219, - "learning_rate": 1.6410034618719136e-05, - "loss": 1.5984, + "epoch": 0.6064670996893533, + "grad_norm": 3.1182586821432716, + "learning_rate": 1.8485844238175096e-05, + "loss": 0.8194, "step": 4295 }, { - "epoch": 0.9008177815055567, - "grad_norm": 9.687445975352974, - "learning_rate": 1.6408296837045262e-05, - "loss": 1.9094, + "epoch": 0.6066083027393392, + "grad_norm": 3.268083492131184, + "learning_rate": 1.848503761250461e-05, + "loss": 0.9082, "step": 4296 }, { - "epoch": 0.9010274690710841, - "grad_norm": 7.057526666639502, - "learning_rate": 1.640655872693506e-05, - "loss": 1.6042, + "epoch": 0.6067495057893251, + "grad_norm": 3.805365360832415, + "learning_rate": 1.8484230789645513e-05, + "loss": 0.979, "step": 4297 }, { - "epoch": 0.9012371566366114, - "grad_norm": 6.9696296304593, - "learning_rate": 1.6404820288477613e-05, - "loss": 1.4502, + "epoch": 0.606890708839311, + "grad_norm": 3.704003677401258, + "learning_rate": 1.848342376961656e-05, + "loss": 0.9663, "step": 4298 }, { - "epoch": 0.9014468442021388, - "grad_norm": 5.529859940652647, - "learning_rate": 1.6403081521762016e-05, - "loss": 1.9686, + "epoch": 0.6070319118892968, + "grad_norm": 3.5517112409648597, + "learning_rate": 1.8482616552436496e-05, + "loss": 1.0243, "step": 4299 }, { - "epoch": 0.9016565317676661, - "grad_norm": 5.7892938212460825, - "learning_rate": 1.640134242687739e-05, - "loss": 1.5735, + "epoch": 0.6071731149392827, + "grad_norm": 3.2416394725908866, + "learning_rate": 1.8481809138124092e-05, + "loss": 0.9677, "step": 4300 }, { - "epoch": 0.9018662193331936, - "grad_norm": 7.5895051966563525, - "learning_rate": 1.6399603003912855e-05, - "loss": 1.8772, + "epoch": 0.6073143179892686, + "grad_norm": 3.46163890062641, + "learning_rate": 1.8481001526698105e-05, + "loss": 1.0115, "step": 4301 }, { - "epoch": 0.9020759068987209, - "grad_norm": 7.324913500832093, - "learning_rate": 1.639786325295757e-05, - "loss": 1.8145, + "epoch": 0.6074555210392545, + "grad_norm": 4.221948263292534, + "learning_rate": 1.8480193718177305e-05, + "loss": 1.2259, "step": 4302 }, { - "epoch": 0.9022855944642483, - "grad_norm": 7.505297131292149, - "learning_rate": 1.63961231741007e-05, - "loss": 1.9549, + "epoch": 0.6075967240892404, + "grad_norm": 3.8160697871476645, + "learning_rate": 1.8479385712580464e-05, + "loss": 0.8947, "step": 4303 }, { - "epoch": 0.9024952820297756, - "grad_norm": 6.298140946984438, - "learning_rate": 1.639438276743142e-05, - "loss": 1.7429, + "epoch": 0.6077379271392263, + "grad_norm": 3.520242089070327, + "learning_rate": 1.8478577509926367e-05, + "loss": 0.8917, "step": 4304 }, { - "epoch": 0.902704969595303, - "grad_norm": 7.627588090340259, - "learning_rate": 1.639264203303893e-05, - "loss": 2.0589, + "epoch": 0.6078791301892121, + "grad_norm": 2.960125950329372, + "learning_rate": 1.8477769110233782e-05, + "loss": 0.7706, "step": 4305 }, { - "epoch": 0.9029146571608304, - "grad_norm": 7.115784869860814, - "learning_rate": 1.6390900971012454e-05, - "loss": 1.8725, + "epoch": 0.608020333239198, + "grad_norm": 3.948698253197586, + "learning_rate": 1.847696051352151e-05, + "loss": 1.1429, "step": 4306 }, { - "epoch": 0.9031243447263577, - "grad_norm": 6.528479020808057, - "learning_rate": 1.638915958144122e-05, - "loss": 1.7042, + "epoch": 0.6081615362891839, + "grad_norm": 4.900719500156943, + "learning_rate": 1.8476151719808335e-05, + "loss": 0.8102, "step": 4307 }, { - "epoch": 0.9033340322918851, - "grad_norm": 6.041012901647171, - "learning_rate": 1.638741786441447e-05, - "loss": 1.6954, + "epoch": 0.6083027393391697, + "grad_norm": 3.857864701407322, + "learning_rate": 1.8475342729113053e-05, + "loss": 1.0524, "step": 4308 }, { - "epoch": 0.9035437198574124, - "grad_norm": 7.694660109251331, - "learning_rate": 1.638567582002148e-05, - "loss": 1.9204, + "epoch": 0.6084439423891556, + "grad_norm": 3.642662758904823, + "learning_rate": 1.8474533541454466e-05, + "loss": 1.2191, "step": 4309 }, { - "epoch": 0.9037534074229399, - "grad_norm": 6.694175874116762, - "learning_rate": 1.6383933448351525e-05, - "loss": 2.03, + "epoch": 0.6085851454391414, + "grad_norm": 3.5845742752865517, + "learning_rate": 1.8473724156851384e-05, + "loss": 0.9252, "step": 4310 }, { - "epoch": 0.9039630949884672, - "grad_norm": 6.5073501046553845, - "learning_rate": 1.638219074949391e-05, - "loss": 1.5927, + "epoch": 0.6087263484891273, + "grad_norm": 4.05289204848755, + "learning_rate": 1.847291457532261e-05, + "loss": 1.1514, "step": 4311 }, { - "epoch": 0.9041727825539946, - "grad_norm": 6.503760192136076, - "learning_rate": 1.638044772353795e-05, - "loss": 1.9448, + "epoch": 0.6088675515391132, + "grad_norm": 3.6204961252813543, + "learning_rate": 1.8472104796886957e-05, + "loss": 1.1462, "step": 4312 }, { - "epoch": 0.9043824701195219, - "grad_norm": 5.492668909432616, - "learning_rate": 1.637870437057298e-05, - "loss": 1.3937, + "epoch": 0.6090087545890991, + "grad_norm": 3.6751279773185295, + "learning_rate": 1.8471294821563247e-05, + "loss": 0.8422, "step": 4313 }, { - "epoch": 0.9045921576850493, - "grad_norm": 6.99995971419565, - "learning_rate": 1.6376960690688348e-05, - "loss": 1.5515, + "epoch": 0.609149957639085, + "grad_norm": 3.919763420356692, + "learning_rate": 1.847048464937031e-05, + "loss": 1.3057, "step": 4314 }, { - "epoch": 0.9048018452505766, - "grad_norm": 7.000553223129831, - "learning_rate": 1.6375216683973415e-05, - "loss": 1.552, + "epoch": 0.6092911606890709, + "grad_norm": 3.3365861210899306, + "learning_rate": 1.8469674280326963e-05, + "loss": 0.943, "step": 4315 }, { - "epoch": 0.905011532816104, - "grad_norm": 6.1831041876948145, - "learning_rate": 1.6373472350517573e-05, - "loss": 1.4621, + "epoch": 0.6094323637390567, + "grad_norm": 4.002395484372098, + "learning_rate": 1.8468863714452044e-05, + "loss": 0.7989, "step": 4316 }, { - "epoch": 0.9052212203816313, - "grad_norm": 7.191624789877629, - "learning_rate": 1.637172769041022e-05, - "loss": 1.596, + "epoch": 0.6095735667890426, + "grad_norm": 3.3102026849017263, + "learning_rate": 1.846805295176439e-05, + "loss": 0.9228, "step": 4317 }, { - "epoch": 0.9054309079471587, - "grad_norm": 7.218806387837956, - "learning_rate": 1.636998270374077e-05, - "loss": 2.148, + "epoch": 0.6097147698390285, + "grad_norm": 4.094722011425383, + "learning_rate": 1.8467241992282842e-05, + "loss": 1.1037, "step": 4318 }, { - "epoch": 0.905640595512686, - "grad_norm": 7.706265859111886, - "learning_rate": 1.636823739059866e-05, - "loss": 1.9338, + "epoch": 0.6098559728890144, + "grad_norm": 3.5560157936762504, + "learning_rate": 1.8466430836026247e-05, + "loss": 0.9872, "step": 4319 }, { - "epoch": 0.9058502830782135, - "grad_norm": 5.536449607180294, - "learning_rate": 1.6366491751073335e-05, - "loss": 1.6005, + "epoch": 0.6099971759390003, + "grad_norm": 3.4272592639305084, + "learning_rate": 1.846561948301346e-05, + "loss": 0.9431, "step": 4320 }, { - "epoch": 0.9060599706437408, - "grad_norm": 7.182639283017236, - "learning_rate": 1.636474578525427e-05, - "loss": 2.0927, + "epoch": 0.6101383789889862, + "grad_norm": 3.213289111056129, + "learning_rate": 1.846480793326333e-05, + "loss": 0.9638, "step": 4321 }, { - "epoch": 0.9062696582092682, - "grad_norm": 8.184584904694006, - "learning_rate": 1.636299949323094e-05, - "loss": 1.7981, + "epoch": 0.610279582038972, + "grad_norm": 3.418920951031553, + "learning_rate": 1.8463996186794716e-05, + "loss": 0.9983, "step": 4322 }, { - "epoch": 0.9064793457747956, - "grad_norm": 7.799748304093303, - "learning_rate": 1.6361252875092856e-05, - "loss": 1.541, + "epoch": 0.6104207850889579, + "grad_norm": 3.432638954570329, + "learning_rate": 1.846318424362649e-05, + "loss": 1.0573, "step": 4323 }, { - "epoch": 0.9066890333403229, - "grad_norm": 5.80287761151934, - "learning_rate": 1.6359505930929523e-05, - "loss": 1.7787, + "epoch": 0.6105619881389438, + "grad_norm": 4.473833099919569, + "learning_rate": 1.8462372103777512e-05, + "loss": 1.1035, "step": 4324 }, { - "epoch": 0.9068987209058503, - "grad_norm": 7.2185749565819854, - "learning_rate": 1.6357758660830483e-05, - "loss": 2.1301, + "epoch": 0.6107031911889297, + "grad_norm": 4.274874915998192, + "learning_rate": 1.8461559767266663e-05, + "loss": 1.1037, "step": 4325 }, { - "epoch": 0.9071084084713776, - "grad_norm": 6.546540913985563, - "learning_rate": 1.6356011064885286e-05, - "loss": 1.5384, + "epoch": 0.6108443942389156, + "grad_norm": 3.7647601499030636, + "learning_rate": 1.846074723411282e-05, + "loss": 1.1401, "step": 4326 }, { - "epoch": 0.907318096036905, - "grad_norm": 7.373222779960865, - "learning_rate": 1.63542631431835e-05, - "loss": 1.6398, + "epoch": 0.6109855972889014, + "grad_norm": 3.6657051160591974, + "learning_rate": 1.8459934504334868e-05, + "loss": 1.1786, "step": 4327 }, { - "epoch": 0.9075277836024324, - "grad_norm": 6.678594007554756, - "learning_rate": 1.63525148958147e-05, - "loss": 1.4834, + "epoch": 0.6111268003388873, + "grad_norm": 3.4727467595433477, + "learning_rate": 1.845912157795169e-05, + "loss": 0.971, "step": 4328 }, { - "epoch": 0.9077374711679598, - "grad_norm": 7.168803090071044, - "learning_rate": 1.63507663228685e-05, - "loss": 1.7961, + "epoch": 0.6112680033888732, + "grad_norm": 3.2968915284355638, + "learning_rate": 1.8458308454982177e-05, + "loss": 0.8461, "step": 4329 }, { - "epoch": 0.9079471587334871, - "grad_norm": 7.303216501263846, - "learning_rate": 1.634901742443451e-05, - "loss": 1.7225, + "epoch": 0.6114092064388591, + "grad_norm": 2.9382177570735037, + "learning_rate": 1.845749513544523e-05, + "loss": 0.845, "step": 4330 }, { - "epoch": 0.9081568462990145, - "grad_norm": 5.579091969326058, - "learning_rate": 1.6347268200602363e-05, - "loss": 1.7082, + "epoch": 0.611550409488845, + "grad_norm": 3.8970802737992822, + "learning_rate": 1.8456681619359748e-05, + "loss": 1.0907, "step": 4331 }, { - "epoch": 0.9083665338645418, - "grad_norm": 6.297473043422979, - "learning_rate": 1.6345518651461714e-05, - "loss": 1.7463, + "epoch": 0.6116916125388309, + "grad_norm": 3.3898756107880956, + "learning_rate": 1.845586790674464e-05, + "loss": 1.1002, "step": 4332 }, { - "epoch": 0.9085762214300692, - "grad_norm": 8.020946092659324, - "learning_rate": 1.6343768777102226e-05, - "loss": 1.6613, + "epoch": 0.6118328155888167, + "grad_norm": 3.950958939267451, + "learning_rate": 1.8455053997618814e-05, + "loss": 0.97, "step": 4333 }, { - "epoch": 0.9087859089955965, - "grad_norm": 6.108860659930576, - "learning_rate": 1.6342018577613586e-05, - "loss": 1.4376, + "epoch": 0.6119740186388026, + "grad_norm": 3.6938178612248684, + "learning_rate": 1.845423989200118e-05, + "loss": 1.1848, "step": 4334 }, { - "epoch": 0.9089955965611239, - "grad_norm": 6.772799753351255, - "learning_rate": 1.63402680530855e-05, - "loss": 1.8513, + "epoch": 0.6121152216887885, + "grad_norm": 3.97209680519993, + "learning_rate": 1.8453425589910665e-05, + "loss": 1.0352, "step": 4335 }, { - "epoch": 0.9092052841266512, - "grad_norm": 6.692228437107451, - "learning_rate": 1.6338517203607672e-05, - "loss": 1.8041, + "epoch": 0.6122564247387744, + "grad_norm": 5.18941579057397, + "learning_rate": 1.8452611091366188e-05, + "loss": 1.2571, "step": 4336 }, { - "epoch": 0.9094149716921787, - "grad_norm": 6.304840139369475, - "learning_rate": 1.6336766029269846e-05, - "loss": 1.6344, + "epoch": 0.6123976277887603, + "grad_norm": 3.1217568876186594, + "learning_rate": 1.8451796396386683e-05, + "loss": 0.8443, "step": 4337 }, { - "epoch": 0.909624659257706, - "grad_norm": 6.417303609540669, - "learning_rate": 1.633501453016177e-05, - "loss": 1.7031, + "epoch": 0.6125388308387462, + "grad_norm": 3.7954005089073015, + "learning_rate": 1.8450981504991077e-05, + "loss": 0.9554, "step": 4338 }, { - "epoch": 0.9098343468232334, - "grad_norm": 7.516928062667972, - "learning_rate": 1.6333262706373214e-05, - "loss": 1.8159, + "epoch": 0.612680033888732, + "grad_norm": 3.6412879588767524, + "learning_rate": 1.845016641719831e-05, + "loss": 1.0309, "step": 4339 }, { - "epoch": 0.9100440343887607, - "grad_norm": 5.853021315398659, - "learning_rate": 1.6331510557993957e-05, - "loss": 1.7995, + "epoch": 0.6128212369387179, + "grad_norm": 3.7442795923431413, + "learning_rate": 1.8449351133027327e-05, + "loss": 1.1098, "step": 4340 }, { - "epoch": 0.9102537219542881, - "grad_norm": 8.19628553891524, - "learning_rate": 1.6329758085113803e-05, - "loss": 2.2299, + "epoch": 0.6129624399887038, + "grad_norm": 3.5945135423653176, + "learning_rate": 1.8448535652497073e-05, + "loss": 1.0877, "step": 4341 }, { - "epoch": 0.9104634095198155, - "grad_norm": 6.239882178807351, - "learning_rate": 1.6328005287822572e-05, - "loss": 1.3961, + "epoch": 0.6131036430386896, + "grad_norm": 3.124375688020515, + "learning_rate": 1.8447719975626496e-05, + "loss": 0.9594, "step": 4342 }, { - "epoch": 0.9106730970853428, - "grad_norm": 7.0516206182275765, - "learning_rate": 1.6326252166210092e-05, - "loss": 1.6739, + "epoch": 0.6132448460886755, + "grad_norm": 4.1813427416423545, + "learning_rate": 1.8446904102434556e-05, + "loss": 1.0325, "step": 4343 }, { - "epoch": 0.9108827846508702, - "grad_norm": 8.01990140970183, - "learning_rate": 1.6324498720366218e-05, - "loss": 2.0441, + "epoch": 0.6133860491386613, + "grad_norm": 3.226743169077687, + "learning_rate": 1.8446088032940217e-05, + "loss": 0.9507, "step": 4344 }, { - "epoch": 0.9110924722163976, - "grad_norm": 6.519041413412794, - "learning_rate": 1.6322744950380812e-05, - "loss": 1.85, + "epoch": 0.6135272521886472, + "grad_norm": 3.2471833522178963, + "learning_rate": 1.8445271767162435e-05, + "loss": 0.9772, "step": 4345 }, { - "epoch": 0.911302159781925, - "grad_norm": 6.459089967252704, - "learning_rate": 1.6320990856343765e-05, - "loss": 1.6333, + "epoch": 0.6136684552386331, + "grad_norm": 12.002604965145798, + "learning_rate": 1.8444455305120187e-05, + "loss": 1.1078, "step": 4346 }, { - "epoch": 0.9115118473474523, - "grad_norm": 6.314516366446648, - "learning_rate": 1.631923643834497e-05, - "loss": 1.3681, + "epoch": 0.613809658288619, + "grad_norm": 3.7535403236409275, + "learning_rate": 1.8443638646832442e-05, + "loss": 0.9741, "step": 4347 }, { - "epoch": 0.9117215349129797, - "grad_norm": 7.099301769671213, - "learning_rate": 1.6317481696474347e-05, - "loss": 1.6458, + "epoch": 0.6139508613386049, + "grad_norm": 4.062758731877399, + "learning_rate": 1.8442821792318183e-05, + "loss": 1.1391, "step": 4348 }, { - "epoch": 0.911931222478507, - "grad_norm": 5.859678060164586, - "learning_rate": 1.6315726630821833e-05, - "loss": 1.532, + "epoch": 0.6140920643885908, + "grad_norm": 3.230238976145585, + "learning_rate": 1.844200474159639e-05, + "loss": 0.9042, "step": 4349 }, { - "epoch": 0.9121409100440344, - "grad_norm": 7.486706669146943, - "learning_rate": 1.6313971241477374e-05, - "loss": 1.5815, + "epoch": 0.6142332674385766, + "grad_norm": 4.216110181318829, + "learning_rate": 1.8441187494686055e-05, + "loss": 1.1695, "step": 4350 }, { - "epoch": 0.9123505976095617, - "grad_norm": 5.507327539317999, - "learning_rate": 1.6312215528530938e-05, - "loss": 1.6873, + "epoch": 0.6143744704885625, + "grad_norm": 3.203289150902373, + "learning_rate": 1.8440370051606165e-05, + "loss": 0.8446, "step": 4351 }, { - "epoch": 0.9125602851750891, - "grad_norm": 6.212626975976258, - "learning_rate": 1.6310459492072505e-05, - "loss": 1.7321, + "epoch": 0.6145156735385484, + "grad_norm": 4.225409869809968, + "learning_rate": 1.8439552412375725e-05, + "loss": 0.9955, "step": 4352 }, { - "epoch": 0.9127699727406164, - "grad_norm": 8.281062523950995, - "learning_rate": 1.6308703132192082e-05, - "loss": 2.0378, + "epoch": 0.6146568765885343, + "grad_norm": 3.6316339010142236, + "learning_rate": 1.8438734577013728e-05, + "loss": 0.9645, "step": 4353 }, { - "epoch": 0.9129796603061439, - "grad_norm": 6.381123499078793, - "learning_rate": 1.630694644897968e-05, - "loss": 1.9276, + "epoch": 0.6147980796385202, + "grad_norm": 3.913365408444451, + "learning_rate": 1.8437916545539185e-05, + "loss": 0.9768, "step": 4354 }, { - "epoch": 0.9131893478716712, - "grad_norm": 6.443945950221432, - "learning_rate": 1.6305189442525334e-05, - "loss": 1.8758, + "epoch": 0.6149392826885061, + "grad_norm": 4.2515260010201965, + "learning_rate": 1.8437098317971104e-05, + "loss": 1.0734, "step": 4355 }, { - "epoch": 0.9133990354371986, - "grad_norm": 6.613472721991775, - "learning_rate": 1.6303432112919095e-05, - "loss": 1.5967, + "epoch": 0.6150804857384919, + "grad_norm": 4.041407631860882, + "learning_rate": 1.8436279894328507e-05, + "loss": 1.1296, "step": 4356 }, { - "epoch": 0.9136087230027259, - "grad_norm": 5.974271645654126, - "learning_rate": 1.6301674460251024e-05, - "loss": 1.9305, + "epoch": 0.6152216887884778, + "grad_norm": 4.083777152102148, + "learning_rate": 1.8435461274630402e-05, + "loss": 1.0844, "step": 4357 }, { - "epoch": 0.9138184105682533, - "grad_norm": 6.545512474139671, - "learning_rate": 1.629991648461121e-05, - "loss": 1.5447, + "epoch": 0.6153628918384637, + "grad_norm": 4.561251812429453, + "learning_rate": 1.8434642458895823e-05, + "loss": 1.2106, "step": 4358 }, { - "epoch": 0.9140280981337807, - "grad_norm": 5.865910773887542, - "learning_rate": 1.6298158186089748e-05, - "loss": 1.7877, + "epoch": 0.6155040948884496, + "grad_norm": 3.2818716878503937, + "learning_rate": 1.8433823447143796e-05, + "loss": 0.9058, "step": 4359 }, { - "epoch": 0.914237785699308, - "grad_norm": 6.768863143133781, - "learning_rate": 1.6296399564776754e-05, - "loss": 1.6611, + "epoch": 0.6156452979384355, + "grad_norm": 3.5011028544096723, + "learning_rate": 1.8433004239393353e-05, + "loss": 1.02, "step": 4360 }, { - "epoch": 0.9144474732648354, - "grad_norm": 6.216003383808288, - "learning_rate": 1.6294640620762367e-05, - "loss": 1.6584, + "epoch": 0.6157865009884214, + "grad_norm": 3.6274211625064607, + "learning_rate": 1.8432184835663535e-05, + "loss": 1.0762, "step": 4361 }, { - "epoch": 0.9146571608303627, - "grad_norm": 7.547124297060335, - "learning_rate": 1.6292881354136726e-05, - "loss": 2.0497, + "epoch": 0.6159277040384072, + "grad_norm": 3.7649414902412053, + "learning_rate": 1.8431365235973383e-05, + "loss": 1.1366, "step": 4362 }, { - "epoch": 0.9148668483958902, - "grad_norm": 7.147278435862349, - "learning_rate": 1.629112176499e-05, - "loss": 1.5562, + "epoch": 0.6160689070883931, + "grad_norm": 4.352915838656869, + "learning_rate": 1.8430545440341942e-05, + "loss": 1.1059, "step": 4363 }, { - "epoch": 0.9150765359614175, - "grad_norm": 7.001188116167043, - "learning_rate": 1.6289361853412376e-05, - "loss": 1.7919, + "epoch": 0.616210110138379, + "grad_norm": 3.9600291454201906, + "learning_rate": 1.8429725448788267e-05, + "loss": 1.0093, "step": 4364 }, { - "epoch": 0.9152862235269449, - "grad_norm": 6.520064265585425, - "learning_rate": 1.628760161949405e-05, - "loss": 1.7465, + "epoch": 0.6163513131883649, + "grad_norm": 3.7443006441089977, + "learning_rate": 1.8428905261331412e-05, + "loss": 0.8644, "step": 4365 }, { - "epoch": 0.9154959110924722, - "grad_norm": 7.615162229771016, - "learning_rate": 1.628584106332523e-05, - "loss": 1.673, + "epoch": 0.6164925162383508, + "grad_norm": 4.144582192396024, + "learning_rate": 1.8428084877990443e-05, + "loss": 0.9091, "step": 4366 }, { - "epoch": 0.9157055986579996, - "grad_norm": 6.512046843743319, - "learning_rate": 1.6284080184996157e-05, - "loss": 1.576, + "epoch": 0.6166337192883367, + "grad_norm": 3.4799150978699314, + "learning_rate": 1.8427264298784418e-05, + "loss": 0.9291, "step": 4367 }, { - "epoch": 0.9159152862235269, - "grad_norm": 6.495102613086033, - "learning_rate": 1.6282318984597075e-05, - "loss": 1.7918, + "epoch": 0.6167749223383225, + "grad_norm": 3.6674010679912827, + "learning_rate": 1.8426443523732412e-05, + "loss": 1.0794, "step": 4368 }, { - "epoch": 0.9161249737890543, - "grad_norm": 6.357872338896982, - "learning_rate": 1.6280557462218246e-05, - "loss": 1.7552, + "epoch": 0.6169161253883084, + "grad_norm": 4.548581679043314, + "learning_rate": 1.8425622552853494e-05, + "loss": 1.0694, "step": 4369 }, { - "epoch": 0.9163346613545816, - "grad_norm": 5.835192482795335, - "learning_rate": 1.627879561794996e-05, - "loss": 1.4341, + "epoch": 0.6170573284382943, + "grad_norm": 4.033874777887552, + "learning_rate": 1.8424801386166752e-05, + "loss": 0.9868, "step": 4370 }, { - "epoch": 0.916544348920109, - "grad_norm": 7.0697480943292215, - "learning_rate": 1.6277033451882503e-05, - "loss": 1.8518, + "epoch": 0.6171985314882802, + "grad_norm": 4.402225524661612, + "learning_rate": 1.8423980023691258e-05, + "loss": 1.1061, "step": 4371 }, { - "epoch": 0.9167540364856364, - "grad_norm": 5.288844709476438, - "learning_rate": 1.6275270964106197e-05, - "loss": 1.5693, + "epoch": 0.6173397345382661, + "grad_norm": 4.600419846281784, + "learning_rate": 1.842315846544611e-05, + "loss": 1.3261, "step": 4372 }, { - "epoch": 0.9169637240511638, - "grad_norm": 6.362519194294375, - "learning_rate": 1.6273508154711372e-05, - "loss": 1.4956, + "epoch": 0.617480937588252, + "grad_norm": 3.6130993731055905, + "learning_rate": 1.8422336711450396e-05, + "loss": 0.8885, "step": 4373 }, { - "epoch": 0.9171734116166911, - "grad_norm": 5.7498749336798705, - "learning_rate": 1.627174502378837e-05, - "loss": 1.5004, + "epoch": 0.6176221406382378, + "grad_norm": 3.762301154754903, + "learning_rate": 1.8421514761723216e-05, + "loss": 0.9497, "step": 4374 }, { - "epoch": 0.9173830991822185, - "grad_norm": 5.991968662252214, - "learning_rate": 1.626998157142756e-05, - "loss": 1.6713, + "epoch": 0.6177633436882237, + "grad_norm": 4.411073240054198, + "learning_rate": 1.842069261628367e-05, + "loss": 1.2685, "step": 4375 }, { - "epoch": 0.9175927867477458, - "grad_norm": 6.221388357799579, - "learning_rate": 1.6268217797719317e-05, - "loss": 1.7192, + "epoch": 0.6179045467382095, + "grad_norm": 5.0687021611726975, + "learning_rate": 1.841987027515086e-05, + "loss": 1.3425, "step": 4376 }, { - "epoch": 0.9178024743132732, - "grad_norm": 7.0164166629880915, - "learning_rate": 1.6266453702754043e-05, - "loss": 1.6554, + "epoch": 0.6180457497881954, + "grad_norm": 3.854728007837796, + "learning_rate": 1.8419047738343905e-05, + "loss": 1.1232, "step": 4377 }, { - "epoch": 0.9180121618788006, - "grad_norm": 5.866641158548073, - "learning_rate": 1.6264689286622143e-05, - "loss": 1.591, + "epoch": 0.6181869528381813, + "grad_norm": 4.524296759978584, + "learning_rate": 1.8418225005881916e-05, + "loss": 1.281, "step": 4378 }, { - "epoch": 0.9182218494443279, - "grad_norm": 7.413725333290152, - "learning_rate": 1.626292454941406e-05, - "loss": 1.8048, + "epoch": 0.6183281558881671, + "grad_norm": 3.406193395504181, + "learning_rate": 1.841740207778401e-05, + "loss": 0.8997, "step": 4379 }, { - "epoch": 0.9184315370098554, - "grad_norm": 8.298297650330117, - "learning_rate": 1.6261159491220224e-05, - "loss": 1.8587, + "epoch": 0.618469358938153, + "grad_norm": 4.063073434545334, + "learning_rate": 1.8416578954069318e-05, + "loss": 1.0026, "step": 4380 }, { - "epoch": 0.9186412245753827, - "grad_norm": 8.295866874468631, - "learning_rate": 1.625939411213111e-05, - "loss": 1.808, + "epoch": 0.6186105619881389, + "grad_norm": 3.835876532332979, + "learning_rate": 1.8415755634756968e-05, + "loss": 1.1256, "step": 4381 }, { - "epoch": 0.9188509121409101, - "grad_norm": 6.552629265318845, - "learning_rate": 1.6257628412237188e-05, - "loss": 1.326, + "epoch": 0.6187517650381248, + "grad_norm": 4.256828421338441, + "learning_rate": 1.841493211986609e-05, + "loss": 0.9758, "step": 4382 }, { - "epoch": 0.9190605997064374, - "grad_norm": 6.2615053461182875, - "learning_rate": 1.6255862391628957e-05, - "loss": 1.6671, + "epoch": 0.6188929680881107, + "grad_norm": 3.7133289117110286, + "learning_rate": 1.841410840941582e-05, + "loss": 1.2061, "step": 4383 }, { - "epoch": 0.9192702872719648, - "grad_norm": 6.614092219307581, - "learning_rate": 1.625409605039693e-05, - "loss": 1.7288, + "epoch": 0.6190341711380966, + "grad_norm": 3.38850620404612, + "learning_rate": 1.841328450342531e-05, + "loss": 0.9146, "step": 4384 }, { - "epoch": 0.9194799748374921, - "grad_norm": 6.105639247716342, - "learning_rate": 1.6252329388631628e-05, - "loss": 1.6215, + "epoch": 0.6191753741880824, + "grad_norm": 3.70629461396281, + "learning_rate": 1.8412460401913695e-05, + "loss": 1.098, "step": 4385 }, { - "epoch": 0.9196896624030195, - "grad_norm": 6.2749386543622645, - "learning_rate": 1.6250562406423606e-05, - "loss": 2.1094, + "epoch": 0.6193165772380683, + "grad_norm": 3.042261920644096, + "learning_rate": 1.8411636104900138e-05, + "loss": 0.8615, "step": 4386 }, { - "epoch": 0.9198993499685468, - "grad_norm": 6.847189163776937, - "learning_rate": 1.6248795103863416e-05, - "loss": 1.601, + "epoch": 0.6194577802880542, + "grad_norm": 3.2142485653704163, + "learning_rate": 1.841081161240379e-05, + "loss": 0.867, "step": 4387 }, { - "epoch": 0.9201090375340742, - "grad_norm": 7.33543487838931, - "learning_rate": 1.624702748104164e-05, - "loss": 1.8603, + "epoch": 0.6195989833380401, + "grad_norm": 3.5280286072296625, + "learning_rate": 1.840998692444381e-05, + "loss": 0.8558, "step": 4388 }, { - "epoch": 0.9203187250996016, - "grad_norm": 6.213328629086073, - "learning_rate": 1.6245259538048873e-05, - "loss": 1.7125, + "epoch": 0.619740186388026, + "grad_norm": 3.582594442166063, + "learning_rate": 1.840916204103937e-05, + "loss": 0.9091, "step": 4389 }, { - "epoch": 0.920528412665129, - "grad_norm": 6.768527269479866, - "learning_rate": 1.624349127497572e-05, - "loss": 1.441, + "epoch": 0.6198813894380119, + "grad_norm": 8.016979710903913, + "learning_rate": 1.840833696220963e-05, + "loss": 1.015, "step": 4390 }, { - "epoch": 0.9207381002306563, - "grad_norm": 6.661431142157733, - "learning_rate": 1.6241722691912816e-05, - "loss": 2.0778, + "epoch": 0.6200225924879977, + "grad_norm": 3.39726898594206, + "learning_rate": 1.8407511687973776e-05, + "loss": 0.9433, "step": 4391 }, { - "epoch": 0.9209477877961837, - "grad_norm": 7.067486426076362, - "learning_rate": 1.6239953788950792e-05, - "loss": 1.5105, + "epoch": 0.6201637955379836, + "grad_norm": 3.345535187457047, + "learning_rate": 1.840668621835098e-05, + "loss": 1.0486, "step": 4392 }, { - "epoch": 0.921157475361711, - "grad_norm": 7.059220657734244, - "learning_rate": 1.6238184566180315e-05, - "loss": 1.9681, + "epoch": 0.6203049985879695, + "grad_norm": 3.4042547687665112, + "learning_rate": 1.8405860553360428e-05, + "loss": 0.9434, "step": 4393 }, { - "epoch": 0.9213671629272384, - "grad_norm": 5.990077654128932, - "learning_rate": 1.623641502369206e-05, - "loss": 1.4067, + "epoch": 0.6204462016379554, + "grad_norm": 3.9051811726060186, + "learning_rate": 1.84050346930213e-05, + "loss": 1.0895, "step": 4394 }, { - "epoch": 0.9215768504927658, - "grad_norm": 6.802717561780429, - "learning_rate": 1.623464516157672e-05, - "loss": 1.5818, + "epoch": 0.6205874046879413, + "grad_norm": 3.6391237022508496, + "learning_rate": 1.8404208637352802e-05, + "loss": 1.1197, "step": 4395 }, { - "epoch": 0.9217865380582931, - "grad_norm": 5.818947875778048, - "learning_rate": 1.6232874979925e-05, - "loss": 1.3551, + "epoch": 0.6207286077379272, + "grad_norm": 4.229181776013685, + "learning_rate": 1.8403382386374124e-05, + "loss": 1.11, "step": 4396 }, { - "epoch": 0.9219962256238206, - "grad_norm": 6.7915483422950835, - "learning_rate": 1.623110447882763e-05, - "loss": 1.6253, + "epoch": 0.620869810787913, + "grad_norm": 3.250385696788391, + "learning_rate": 1.8402555940104466e-05, + "loss": 0.9462, "step": 4397 }, { - "epoch": 0.9222059131893479, - "grad_norm": 7.065093576468098, - "learning_rate": 1.6229333658375343e-05, - "loss": 1.8848, + "epoch": 0.6210110138378989, + "grad_norm": 4.422670888092435, + "learning_rate": 1.840172929856304e-05, + "loss": 1.167, "step": 4398 }, { - "epoch": 0.9224156007548753, - "grad_norm": 6.662458923393933, - "learning_rate": 1.6227562518658907e-05, - "loss": 1.7856, + "epoch": 0.6211522168878848, + "grad_norm": 3.729328508456831, + "learning_rate": 1.840090246176905e-05, + "loss": 1.1157, "step": 4399 }, { - "epoch": 0.9226252883204026, - "grad_norm": 6.055326744451304, - "learning_rate": 1.6225791059769086e-05, - "loss": 1.3741, + "epoch": 0.6212934199378707, + "grad_norm": 3.5917787914221164, + "learning_rate": 1.840007542974172e-05, + "loss": 1.0857, "step": 4400 }, { - "epoch": 0.92283497588593, - "grad_norm": 6.428053918225503, - "learning_rate": 1.622401928179668e-05, - "loss": 1.6846, + "epoch": 0.6214346229878566, + "grad_norm": 3.765418931913234, + "learning_rate": 1.839924820250026e-05, + "loss": 1.1719, "step": 4401 }, { - "epoch": 0.9230446634514573, - "grad_norm": 7.345024217483868, - "learning_rate": 1.6222247184832486e-05, - "loss": 1.7402, + "epoch": 0.6215758260378424, + "grad_norm": 3.5311564364710315, + "learning_rate": 1.8398420780063905e-05, + "loss": 0.9262, "step": 4402 }, { - "epoch": 0.9232543510169847, - "grad_norm": 5.623346553951054, - "learning_rate": 1.6220474768967335e-05, - "loss": 1.4672, + "epoch": 0.6217170290878283, + "grad_norm": 3.371886888159024, + "learning_rate": 1.839759316245187e-05, + "loss": 0.9837, "step": 4403 }, { - "epoch": 0.923464038582512, - "grad_norm": 7.850432426108634, - "learning_rate": 1.6218702034292064e-05, - "loss": 1.5921, + "epoch": 0.6218582321378142, + "grad_norm": 4.939380592632123, + "learning_rate": 1.8396765349683404e-05, + "loss": 1.2113, "step": 4404 }, { - "epoch": 0.9236737261480394, - "grad_norm": 7.114620783336331, - "learning_rate": 1.6216928980897526e-05, - "loss": 1.5157, + "epoch": 0.6219994351878001, + "grad_norm": 4.262762004402425, + "learning_rate": 1.8395937341777732e-05, + "loss": 1.2345, "step": 4405 }, { - "epoch": 0.9238834137135667, - "grad_norm": 7.478132435368127, - "learning_rate": 1.6215155608874595e-05, - "loss": 1.9406, + "epoch": 0.622140638237786, + "grad_norm": 3.405183986160233, + "learning_rate": 1.8395109138754104e-05, + "loss": 0.8409, "step": 4406 }, { - "epoch": 0.9240931012790942, - "grad_norm": 6.836786602419382, - "learning_rate": 1.621338191831416e-05, - "loss": 2.0092, + "epoch": 0.6222818412877719, + "grad_norm": 3.842974881606262, + "learning_rate": 1.8394280740631765e-05, + "loss": 1.1841, "step": 4407 }, { - "epoch": 0.9243027888446215, - "grad_norm": 8.042484983343705, - "learning_rate": 1.6211607909307126e-05, - "loss": 2.0921, + "epoch": 0.6224230443377577, + "grad_norm": 3.293031420713474, + "learning_rate": 1.8393452147429965e-05, + "loss": 0.8353, "step": 4408 }, { - "epoch": 0.9245124764101489, - "grad_norm": 7.372346267507749, - "learning_rate": 1.620983358194441e-05, - "loss": 2.108, + "epoch": 0.6225642473877436, + "grad_norm": 6.341168204060529, + "learning_rate": 1.8392623359167964e-05, + "loss": 1.3582, "step": 4409 }, { - "epoch": 0.9247221639756762, - "grad_norm": 7.019074967021153, - "learning_rate": 1.6208058936316955e-05, - "loss": 1.8297, + "epoch": 0.6227054504377294, + "grad_norm": 3.4525857501510275, + "learning_rate": 1.8391794375865025e-05, + "loss": 0.9091, "step": 4410 }, { - "epoch": 0.9249318515412036, - "grad_norm": 7.2236405393593435, - "learning_rate": 1.6206283972515713e-05, - "loss": 1.4667, + "epoch": 0.6228466534877153, + "grad_norm": 3.3273481256198507, + "learning_rate": 1.8390965197540404e-05, + "loss": 1.1268, "step": 4411 }, { - "epoch": 0.925141539106731, - "grad_norm": 7.46208428836598, - "learning_rate": 1.620450869063165e-05, - "loss": 1.9439, + "epoch": 0.6229878565377012, + "grad_norm": 3.906910916070192, + "learning_rate": 1.8390135824213376e-05, + "loss": 0.9928, "step": 4412 }, { - "epoch": 0.9253512266722583, - "grad_norm": 6.024431963693183, - "learning_rate": 1.620273309075576e-05, - "loss": 1.5034, + "epoch": 0.623129059587687, + "grad_norm": 3.1274694188244045, + "learning_rate": 1.8389306255903216e-05, + "loss": 0.9818, "step": 4413 }, { - "epoch": 0.9255609142377857, - "grad_norm": 6.259155450799531, - "learning_rate": 1.6200957172979038e-05, - "loss": 1.4678, + "epoch": 0.6232702626376729, + "grad_norm": 3.634100659794148, + "learning_rate": 1.8388476492629198e-05, + "loss": 1.0588, "step": 4414 }, { - "epoch": 0.925770601803313, - "grad_norm": 6.548697144643111, - "learning_rate": 1.6199180937392505e-05, - "loss": 1.6529, + "epoch": 0.6234114656876588, + "grad_norm": 4.412308863264425, + "learning_rate": 1.8387646534410612e-05, + "loss": 1.1713, "step": 4415 }, { - "epoch": 0.9259802893688405, - "grad_norm": 6.117135598673797, - "learning_rate": 1.6197404384087198e-05, - "loss": 1.4944, + "epoch": 0.6235526687376447, + "grad_norm": 3.9035968303001995, + "learning_rate": 1.838681638126674e-05, + "loss": 0.9356, "step": 4416 }, { - "epoch": 0.9261899769343678, - "grad_norm": 6.261652688816058, - "learning_rate": 1.6195627513154167e-05, - "loss": 1.6021, + "epoch": 0.6236938717876306, + "grad_norm": 3.93375096694715, + "learning_rate": 1.8385986033216883e-05, + "loss": 1.0768, "step": 4417 }, { - "epoch": 0.9263996644998952, - "grad_norm": 7.107856936870377, - "learning_rate": 1.619385032468448e-05, - "loss": 1.6406, + "epoch": 0.6238350748376165, + "grad_norm": 3.050506509267695, + "learning_rate": 1.8385155490280327e-05, + "loss": 0.7046, "step": 4418 }, { - "epoch": 0.9266093520654225, - "grad_norm": 6.8613218708699355, - "learning_rate": 1.6192072818769223e-05, - "loss": 1.7249, + "epoch": 0.6239762778876023, + "grad_norm": 4.065606605231039, + "learning_rate": 1.8384324752476384e-05, + "loss": 1.3575, "step": 4419 }, { - "epoch": 0.9268190396309499, - "grad_norm": 5.616859411034356, - "learning_rate": 1.6190294995499493e-05, - "loss": 1.4266, + "epoch": 0.6241174809375882, + "grad_norm": 3.767112275529218, + "learning_rate": 1.838349381982435e-05, + "loss": 0.9412, "step": 4420 }, { - "epoch": 0.9270287271964772, - "grad_norm": 5.610035598230709, - "learning_rate": 1.6188516854966407e-05, - "loss": 1.6625, + "epoch": 0.6242586839875741, + "grad_norm": 3.940423550970751, + "learning_rate": 1.838266269234354e-05, + "loss": 1.2461, "step": 4421 }, { - "epoch": 0.9272384147620046, - "grad_norm": 7.508050911568481, - "learning_rate": 1.61867383972611e-05, - "loss": 2.2655, + "epoch": 0.62439988703756, + "grad_norm": 3.8664067894759766, + "learning_rate": 1.838183137005327e-05, + "loss": 1.0906, "step": 4422 }, { - "epoch": 0.9274481023275319, - "grad_norm": 6.174966617556254, - "learning_rate": 1.6184959622474724e-05, - "loss": 1.5804, + "epoch": 0.6245410900875459, + "grad_norm": 4.511575420446997, + "learning_rate": 1.8380999852972864e-05, + "loss": 1.1844, "step": 4423 }, { - "epoch": 0.9276577898930594, - "grad_norm": 5.832840890122981, - "learning_rate": 1.6183180530698434e-05, - "loss": 1.7264, + "epoch": 0.6246822931375318, + "grad_norm": 3.773325226169781, + "learning_rate": 1.8380168141121635e-05, + "loss": 0.9064, "step": 4424 }, { - "epoch": 0.9278674774585867, - "grad_norm": 6.691305729856465, - "learning_rate": 1.618140112202342e-05, - "loss": 1.8952, + "epoch": 0.6248234961875176, + "grad_norm": 6.5215619801270135, + "learning_rate": 1.8379336234518917e-05, + "loss": 1.1895, "step": 4425 }, { - "epoch": 0.9280771650241141, - "grad_norm": 6.378138513348659, - "learning_rate": 1.6179621396540874e-05, - "loss": 1.7918, + "epoch": 0.6249646992375035, + "grad_norm": 3.827810219302066, + "learning_rate": 1.8378504133184047e-05, + "loss": 0.9361, "step": 4426 }, { - "epoch": 0.9282868525896414, - "grad_norm": 6.83764016309168, - "learning_rate": 1.617784135434202e-05, - "loss": 1.8642, + "epoch": 0.6251059022874894, + "grad_norm": 4.05771314673562, + "learning_rate": 1.837767183713636e-05, + "loss": 0.9726, "step": 4427 }, { - "epoch": 0.9284965401551688, - "grad_norm": 5.996763995698975, - "learning_rate": 1.6176060995518077e-05, - "loss": 1.5763, + "epoch": 0.6252471053374753, + "grad_norm": 3.749656983279884, + "learning_rate": 1.8376839346395194e-05, + "loss": 1.0087, "step": 4428 }, { - "epoch": 0.9287062277206961, - "grad_norm": 7.824905011652463, - "learning_rate": 1.6174280320160298e-05, - "loss": 1.6793, + "epoch": 0.6253883083874612, + "grad_norm": 3.7481365913493954, + "learning_rate": 1.83760066609799e-05, + "loss": 1.0726, "step": 4429 }, { - "epoch": 0.9289159152862235, - "grad_norm": 6.846672910165432, - "learning_rate": 1.617249932835994e-05, - "loss": 1.7084, + "epoch": 0.6255295114374471, + "grad_norm": 3.621531124764263, + "learning_rate": 1.837517378090983e-05, + "loss": 1.0061, "step": 4430 }, { - "epoch": 0.9291256028517509, - "grad_norm": 5.353500356099213, - "learning_rate": 1.6170718020208292e-05, - "loss": 1.4591, + "epoch": 0.6256707144874329, + "grad_norm": 3.7742884626722653, + "learning_rate": 1.8374340706204335e-05, + "loss": 1.085, "step": 4431 }, { - "epoch": 0.9293352904172782, - "grad_norm": 6.606189606109909, - "learning_rate": 1.6168936395796638e-05, - "loss": 1.9792, + "epoch": 0.6258119175374188, + "grad_norm": 3.2934977661687923, + "learning_rate": 1.8373507436882784e-05, + "loss": 0.8606, "step": 4432 }, { - "epoch": 0.9295449779828057, - "grad_norm": 7.740272371235913, - "learning_rate": 1.6167154455216296e-05, - "loss": 2.2183, + "epoch": 0.6259531205874047, + "grad_norm": 3.6011481022996708, + "learning_rate": 1.8372673972964535e-05, + "loss": 0.8677, "step": 4433 }, { - "epoch": 0.929754665548333, - "grad_norm": 6.251280791506547, - "learning_rate": 1.6165372198558593e-05, - "loss": 1.5812, + "epoch": 0.6260943236373906, + "grad_norm": 3.5833876856541007, + "learning_rate": 1.8371840314468956e-05, + "loss": 0.971, "step": 4434 }, { - "epoch": 0.9299643531138604, - "grad_norm": 6.637952676385502, - "learning_rate": 1.616358962591487e-05, - "loss": 1.7054, + "epoch": 0.6262355266873765, + "grad_norm": 3.962042712675888, + "learning_rate": 1.8371006461415425e-05, + "loss": 1.3831, "step": 4435 }, { - "epoch": 0.9301740406793877, - "grad_norm": 7.23101764833605, - "learning_rate": 1.616180673737649e-05, - "loss": 2.3731, + "epoch": 0.6263767297373624, + "grad_norm": 4.032990100519072, + "learning_rate": 1.837017241382332e-05, + "loss": 1.1031, "step": 4436 }, { - "epoch": 0.9303837282449151, - "grad_norm": 7.718208960777992, - "learning_rate": 1.6160023533034826e-05, - "loss": 1.9321, + "epoch": 0.6265179327873482, + "grad_norm": 3.5215285586942793, + "learning_rate": 1.836933817171202e-05, + "loss": 0.9561, "step": 4437 }, { - "epoch": 0.9305934158104424, - "grad_norm": 5.9818174332065865, - "learning_rate": 1.615824001298127e-05, - "loss": 1.4085, + "epoch": 0.6266591358373341, + "grad_norm": 4.019527275141383, + "learning_rate": 1.836850373510092e-05, + "loss": 1.1747, "step": 4438 }, { - "epoch": 0.9308031033759698, - "grad_norm": 7.112599732605032, - "learning_rate": 1.6156456177307235e-05, - "loss": 1.8498, + "epoch": 0.62680033888732, + "grad_norm": 4.611558473214138, + "learning_rate": 1.8367669104009404e-05, + "loss": 1.1272, "step": 4439 }, { - "epoch": 0.9310127909414971, - "grad_norm": 5.769584308947202, - "learning_rate": 1.6154672026104146e-05, - "loss": 1.6647, + "epoch": 0.6269415419373059, + "grad_norm": 3.8696414448772556, + "learning_rate": 1.8366834278456872e-05, + "loss": 1.0262, "step": 4440 }, { - "epoch": 0.9312224785070246, - "grad_norm": 6.726857785313026, - "learning_rate": 1.6152887559463436e-05, - "loss": 1.8672, + "epoch": 0.6270827449872918, + "grad_norm": 3.6289432295911843, + "learning_rate": 1.8365999258462723e-05, + "loss": 0.9288, "step": 4441 }, { - "epoch": 0.9314321660725519, - "grad_norm": 7.507906691618299, - "learning_rate": 1.6151102777476567e-05, - "loss": 2.1798, + "epoch": 0.6272239480372777, + "grad_norm": 3.5102175085059617, + "learning_rate": 1.8365164044046367e-05, + "loss": 1.0798, "step": 4442 }, { - "epoch": 0.9316418536380793, - "grad_norm": 5.2920271644766865, - "learning_rate": 1.6149317680235014e-05, - "loss": 1.5182, + "epoch": 0.6273651510872635, + "grad_norm": 4.158317837204322, + "learning_rate": 1.836432863522721e-05, + "loss": 1.146, "step": 4443 }, { - "epoch": 0.9318515412036066, - "grad_norm": 6.5719985556222875, - "learning_rate": 1.6147532267830264e-05, - "loss": 2.1591, + "epoch": 0.6275063541372493, + "grad_norm": 3.2960379221288565, + "learning_rate": 1.836349303202467e-05, + "loss": 0.8905, "step": 4444 }, { - "epoch": 0.932061228769134, - "grad_norm": 6.428791949906032, - "learning_rate": 1.6145746540353826e-05, - "loss": 1.572, + "epoch": 0.6276475571872352, + "grad_norm": 4.611075690617447, + "learning_rate": 1.8362657234458163e-05, + "loss": 1.3082, "step": 4445 }, { - "epoch": 0.9322709163346613, - "grad_norm": 6.1152972462119966, - "learning_rate": 1.6143960497897216e-05, - "loss": 1.697, + "epoch": 0.6277887602372211, + "grad_norm": 3.4786485043090507, + "learning_rate": 1.836182124254711e-05, + "loss": 0.9914, "step": 4446 }, { - "epoch": 0.9324806039001887, - "grad_norm": 6.8090274655414955, - "learning_rate": 1.6142174140551973e-05, - "loss": 1.7058, + "epoch": 0.627929963287207, + "grad_norm": 3.387774997106242, + "learning_rate": 1.836098505631095e-05, + "loss": 1.0317, "step": 4447 }, { - "epoch": 0.9326902914657161, - "grad_norm": 5.831374745137305, - "learning_rate": 1.614038746840965e-05, - "loss": 1.4237, + "epoch": 0.6280711663371928, + "grad_norm": 3.3823558115944894, + "learning_rate": 1.83601486757691e-05, + "loss": 0.889, "step": 4448 }, { - "epoch": 0.9328999790312434, - "grad_norm": 6.09164193779769, - "learning_rate": 1.6138600481561827e-05, - "loss": 1.8915, + "epoch": 0.6282123693871787, + "grad_norm": 3.9062011448442835, + "learning_rate": 1.8359312100941006e-05, + "loss": 1.0739, "step": 4449 }, { - "epoch": 0.9331096665967709, - "grad_norm": 6.228854432734358, - "learning_rate": 1.6136813180100075e-05, - "loss": 1.7031, + "epoch": 0.6283535724371646, + "grad_norm": 3.786089113585838, + "learning_rate": 1.835847533184611e-05, + "loss": 1.154, "step": 4450 }, { - "epoch": 0.9333193541622982, - "grad_norm": 7.497199914688314, - "learning_rate": 1.6135025564116007e-05, - "loss": 1.6823, + "epoch": 0.6284947754871505, + "grad_norm": 3.815584600994966, + "learning_rate": 1.835763836850386e-05, + "loss": 0.8192, "step": 4451 }, { - "epoch": 0.9335290417278256, - "grad_norm": 6.314755304800982, - "learning_rate": 1.6133237633701236e-05, - "loss": 2.1506, + "epoch": 0.6286359785371364, + "grad_norm": 3.729234535841036, + "learning_rate": 1.83568012109337e-05, + "loss": 1.2437, "step": 4452 }, { - "epoch": 0.9337387292933529, - "grad_norm": 5.962640269927995, - "learning_rate": 1.6131449388947402e-05, - "loss": 1.5752, + "epoch": 0.6287771815871223, + "grad_norm": 4.992764756871374, + "learning_rate": 1.8355963859155094e-05, + "loss": 1.3867, "step": 4453 }, { - "epoch": 0.9339484168588803, - "grad_norm": 5.797161920662345, - "learning_rate": 1.612966082994615e-05, - "loss": 1.909, + "epoch": 0.6289183846371081, + "grad_norm": 3.9709959593868565, + "learning_rate": 1.8355126313187496e-05, + "loss": 1.0025, "step": 4454 }, { - "epoch": 0.9341581044244076, - "grad_norm": 6.146511821637059, - "learning_rate": 1.6127871956789148e-05, - "loss": 1.7289, + "epoch": 0.629059587687094, + "grad_norm": 3.704040045931481, + "learning_rate": 1.8354288573050364e-05, + "loss": 0.9998, "step": 4455 }, { - "epoch": 0.934367791989935, - "grad_norm": 6.877633850633511, - "learning_rate": 1.6126082769568086e-05, - "loss": 1.7565, + "epoch": 0.6292007907370799, + "grad_norm": 3.2631988806846355, + "learning_rate": 1.8353450638763178e-05, + "loss": 1.0733, "step": 4456 }, { - "epoch": 0.9345774795554623, - "grad_norm": 6.9776631666589495, - "learning_rate": 1.6124293268374652e-05, - "loss": 1.789, + "epoch": 0.6293419937870658, + "grad_norm": 3.5694840123375706, + "learning_rate": 1.8352612510345408e-05, + "loss": 0.94, "step": 4457 }, { - "epoch": 0.9347871671209897, - "grad_norm": 6.131643937481767, - "learning_rate": 1.6122503453300566e-05, - "loss": 1.4719, + "epoch": 0.6294831968370517, + "grad_norm": 3.9160449820073158, + "learning_rate": 1.835177418781653e-05, + "loss": 1.2328, "step": 4458 }, { - "epoch": 0.934996854686517, - "grad_norm": 7.592613666484798, - "learning_rate": 1.612071332443756e-05, - "loss": 1.5795, + "epoch": 0.6296243998870376, + "grad_norm": 3.1263720788105855, + "learning_rate": 1.8350935671196026e-05, + "loss": 0.8892, "step": 4459 }, { - "epoch": 0.9352065422520445, - "grad_norm": 5.855304175246936, - "learning_rate": 1.6118922881877385e-05, - "loss": 1.5437, + "epoch": 0.6297656029370234, + "grad_norm": 3.475668549940923, + "learning_rate": 1.8350096960503383e-05, + "loss": 1.0001, "step": 4460 }, { - "epoch": 0.9354162298175718, - "grad_norm": 6.998569760176806, - "learning_rate": 1.611713212571179e-05, - "loss": 1.3634, + "epoch": 0.6299068059870093, + "grad_norm": 4.243678612462539, + "learning_rate": 1.834925805575809e-05, + "loss": 1.4809, "step": 4461 }, { - "epoch": 0.9356259173830992, - "grad_norm": 5.763690518963403, - "learning_rate": 1.6115341056032568e-05, - "loss": 1.7428, + "epoch": 0.6300480090369952, + "grad_norm": 3.7825593357338567, + "learning_rate": 1.834841895697965e-05, + "loss": 0.9324, "step": 4462 }, { - "epoch": 0.9358356049486265, - "grad_norm": 6.618312838524231, - "learning_rate": 1.6113549672931513e-05, - "loss": 1.92, + "epoch": 0.6301892120869811, + "grad_norm": 3.8191435834360137, + "learning_rate": 1.834757966418756e-05, + "loss": 1.0293, "step": 4463 }, { - "epoch": 0.9360452925141539, - "grad_norm": 8.841933727785282, - "learning_rate": 1.6111757976500432e-05, - "loss": 1.6921, + "epoch": 0.630330415136967, + "grad_norm": 4.590071906850395, + "learning_rate": 1.8346740177401318e-05, + "loss": 1.1511, "step": 4464 }, { - "epoch": 0.9362549800796812, - "grad_norm": 6.20136682744596, - "learning_rate": 1.6109965966831156e-05, - "loss": 1.3835, + "epoch": 0.6304716181869529, + "grad_norm": 3.542681945452479, + "learning_rate": 1.834590049664044e-05, + "loss": 0.9322, "step": 4465 }, { - "epoch": 0.9364646676452086, - "grad_norm": 6.6192848257958525, - "learning_rate": 1.6108173644015527e-05, - "loss": 1.638, + "epoch": 0.6306128212369387, + "grad_norm": 3.395470354662677, + "learning_rate": 1.8345060621924443e-05, + "loss": 0.742, "step": 4466 }, { - "epoch": 0.936674355210736, - "grad_norm": 8.95348991362724, - "learning_rate": 1.61063810081454e-05, - "loss": 1.8111, + "epoch": 0.6307540242869246, + "grad_norm": 3.6866421696617837, + "learning_rate": 1.834422055327284e-05, + "loss": 1.0092, "step": 4467 }, { - "epoch": 0.9368840427762634, - "grad_norm": 6.547302454340777, - "learning_rate": 1.6104588059312658e-05, - "loss": 1.6214, + "epoch": 0.6308952273369105, + "grad_norm": 3.143910194067715, + "learning_rate": 1.8343380290705153e-05, + "loss": 0.8565, "step": 4468 }, { - "epoch": 0.9370937303417908, - "grad_norm": 7.014334918725269, - "learning_rate": 1.6102794797609193e-05, - "loss": 1.7615, + "epoch": 0.6310364303868964, + "grad_norm": 2.874535376548412, + "learning_rate": 1.8342539834240913e-05, + "loss": 0.7365, "step": 4469 }, { - "epoch": 0.9373034179073181, - "grad_norm": 5.708616849453963, - "learning_rate": 1.6101001223126903e-05, - "loss": 1.3088, + "epoch": 0.6311776334368823, + "grad_norm": 3.6555748165155006, + "learning_rate": 1.834169918389965e-05, + "loss": 1.0443, "step": 4470 }, { - "epoch": 0.9375131054728455, - "grad_norm": 7.246624871091289, - "learning_rate": 1.6099207335957725e-05, - "loss": 1.6269, + "epoch": 0.6313188364868682, + "grad_norm": 3.4407911794432766, + "learning_rate": 1.8340858339700898e-05, + "loss": 0.9479, "step": 4471 }, { - "epoch": 0.9377227930383728, - "grad_norm": 7.429109919948727, - "learning_rate": 1.6097413136193592e-05, - "loss": 1.7847, + "epoch": 0.631460039536854, + "grad_norm": 3.2905696274577485, + "learning_rate": 1.83400173016642e-05, + "loss": 1.0659, "step": 4472 }, { - "epoch": 0.9379324806039002, - "grad_norm": 6.939310604116794, - "learning_rate": 1.6095618623926456e-05, - "loss": 1.7187, + "epoch": 0.6316012425868399, + "grad_norm": 2.9714262691637883, + "learning_rate": 1.8339176069809105e-05, + "loss": 0.8688, "step": 4473 }, { - "epoch": 0.9381421681694275, - "grad_norm": 7.465801384192674, - "learning_rate": 1.6093823799248296e-05, - "loss": 1.9004, + "epoch": 0.6317424456368258, + "grad_norm": 3.6993155010881624, + "learning_rate": 1.833833464415516e-05, + "loss": 1.0227, "step": 4474 }, { - "epoch": 0.9383518557349549, - "grad_norm": 6.514105338582315, - "learning_rate": 1.6092028662251098e-05, - "loss": 1.5694, + "epoch": 0.6318836486868117, + "grad_norm": 3.301771523100553, + "learning_rate": 1.8337493024721916e-05, + "loss": 0.9338, "step": 4475 }, { - "epoch": 0.9385615433004822, - "grad_norm": 5.677454220762757, - "learning_rate": 1.6090233213026866e-05, - "loss": 1.2352, + "epoch": 0.6320248517367976, + "grad_norm": 4.342635213912344, + "learning_rate": 1.8336651211528934e-05, + "loss": 1.0832, "step": 4476 }, { - "epoch": 0.9387712308660097, - "grad_norm": 7.951403162372309, - "learning_rate": 1.6088437451667615e-05, - "loss": 1.7742, + "epoch": 0.6321660547867834, + "grad_norm": 3.217873586231962, + "learning_rate": 1.8335809204595783e-05, + "loss": 0.9106, "step": 4477 }, { - "epoch": 0.938980918431537, - "grad_norm": 6.6573018463867255, - "learning_rate": 1.608664137826539e-05, - "loss": 1.4948, + "epoch": 0.6323072578367692, + "grad_norm": 4.306907120449864, + "learning_rate": 1.833496700394202e-05, + "loss": 1.0176, "step": 4478 }, { - "epoch": 0.9391906059970644, - "grad_norm": 6.213036392454229, - "learning_rate": 1.608484499291223e-05, - "loss": 1.6946, + "epoch": 0.6324484608867551, + "grad_norm": 3.2652759061837955, + "learning_rate": 1.833412460958723e-05, + "loss": 0.9687, "step": 4479 }, { - "epoch": 0.9394002935625917, - "grad_norm": 6.038991206255365, - "learning_rate": 1.6083048295700218e-05, - "loss": 1.6243, + "epoch": 0.632589663936741, + "grad_norm": 4.007952258897854, + "learning_rate": 1.833328202155098e-05, + "loss": 0.9891, "step": 4480 }, { - "epoch": 0.9396099811281191, - "grad_norm": 6.066801593153094, - "learning_rate": 1.6081251286721426e-05, - "loss": 1.4496, + "epoch": 0.6327308669867269, + "grad_norm": 4.5062408290577896, + "learning_rate": 1.8332439239852853e-05, + "loss": 1.191, "step": 4481 }, { - "epoch": 0.9398196686936464, - "grad_norm": 7.372406252383504, - "learning_rate": 1.6079453966067957e-05, - "loss": 1.6421, + "epoch": 0.6328720700367128, + "grad_norm": 4.087579168531247, + "learning_rate": 1.833159626451244e-05, + "loss": 1.1485, "step": 4482 }, { - "epoch": 0.9400293562591738, - "grad_norm": 7.151892012867839, - "learning_rate": 1.6077656333831934e-05, - "loss": 1.6256, + "epoch": 0.6330132730866986, + "grad_norm": 3.2725089384653963, + "learning_rate": 1.8330753095549327e-05, + "loss": 0.9366, "step": 4483 }, { - "epoch": 0.9402390438247012, - "grad_norm": 5.8873624800887265, - "learning_rate": 1.6075858390105477e-05, - "loss": 1.768, + "epoch": 0.6331544761366845, + "grad_norm": 3.5713121535573147, + "learning_rate": 1.8329909732983105e-05, + "loss": 0.8435, "step": 4484 }, { - "epoch": 0.9404487313902286, - "grad_norm": 6.76356366776248, - "learning_rate": 1.607406013498074e-05, - "loss": 1.9942, + "epoch": 0.6332956791866704, + "grad_norm": 3.475280902638243, + "learning_rate": 1.8329066176833382e-05, + "loss": 1.0355, "step": 4485 }, { - "epoch": 0.940658418955756, - "grad_norm": 6.402948221953007, - "learning_rate": 1.607226156854989e-05, - "loss": 1.5599, + "epoch": 0.6334368822366563, + "grad_norm": 3.3673255309496564, + "learning_rate": 1.832822242711976e-05, + "loss": 0.8821, "step": 4486 }, { - "epoch": 0.9408681065212833, - "grad_norm": 6.594757726520265, - "learning_rate": 1.6070462690905098e-05, - "loss": 1.4347, + "epoch": 0.6335780852866422, + "grad_norm": 2.879470937192115, + "learning_rate": 1.8327378483861845e-05, + "loss": 0.7917, "step": 4487 }, { - "epoch": 0.9410777940868107, - "grad_norm": 5.879376147903356, - "learning_rate": 1.606866350213857e-05, - "loss": 1.5229, + "epoch": 0.633719288336628, + "grad_norm": 3.5712047907540923, + "learning_rate": 1.832653434707925e-05, + "loss": 1.0643, "step": 4488 }, { - "epoch": 0.941287481652338, - "grad_norm": 6.642760870292059, - "learning_rate": 1.6066864002342505e-05, - "loss": 1.7565, + "epoch": 0.6338604913866139, + "grad_norm": 4.021387237877244, + "learning_rate": 1.832569001679159e-05, + "loss": 1.1849, "step": 4489 }, { - "epoch": 0.9414971692178654, - "grad_norm": 6.187776560876245, - "learning_rate": 1.606506419160914e-05, - "loss": 1.7074, + "epoch": 0.6340016944365998, + "grad_norm": 4.1168449114814205, + "learning_rate": 1.832484549301849e-05, + "loss": 1.0742, "step": 4490 }, { - "epoch": 0.9417068567833927, - "grad_norm": 6.410270377822095, - "learning_rate": 1.606326407003072e-05, - "loss": 1.7316, + "epoch": 0.6341428974865857, + "grad_norm": 4.254266241993345, + "learning_rate": 1.8324000775779576e-05, + "loss": 1.4046, "step": 4491 }, { - "epoch": 0.9419165443489201, - "grad_norm": 7.3406793803613954, - "learning_rate": 1.6061463637699495e-05, - "loss": 1.6427, + "epoch": 0.6342841005365716, + "grad_norm": 3.5818406374412124, + "learning_rate": 1.8323155865094483e-05, + "loss": 0.9332, "step": 4492 }, { - "epoch": 0.9421262319144474, - "grad_norm": 5.63502249057864, - "learning_rate": 1.605966289470775e-05, - "loss": 1.5482, + "epoch": 0.6344253035865575, + "grad_norm": 3.390283448361259, + "learning_rate": 1.832231076098284e-05, + "loss": 0.8011, "step": 4493 }, { - "epoch": 0.9423359194799749, - "grad_norm": 5.895473895118337, - "learning_rate": 1.605786184114777e-05, - "loss": 1.4428, + "epoch": 0.6345665066365433, + "grad_norm": 3.539876510569891, + "learning_rate": 1.8321465463464287e-05, + "loss": 0.9926, "step": 4494 }, { - "epoch": 0.9425456070455022, - "grad_norm": 6.343393969410663, - "learning_rate": 1.6056060477111864e-05, - "loss": 2.04, + "epoch": 0.6347077096865292, + "grad_norm": 3.9181161661553783, + "learning_rate": 1.8320619972558474e-05, + "loss": 1.0555, "step": 4495 }, { - "epoch": 0.9427552946110296, - "grad_norm": 6.703423611785393, - "learning_rate": 1.6054258802692353e-05, - "loss": 1.6578, + "epoch": 0.6348489127365151, + "grad_norm": 3.81105112581345, + "learning_rate": 1.831977428828504e-05, + "loss": 1.0853, "step": 4496 }, { - "epoch": 0.9429649821765569, - "grad_norm": 6.479350483314323, - "learning_rate": 1.605245681798158e-05, - "loss": 1.7604, + "epoch": 0.634990115786501, + "grad_norm": 3.0529870825959327, + "learning_rate": 1.831892841066365e-05, + "loss": 1.0287, "step": 4497 }, { - "epoch": 0.9431746697420843, - "grad_norm": 6.485721806327503, - "learning_rate": 1.6050654523071894e-05, - "loss": 1.633, + "epoch": 0.6351313188364869, + "grad_norm": 4.162422965998469, + "learning_rate": 1.8318082339713955e-05, + "loss": 1.0866, "step": 4498 }, { - "epoch": 0.9433843573076116, - "grad_norm": 6.169951459210537, - "learning_rate": 1.604885191805567e-05, - "loss": 1.7649, + "epoch": 0.6352725218864728, + "grad_norm": 3.84856250614326, + "learning_rate": 1.831723607545562e-05, + "loss": 1.1286, "step": 4499 }, { - "epoch": 0.943594044873139, - "grad_norm": 6.464780003616582, - "learning_rate": 1.6047049003025298e-05, - "loss": 1.6471, + "epoch": 0.6354137249364586, + "grad_norm": 3.3861787769267546, + "learning_rate": 1.831638961790831e-05, + "loss": 0.9862, "step": 4500 }, { - "epoch": 0.9438037324386663, - "grad_norm": 6.193191443638445, - "learning_rate": 1.6045245778073176e-05, - "loss": 1.5919, + "epoch": 0.6355549279864445, + "grad_norm": 4.055853400177247, + "learning_rate": 1.8315542967091695e-05, + "loss": 1.0254, "step": 4501 }, { - "epoch": 0.9440134200041937, - "grad_norm": 6.986993507371011, - "learning_rate": 1.604344224329172e-05, - "loss": 1.8541, + "epoch": 0.6356961310364304, + "grad_norm": 3.904491008047052, + "learning_rate": 1.8314696123025456e-05, + "loss": 1.0947, "step": 4502 }, { - "epoch": 0.9442231075697212, - "grad_norm": 6.040887580301437, - "learning_rate": 1.604163839877337e-05, - "loss": 1.8934, + "epoch": 0.6358373340864163, + "grad_norm": 3.6570228640694866, + "learning_rate": 1.8313849085729267e-05, + "loss": 0.9937, "step": 4503 }, { - "epoch": 0.9444327951352485, - "grad_norm": 6.2020766958797395, - "learning_rate": 1.603983424461057e-05, - "loss": 1.5522, + "epoch": 0.6359785371364022, + "grad_norm": 4.152901635512655, + "learning_rate": 1.8313001855222812e-05, + "loss": 1.1166, "step": 4504 }, { - "epoch": 0.9446424827007759, - "grad_norm": 6.1808520910949065, - "learning_rate": 1.6038029780895792e-05, - "loss": 1.6089, + "epoch": 0.6361197401863881, + "grad_norm": 3.315713716575013, + "learning_rate": 1.831215443152579e-05, + "loss": 0.9015, "step": 4505 }, { - "epoch": 0.9448521702663032, - "grad_norm": 7.3752134766282635, - "learning_rate": 1.6036225007721514e-05, - "loss": 2.0526, + "epoch": 0.6362609432363739, + "grad_norm": 3.410684538342167, + "learning_rate": 1.8311306814657886e-05, + "loss": 1.0261, "step": 4506 }, { - "epoch": 0.9450618578318306, - "grad_norm": 6.958233505604151, - "learning_rate": 1.6034419925180236e-05, - "loss": 1.571, + "epoch": 0.6364021462863598, + "grad_norm": 3.840334847038781, + "learning_rate": 1.8310459004638805e-05, + "loss": 0.9529, "step": 4507 }, { - "epoch": 0.9452715453973579, - "grad_norm": 6.980913167830229, - "learning_rate": 1.603261453336447e-05, - "loss": 1.6079, + "epoch": 0.6365433493363457, + "grad_norm": 3.65332010702626, + "learning_rate": 1.8309611001488242e-05, + "loss": 1.0287, "step": 4508 }, { - "epoch": 0.9454812329628853, - "grad_norm": 6.076386193352435, - "learning_rate": 1.6030808832366746e-05, - "loss": 1.7133, + "epoch": 0.6366845523863316, + "grad_norm": 3.933885186415056, + "learning_rate": 1.830876280522591e-05, + "loss": 1.2359, "step": 4509 }, { - "epoch": 0.9456909205284126, - "grad_norm": 5.34987463263882, - "learning_rate": 1.6029002822279613e-05, - "loss": 1.6037, + "epoch": 0.6368257554363175, + "grad_norm": 3.5427175444104626, + "learning_rate": 1.8307914415871516e-05, + "loss": 0.9466, "step": 4510 }, { - "epoch": 0.94590060809394, - "grad_norm": 8.577410465804133, - "learning_rate": 1.6027196503195623e-05, - "loss": 2.3296, + "epoch": 0.6369669584863034, + "grad_norm": 3.1932845287353095, + "learning_rate": 1.8307065833444784e-05, + "loss": 0.8355, "step": 4511 }, { - "epoch": 0.9461102956594674, - "grad_norm": 6.872323653709511, - "learning_rate": 1.602538987520736e-05, - "loss": 1.7701, + "epoch": 0.6371081615362891, + "grad_norm": 3.1010186561725277, + "learning_rate": 1.8306217057965427e-05, + "loss": 0.8894, "step": 4512 }, { - "epoch": 0.9463199832249948, - "grad_norm": 6.325983267266496, - "learning_rate": 1.6023582938407417e-05, - "loss": 1.6765, + "epoch": 0.637249364586275, + "grad_norm": 4.006587176440935, + "learning_rate": 1.830536808945317e-05, + "loss": 0.9065, "step": 4513 }, { - "epoch": 0.9465296707905221, - "grad_norm": 6.1655850707221935, - "learning_rate": 1.60217756928884e-05, - "loss": 1.9077, + "epoch": 0.6373905676362609, + "grad_norm": 3.5575402574691735, + "learning_rate": 1.8304518927927745e-05, + "loss": 1.1287, "step": 4514 }, { - "epoch": 0.9467393583560495, - "grad_norm": 6.096162362692154, - "learning_rate": 1.6019968138742938e-05, - "loss": 1.697, + "epoch": 0.6375317706862468, + "grad_norm": 3.19681203157768, + "learning_rate": 1.8303669573408892e-05, + "loss": 1.0517, "step": 4515 }, { - "epoch": 0.9469490459215768, - "grad_norm": 7.415107139185473, - "learning_rate": 1.6018160276063663e-05, - "loss": 2.1718, + "epoch": 0.6376729737362327, + "grad_norm": 4.1789453598548745, + "learning_rate": 1.830282002591634e-05, + "loss": 1.1777, "step": 4516 }, { - "epoch": 0.9471587334871042, - "grad_norm": 6.1149474435779485, - "learning_rate": 1.601635210494324e-05, - "loss": 1.384, + "epoch": 0.6378141767862185, + "grad_norm": 3.516854343342237, + "learning_rate": 1.8301970285469836e-05, + "loss": 1.0064, "step": 4517 }, { - "epoch": 0.9473684210526315, - "grad_norm": 6.396663552752187, - "learning_rate": 1.6014543625474334e-05, - "loss": 1.695, + "epoch": 0.6379553798362044, + "grad_norm": 4.217738613973506, + "learning_rate": 1.830112035208913e-05, + "loss": 1.3973, "step": 4518 }, { - "epoch": 0.9475781086181589, - "grad_norm": 5.784633772417223, - "learning_rate": 1.6012734837749632e-05, - "loss": 1.8603, + "epoch": 0.6380965828861903, + "grad_norm": 3.3091890250436995, + "learning_rate": 1.830027022579397e-05, + "loss": 0.9522, "step": 4519 }, { - "epoch": 0.9477877961836864, - "grad_norm": 6.499001302760238, - "learning_rate": 1.6010925741861848e-05, - "loss": 1.8, + "epoch": 0.6382377859361762, + "grad_norm": 3.8105978136497964, + "learning_rate": 1.8299419906604115e-05, + "loss": 1.0089, "step": 4520 }, { - "epoch": 0.9479974837492137, - "grad_norm": 5.5183814267454885, - "learning_rate": 1.6009116337903692e-05, - "loss": 1.8045, + "epoch": 0.6383789889861621, + "grad_norm": 4.150422685226725, + "learning_rate": 1.829856939453933e-05, + "loss": 1.4429, "step": 4521 }, { - "epoch": 0.9482071713147411, - "grad_norm": 5.994249173378188, - "learning_rate": 1.6007306625967897e-05, - "loss": 1.6435, + "epoch": 0.638520192036148, + "grad_norm": 3.519358245190021, + "learning_rate": 1.8297718689619374e-05, + "loss": 0.9234, "step": 4522 }, { - "epoch": 0.9484168588802684, - "grad_norm": 5.995555086356783, - "learning_rate": 1.6005496606147223e-05, - "loss": 1.8597, + "epoch": 0.6386613950861338, + "grad_norm": 3.419689646828112, + "learning_rate": 1.8296867791864015e-05, + "loss": 0.9182, "step": 4523 }, { - "epoch": 0.9486265464457958, - "grad_norm": 6.152916742314905, - "learning_rate": 1.600368627853443e-05, - "loss": 1.8165, + "epoch": 0.6388025981361197, + "grad_norm": 3.933473215591213, + "learning_rate": 1.8296016701293037e-05, + "loss": 1.0111, "step": 4524 }, { - "epoch": 0.9488362340113231, - "grad_norm": 5.958726669185426, - "learning_rate": 1.6001875643222304e-05, - "loss": 1.9461, + "epoch": 0.6389438011861056, + "grad_norm": 3.656649283461363, + "learning_rate": 1.8295165417926207e-05, + "loss": 0.9708, "step": 4525 }, { - "epoch": 0.9490459215768505, - "grad_norm": 6.791255862748508, - "learning_rate": 1.600006470030364e-05, - "loss": 1.8562, + "epoch": 0.6390850042360915, + "grad_norm": 3.2013994759170146, + "learning_rate": 1.829431394178332e-05, + "loss": 0.9283, "step": 4526 }, { - "epoch": 0.9492556091423778, - "grad_norm": 5.988010677214452, - "learning_rate": 1.5998253449871254e-05, - "loss": 1.7902, + "epoch": 0.6392262072860774, + "grad_norm": 3.5633902628225376, + "learning_rate": 1.829346227288416e-05, + "loss": 1.0057, "step": 4527 }, { - "epoch": 0.9494652967079052, - "grad_norm": 6.099693587861169, - "learning_rate": 1.5996441892017976e-05, - "loss": 1.595, + "epoch": 0.6393674103360633, + "grad_norm": 4.063219815764296, + "learning_rate": 1.8292610411248515e-05, + "loss": 1.0876, "step": 4528 }, { - "epoch": 0.9496749842734326, - "grad_norm": 5.631905644757514, - "learning_rate": 1.599463002683665e-05, - "loss": 1.8557, + "epoch": 0.6395086133860491, + "grad_norm": 3.285624569423119, + "learning_rate": 1.829175835689619e-05, + "loss": 0.9535, "step": 4529 }, { - "epoch": 0.94988467183896, - "grad_norm": 6.205132243304965, - "learning_rate": 1.599281785442014e-05, - "loss": 1.7436, + "epoch": 0.639649816436035, + "grad_norm": 3.8741562196396733, + "learning_rate": 1.8290906109846974e-05, + "loss": 1.0871, "step": 4530 }, { - "epoch": 0.9500943594044873, - "grad_norm": 6.708527451715467, - "learning_rate": 1.5991005374861316e-05, - "loss": 1.8037, + "epoch": 0.6397910194860209, + "grad_norm": 3.921147166257154, + "learning_rate": 1.8290053670120688e-05, + "loss": 1.1746, "step": 4531 }, { - "epoch": 0.9503040469700147, - "grad_norm": 6.623699121242019, - "learning_rate": 1.598919258825308e-05, - "loss": 2.0272, + "epoch": 0.6399322225360068, + "grad_norm": 3.3778631453096444, + "learning_rate": 1.828920103773713e-05, + "loss": 0.9642, "step": 4532 }, { - "epoch": 0.950513734535542, - "grad_norm": 5.851493749418373, - "learning_rate": 1.5987379494688337e-05, - "loss": 1.5981, + "epoch": 0.6400734255859927, + "grad_norm": 3.157425385591998, + "learning_rate": 1.8288348212716123e-05, + "loss": 0.8477, "step": 4533 }, { - "epoch": 0.9507234221010694, - "grad_norm": 6.873572559800961, - "learning_rate": 1.5985566094260007e-05, - "loss": 1.85, + "epoch": 0.6402146286359786, + "grad_norm": 3.251755664706043, + "learning_rate": 1.8287495195077482e-05, + "loss": 0.9638, "step": 4534 }, { - "epoch": 0.9509331096665967, - "grad_norm": 5.6998256818711965, - "learning_rate": 1.5983752387061032e-05, - "loss": 1.225, + "epoch": 0.6403558316859644, + "grad_norm": 3.4943158578558937, + "learning_rate": 1.8286641984841034e-05, + "loss": 1.0252, "step": 4535 }, { - "epoch": 0.9511427972321241, - "grad_norm": 6.969952717142911, - "learning_rate": 1.5981938373184377e-05, - "loss": 1.7099, + "epoch": 0.6404970347359503, + "grad_norm": 4.262836446468636, + "learning_rate": 1.8285788582026602e-05, + "loss": 1.1046, "step": 4536 }, { - "epoch": 0.9513524847976516, - "grad_norm": 5.852717917542091, - "learning_rate": 1.5980124052722995e-05, - "loss": 1.6431, + "epoch": 0.6406382377859362, + "grad_norm": 3.458998047442154, + "learning_rate": 1.828493498665402e-05, + "loss": 1.0034, "step": 4537 }, { - "epoch": 0.9515621723631789, - "grad_norm": 6.461349536437548, - "learning_rate": 1.597830942576989e-05, - "loss": 1.7041, + "epoch": 0.6407794408359221, + "grad_norm": 3.527031350757502, + "learning_rate": 1.828408119874313e-05, + "loss": 1.0655, "step": 4538 }, { - "epoch": 0.9517718599287063, - "grad_norm": 6.691571438771863, - "learning_rate": 1.597649449241806e-05, - "loss": 1.9284, + "epoch": 0.640920643885908, + "grad_norm": 3.866473359486658, + "learning_rate": 1.828322721831377e-05, + "loss": 1.1108, "step": 4539 }, { - "epoch": 0.9519815474942336, - "grad_norm": 6.308227383453972, - "learning_rate": 1.597467925276051e-05, - "loss": 1.7432, + "epoch": 0.6410618469358939, + "grad_norm": 4.54847754312386, + "learning_rate": 1.8282373045385786e-05, + "loss": 1.2367, "step": 4540 }, { - "epoch": 0.952191235059761, - "grad_norm": 5.88915827088278, - "learning_rate": 1.5972863706890295e-05, - "loss": 1.2667, + "epoch": 0.6412030499858797, + "grad_norm": 3.7804342098021553, + "learning_rate": 1.8281518679979033e-05, + "loss": 0.8964, "step": 4541 }, { - "epoch": 0.9524009226252883, - "grad_norm": 6.982953354675416, - "learning_rate": 1.5971047854900456e-05, - "loss": 1.6122, + "epoch": 0.6413442530358656, + "grad_norm": 4.13911516392743, + "learning_rate": 1.8280664122113356e-05, + "loss": 1.1259, "step": 4542 }, { - "epoch": 0.9526106101908157, - "grad_norm": 5.752859914088138, - "learning_rate": 1.5969231696884056e-05, - "loss": 1.4876, + "epoch": 0.6414854560858515, + "grad_norm": 3.3151382805248812, + "learning_rate": 1.8279809371808624e-05, + "loss": 0.9799, "step": 4543 }, { - "epoch": 0.952820297756343, - "grad_norm": 6.107082342256429, - "learning_rate": 1.596741523293418e-05, - "loss": 1.4963, + "epoch": 0.6416266591358374, + "grad_norm": 3.5757881553227744, + "learning_rate": 1.82789544290847e-05, + "loss": 1.0838, "step": 4544 }, { - "epoch": 0.9530299853218704, - "grad_norm": 7.116168528640246, - "learning_rate": 1.596559846314392e-05, - "loss": 1.9147, + "epoch": 0.6417678621858233, + "grad_norm": 3.3304208783556883, + "learning_rate": 1.8278099293961447e-05, + "loss": 0.9105, "step": 4545 }, { - "epoch": 0.9532396728873977, - "grad_norm": 7.594009396455394, - "learning_rate": 1.5963781387606392e-05, - "loss": 1.8329, + "epoch": 0.641909065235809, + "grad_norm": 4.039868994568356, + "learning_rate": 1.8277243966458744e-05, + "loss": 1.1773, "step": 4546 }, { - "epoch": 0.9534493604529252, - "grad_norm": 6.3269892407735036, - "learning_rate": 1.596196400641473e-05, - "loss": 1.8286, + "epoch": 0.6420502682857949, + "grad_norm": 4.101694453999152, + "learning_rate": 1.8276388446596465e-05, + "loss": 1.4507, "step": 4547 }, { - "epoch": 0.9536590480184525, - "grad_norm": 6.394814109983584, - "learning_rate": 1.5960146319662065e-05, - "loss": 1.679, + "epoch": 0.6421914713357808, + "grad_norm": 3.683479892194015, + "learning_rate": 1.827553273439449e-05, + "loss": 0.9877, "step": 4548 }, { - "epoch": 0.9538687355839799, - "grad_norm": 6.129156696230484, - "learning_rate": 1.5958328327441568e-05, - "loss": 1.818, + "epoch": 0.6423326743857667, + "grad_norm": 3.2534788849997804, + "learning_rate": 1.8274676829872714e-05, + "loss": 0.9815, "step": 4549 }, { - "epoch": 0.9540784231495072, - "grad_norm": 5.626376146621979, - "learning_rate": 1.5956510029846408e-05, - "loss": 1.7069, + "epoch": 0.6424738774357526, + "grad_norm": 3.2377073043618165, + "learning_rate": 1.8273820733051016e-05, + "loss": 0.9049, "step": 4550 }, { - "epoch": 0.9542881107150346, - "grad_norm": 6.8157582578920355, - "learning_rate": 1.595469142696978e-05, - "loss": 1.8133, + "epoch": 0.6426150804857385, + "grad_norm": 3.5475742046770202, + "learning_rate": 1.8272964443949305e-05, + "loss": 0.9928, "step": 4551 }, { - "epoch": 0.9544977982805619, - "grad_norm": 6.412778511828664, - "learning_rate": 1.5952872518904887e-05, - "loss": 1.7625, + "epoch": 0.6427562835357243, + "grad_norm": 3.719771490078364, + "learning_rate": 1.8272107962587465e-05, + "loss": 1.1384, "step": 4552 }, { - "epoch": 0.9547074858460893, - "grad_norm": 6.260219544721869, - "learning_rate": 1.5951053305744954e-05, - "loss": 1.7489, + "epoch": 0.6428974865857102, + "grad_norm": 3.5243567234010467, + "learning_rate": 1.8271251288985414e-05, + "loss": 1.0709, "step": 4553 }, { - "epoch": 0.9549171734116166, - "grad_norm": 6.6505690868592495, - "learning_rate": 1.5949233787583215e-05, - "loss": 2.095, + "epoch": 0.6430386896356961, + "grad_norm": 3.433692503565806, + "learning_rate": 1.827039442316305e-05, + "loss": 1.0366, "step": 4554 }, { - "epoch": 0.955126860977144, - "grad_norm": 5.273690642091402, - "learning_rate": 1.594741396451293e-05, - "loss": 1.3721, + "epoch": 0.643179892685682, + "grad_norm": 3.965191933649661, + "learning_rate": 1.8269537365140294e-05, + "loss": 0.9199, "step": 4555 }, { - "epoch": 0.9553365485426715, - "grad_norm": 5.504043299719686, - "learning_rate": 1.594559383662736e-05, - "loss": 1.2659, + "epoch": 0.6433210957356679, + "grad_norm": 3.6968802292751732, + "learning_rate": 1.8268680114937064e-05, + "loss": 0.9139, "step": 4556 }, { - "epoch": 0.9555462361081988, - "grad_norm": 6.947730056911571, - "learning_rate": 1.5943773404019792e-05, - "loss": 1.7903, + "epoch": 0.6434622987856538, + "grad_norm": 3.425189899571042, + "learning_rate": 1.8267822672573276e-05, + "loss": 0.9445, "step": 4557 }, { - "epoch": 0.9557559236737262, - "grad_norm": 6.203974909715322, - "learning_rate": 1.5941952666783533e-05, - "loss": 1.472, + "epoch": 0.6436035018356396, + "grad_norm": 3.326960335496322, + "learning_rate": 1.8266965038068856e-05, + "loss": 0.8838, "step": 4558 }, { - "epoch": 0.9559656112392535, - "grad_norm": 7.61949736418357, - "learning_rate": 1.5940131625011895e-05, - "loss": 1.9565, + "epoch": 0.6437447048856255, + "grad_norm": 3.3413559533621897, + "learning_rate": 1.8266107211443747e-05, + "loss": 0.9792, "step": 4559 }, { - "epoch": 0.9561752988047809, - "grad_norm": 5.814309019725462, - "learning_rate": 1.5938310278798205e-05, - "loss": 1.6565, + "epoch": 0.6438859079356114, + "grad_norm": 3.549229661574941, + "learning_rate": 1.8265249192717868e-05, + "loss": 1.0903, "step": 4560 }, { - "epoch": 0.9563849863703082, - "grad_norm": 5.757177845230832, - "learning_rate": 1.5936488628235815e-05, - "loss": 1.7122, + "epoch": 0.6440271109855973, + "grad_norm": 3.6421008045368284, + "learning_rate": 1.8264390981911174e-05, + "loss": 1.1925, "step": 4561 }, { - "epoch": 0.9565946739358356, - "grad_norm": 5.422158293647324, - "learning_rate": 1.5934666673418087e-05, - "loss": 1.521, + "epoch": 0.6441683140355832, + "grad_norm": 3.720524097986373, + "learning_rate": 1.8263532579043598e-05, + "loss": 1.0794, "step": 4562 }, { - "epoch": 0.9568043615013629, - "grad_norm": 6.968081015071573, - "learning_rate": 1.5932844414438396e-05, - "loss": 1.536, + "epoch": 0.644309517085569, + "grad_norm": 3.436122760357629, + "learning_rate": 1.8262673984135096e-05, + "loss": 1.1836, "step": 4563 }, { - "epoch": 0.9570140490668904, - "grad_norm": 7.639328884700881, - "learning_rate": 1.593102185139014e-05, - "loss": 1.829, + "epoch": 0.6444507201355549, + "grad_norm": 4.031870773754906, + "learning_rate": 1.8261815197205617e-05, + "loss": 1.0171, "step": 4564 }, { - "epoch": 0.9572237366324177, - "grad_norm": 7.6757736312789575, - "learning_rate": 1.592919898436673e-05, - "loss": 1.7969, + "epoch": 0.6445919231855408, + "grad_norm": 3.7045512501971944, + "learning_rate": 1.8260956218275125e-05, + "loss": 1.1864, "step": 4565 }, { - "epoch": 0.9574334241979451, - "grad_norm": 7.060474843598261, - "learning_rate": 1.5927375813461587e-05, - "loss": 1.832, + "epoch": 0.6447331262355267, + "grad_norm": 3.824516092540899, + "learning_rate": 1.8260097047363575e-05, + "loss": 1.1279, "step": 4566 }, { - "epoch": 0.9576431117634724, - "grad_norm": 7.537485009525664, - "learning_rate": 1.5925552338768155e-05, - "loss": 1.7992, + "epoch": 0.6448743292855126, + "grad_norm": 3.5204651403061935, + "learning_rate": 1.8259237684490937e-05, + "loss": 0.9931, "step": 4567 }, { - "epoch": 0.9578527993289998, - "grad_norm": 6.252338307285478, - "learning_rate": 1.5923728560379882e-05, - "loss": 1.4291, + "epoch": 0.6450155323354985, + "grad_norm": 3.7345374798212596, + "learning_rate": 1.8258378129677184e-05, + "loss": 1.1322, "step": 4568 }, { - "epoch": 0.9580624868945271, - "grad_norm": 6.704310944156857, - "learning_rate": 1.592190447839025e-05, - "loss": 1.788, + "epoch": 0.6451567353854843, + "grad_norm": 3.8539690488262375, + "learning_rate": 1.8257518382942286e-05, + "loss": 0.9222, "step": 4569 }, { - "epoch": 0.9582721744600545, - "grad_norm": 5.620503607854182, - "learning_rate": 1.5920080092892744e-05, - "loss": 1.127, + "epoch": 0.6452979384354702, + "grad_norm": 4.159862384564203, + "learning_rate": 1.8256658444306233e-05, + "loss": 1.0485, "step": 4570 }, { - "epoch": 0.9584818620255818, - "grad_norm": 6.257402605301782, - "learning_rate": 1.5918255403980865e-05, - "loss": 1.6515, + "epoch": 0.6454391414854561, + "grad_norm": 3.85053409061351, + "learning_rate": 1.8255798313789e-05, + "loss": 0.8996, "step": 4571 }, { - "epoch": 0.9586915495911092, - "grad_norm": 6.3976192662948055, - "learning_rate": 1.591643041174813e-05, - "loss": 1.7114, + "epoch": 0.645580344535442, + "grad_norm": 3.434964937458507, + "learning_rate": 1.825493799141058e-05, + "loss": 1.003, "step": 4572 }, { - "epoch": 0.9589012371566367, - "grad_norm": 6.032598021799135, - "learning_rate": 1.5914605116288075e-05, - "loss": 1.4725, + "epoch": 0.6457215475854279, + "grad_norm": 3.546202297219499, + "learning_rate": 1.8254077477190965e-05, + "loss": 1.1473, "step": 4573 }, { - "epoch": 0.959110924722164, - "grad_norm": 7.855252083920772, - "learning_rate": 1.591277951769425e-05, - "loss": 1.9874, + "epoch": 0.6458627506354138, + "grad_norm": 4.063426613953572, + "learning_rate": 1.8253216771150153e-05, + "loss": 1.3927, "step": 4574 }, { - "epoch": 0.9593206122876914, - "grad_norm": 5.998395434232328, - "learning_rate": 1.5910953616060217e-05, - "loss": 1.7291, + "epoch": 0.6460039536853996, + "grad_norm": 4.387307867119283, + "learning_rate": 1.825235587330815e-05, + "loss": 1.3608, "step": 4575 }, { - "epoch": 0.9595302998532187, - "grad_norm": 5.9393630955492105, - "learning_rate": 1.5909127411479563e-05, - "loss": 1.7275, + "epoch": 0.6461451567353855, + "grad_norm": 4.64016898026523, + "learning_rate": 1.825149478368496e-05, + "loss": 1.3142, "step": 4576 }, { - "epoch": 0.9597399874187461, - "grad_norm": 6.987690709005374, - "learning_rate": 1.5907300904045877e-05, - "loss": 1.4758, + "epoch": 0.6462863597853714, + "grad_norm": 3.884925241223521, + "learning_rate": 1.8250633502300595e-05, + "loss": 1.1283, "step": 4577 }, { - "epoch": 0.9599496749842734, - "grad_norm": 8.41346772379831, - "learning_rate": 1.5905474093852776e-05, - "loss": 1.7811, + "epoch": 0.6464275628353573, + "grad_norm": 3.5173560731835427, + "learning_rate": 1.8249772029175067e-05, + "loss": 1.2375, "step": 4578 }, { - "epoch": 0.9601593625498008, - "grad_norm": 7.363456930144115, - "learning_rate": 1.590364698099389e-05, - "loss": 1.5923, + "epoch": 0.6465687658853432, + "grad_norm": 3.4559690525684017, + "learning_rate": 1.82489103643284e-05, + "loss": 0.8954, "step": 4579 }, { - "epoch": 0.9603690501153281, - "grad_norm": 7.055045643140238, - "learning_rate": 1.5901819565562848e-05, - "loss": 2.1664, + "epoch": 0.646709968935329, + "grad_norm": 3.449057794178173, + "learning_rate": 1.8248048507780626e-05, + "loss": 1.1819, "step": 4580 }, { - "epoch": 0.9605787376808556, - "grad_norm": 6.356748596114354, - "learning_rate": 1.5899991847653323e-05, - "loss": 1.4083, + "epoch": 0.6468511719853148, + "grad_norm": 4.628988383032907, + "learning_rate": 1.824718645955176e-05, + "loss": 1.3103, "step": 4581 }, { - "epoch": 0.9607884252463829, - "grad_norm": 6.319358262812723, - "learning_rate": 1.5898163827358983e-05, - "loss": 1.589, + "epoch": 0.6469923750353007, + "grad_norm": 4.230663153231694, + "learning_rate": 1.8246324219661848e-05, + "loss": 1.1269, "step": 4582 }, { - "epoch": 0.9609981128119103, - "grad_norm": 6.275077172947977, - "learning_rate": 1.589633550477351e-05, - "loss": 1.629, + "epoch": 0.6471335780852866, + "grad_norm": 4.635457842218894, + "learning_rate": 1.8245461788130913e-05, + "loss": 1.1859, "step": 4583 }, { - "epoch": 0.9612078003774376, - "grad_norm": 5.551624146851175, - "learning_rate": 1.589450687999062e-05, - "loss": 1.512, + "epoch": 0.6472747811352725, + "grad_norm": 3.8603934556932202, + "learning_rate": 1.8244599164979015e-05, + "loss": 0.9436, "step": 4584 }, { - "epoch": 0.961417487942965, - "grad_norm": 5.818591173106327, - "learning_rate": 1.5892677953104032e-05, - "loss": 1.6917, + "epoch": 0.6474159841852584, + "grad_norm": 3.239888791174741, + "learning_rate": 1.824373635022619e-05, + "loss": 0.8821, "step": 4585 }, { - "epoch": 0.9616271755084923, - "grad_norm": 6.530800859708341, - "learning_rate": 1.5890848724207476e-05, - "loss": 1.3499, + "epoch": 0.6475571872352442, + "grad_norm": 3.796674885944984, + "learning_rate": 1.8242873343892494e-05, + "loss": 1.0962, "step": 4586 }, { - "epoch": 0.9618368630740197, - "grad_norm": 6.317686780656677, - "learning_rate": 1.5889019193394703e-05, - "loss": 1.5654, + "epoch": 0.6476983902852301, + "grad_norm": 3.3894691424612065, + "learning_rate": 1.824201014599798e-05, + "loss": 0.9973, "step": 4587 }, { - "epoch": 0.962046550639547, - "grad_norm": 7.2755995189196625, - "learning_rate": 1.5887189360759487e-05, - "loss": 1.7316, + "epoch": 0.647839593335216, + "grad_norm": 3.7404034486926916, + "learning_rate": 1.824114675656271e-05, + "loss": 1.2176, "step": 4588 }, { - "epoch": 0.9622562382050744, - "grad_norm": 6.768192470348803, - "learning_rate": 1.5885359226395603e-05, - "loss": 1.7223, + "epoch": 0.6479807963852019, + "grad_norm": 3.2981377484431467, + "learning_rate": 1.824028317560675e-05, + "loss": 0.9491, "step": 4589 }, { - "epoch": 0.9624659257706017, - "grad_norm": 7.503512442048614, - "learning_rate": 1.5883528790396853e-05, - "loss": 1.7287, + "epoch": 0.6481219994351878, + "grad_norm": 3.773070793854085, + "learning_rate": 1.823941940315017e-05, + "loss": 1.0863, "step": 4590 }, { - "epoch": 0.9626756133361292, - "grad_norm": 6.511368591079411, - "learning_rate": 1.5881698052857047e-05, - "loss": 1.3884, + "epoch": 0.6482632024851737, + "grad_norm": 4.686594941398059, + "learning_rate": 1.8238555439213033e-05, + "loss": 1.1405, "step": 4591 }, { - "epoch": 0.9628853009016566, - "grad_norm": 7.347753474197154, - "learning_rate": 1.5879867013870012e-05, - "loss": 1.7772, + "epoch": 0.6484044055351595, + "grad_norm": 4.468332682798484, + "learning_rate": 1.823769128381543e-05, + "loss": 1.1687, "step": 4592 }, { - "epoch": 0.9630949884671839, - "grad_norm": 6.191386099009408, - "learning_rate": 1.5878035673529597e-05, - "loss": 1.6401, + "epoch": 0.6485456085851454, + "grad_norm": 3.748583185874479, + "learning_rate": 1.823682693697744e-05, + "loss": 1.0472, "step": 4593 }, { - "epoch": 0.9633046760327113, - "grad_norm": 7.46404297369746, - "learning_rate": 1.587620403192966e-05, - "loss": 1.937, + "epoch": 0.6486868116351313, + "grad_norm": 4.288275618474232, + "learning_rate": 1.823596239871915e-05, + "loss": 1.0521, "step": 4594 }, { - "epoch": 0.9635143635982386, - "grad_norm": 5.94210095297413, - "learning_rate": 1.587437208916407e-05, - "loss": 1.3525, + "epoch": 0.6488280146851172, + "grad_norm": 3.990739923176175, + "learning_rate": 1.823509766906065e-05, + "loss": 1.111, "step": 4595 }, { - "epoch": 0.963724051163766, - "grad_norm": 6.098101893787626, - "learning_rate": 1.5872539845326726e-05, - "loss": 1.8992, + "epoch": 0.6489692177351031, + "grad_norm": 4.020429423251817, + "learning_rate": 1.8234232748022033e-05, + "loss": 1.2982, "step": 4596 }, { - "epoch": 0.9639337387292933, - "grad_norm": 5.886779164198539, - "learning_rate": 1.5870707300511528e-05, - "loss": 1.5636, + "epoch": 0.649110420785089, + "grad_norm": 4.486556060264408, + "learning_rate": 1.8233367635623407e-05, + "loss": 1.2132, "step": 4597 }, { - "epoch": 0.9641434262948207, - "grad_norm": 7.063862744816543, - "learning_rate": 1.5868874454812398e-05, - "loss": 1.6299, + "epoch": 0.6492516238350748, + "grad_norm": 3.708488563165353, + "learning_rate": 1.823250233188487e-05, + "loss": 0.9743, "step": 4598 }, { - "epoch": 0.964353113860348, - "grad_norm": 6.598440461335364, - "learning_rate": 1.5867041308323274e-05, - "loss": 1.718, + "epoch": 0.6493928268850607, + "grad_norm": 3.331277906535079, + "learning_rate": 1.8231636836826538e-05, + "loss": 1.0684, "step": 4599 }, { - "epoch": 0.9645628014258755, - "grad_norm": 7.584531756935614, - "learning_rate": 1.5865207861138107e-05, - "loss": 1.8372, + "epoch": 0.6495340299350466, + "grad_norm": 4.586559809840767, + "learning_rate": 1.8230771150468517e-05, + "loss": 1.186, "step": 4600 }, { - "epoch": 0.9647724889914028, - "grad_norm": 6.928126624095986, - "learning_rate": 1.5863374113350864e-05, - "loss": 1.4052, + "epoch": 0.6496752329850325, + "grad_norm": 3.6494978936888347, + "learning_rate": 1.8229905272830932e-05, + "loss": 0.9938, "step": 4601 }, { - "epoch": 0.9649821765569302, - "grad_norm": 6.863964059057822, - "learning_rate": 1.5861540065055532e-05, - "loss": 1.827, + "epoch": 0.6498164360350184, + "grad_norm": 4.5140921628629975, + "learning_rate": 1.8229039203933903e-05, + "loss": 1.0305, "step": 4602 }, { - "epoch": 0.9651918641224575, - "grad_norm": 7.002270321597739, - "learning_rate": 1.58597057163461e-05, - "loss": 1.6869, + "epoch": 0.6499576390850043, + "grad_norm": 3.7473310258422785, + "learning_rate": 1.8228172943797554e-05, + "loss": 1.2156, "step": 4603 }, { - "epoch": 0.9654015516879849, - "grad_norm": 6.43765929042971, - "learning_rate": 1.5857871067316592e-05, - "loss": 1.2841, + "epoch": 0.6500988421349901, + "grad_norm": 3.8694364782183106, + "learning_rate": 1.8227306492442022e-05, + "loss": 1.0766, "step": 4604 }, { - "epoch": 0.9656112392535122, - "grad_norm": 7.623892005949478, - "learning_rate": 1.5856036118061028e-05, - "loss": 1.8407, + "epoch": 0.650240045184976, + "grad_norm": 3.6788366402542367, + "learning_rate": 1.8226439849887437e-05, + "loss": 0.9183, "step": 4605 }, { - "epoch": 0.9658209268190396, - "grad_norm": 6.474918336242843, - "learning_rate": 1.5854200868673455e-05, - "loss": 1.8253, + "epoch": 0.6503812482349619, + "grad_norm": 3.50909888915248, + "learning_rate": 1.8225573016153945e-05, + "loss": 1.1211, "step": 4606 }, { - "epoch": 0.9660306143845669, - "grad_norm": 6.463211750060084, - "learning_rate": 1.5852365319247936e-05, - "loss": 1.8542, + "epoch": 0.6505224512849478, + "grad_norm": 3.618161915774754, + "learning_rate": 1.8224705991261688e-05, + "loss": 1.001, "step": 4607 }, { - "epoch": 0.9662403019500944, - "grad_norm": 6.25127139064424, - "learning_rate": 1.5850529469878544e-05, - "loss": 1.7682, + "epoch": 0.6506636543349337, + "grad_norm": 3.8930015488095058, + "learning_rate": 1.822383877523082e-05, + "loss": 1.1374, "step": 4608 }, { - "epoch": 0.9664499895156218, - "grad_norm": 8.370090315226767, - "learning_rate": 1.584869332065937e-05, - "loss": 1.5027, + "epoch": 0.6508048573849196, + "grad_norm": 3.6089539550890617, + "learning_rate": 1.8222971368081483e-05, + "loss": 1.0412, "step": 4609 }, { - "epoch": 0.9666596770811491, - "grad_norm": 6.883920768953059, - "learning_rate": 1.584685687168452e-05, - "loss": 1.8097, + "epoch": 0.6509460604349054, + "grad_norm": 3.3313654850020553, + "learning_rate": 1.8222103769833844e-05, + "loss": 0.9294, "step": 4610 }, { - "epoch": 0.9668693646466765, - "grad_norm": 7.066718086976704, - "learning_rate": 1.584502012304811e-05, - "loss": 1.2618, + "epoch": 0.6510872634848913, + "grad_norm": 3.778713039415247, + "learning_rate": 1.8221235980508067e-05, + "loss": 1.0493, "step": 4611 }, { - "epoch": 0.9670790522122038, - "grad_norm": 7.704975294289999, - "learning_rate": 1.5843183074844283e-05, - "loss": 1.624, + "epoch": 0.6512284665348772, + "grad_norm": 4.30633508263959, + "learning_rate": 1.8220368000124316e-05, + "loss": 1.1642, "step": 4612 }, { - "epoch": 0.9672887397777312, - "grad_norm": 6.712239176117779, - "learning_rate": 1.584134572716719e-05, - "loss": 1.8167, + "epoch": 0.6513696695848631, + "grad_norm": 3.429061173126783, + "learning_rate": 1.821949982870276e-05, + "loss": 0.9445, "step": 4613 }, { - "epoch": 0.9674984273432585, - "grad_norm": 7.734517705930198, - "learning_rate": 1.5839508080110995e-05, - "loss": 1.4983, + "epoch": 0.6515108726348489, + "grad_norm": 3.3524600570242082, + "learning_rate": 1.8218631466263584e-05, + "loss": 0.8737, "step": 4614 }, { - "epoch": 0.9677081149087859, - "grad_norm": 7.033353227657182, - "learning_rate": 1.5837670133769888e-05, - "loss": 1.8377, + "epoch": 0.6516520756848347, + "grad_norm": 3.3400958016346274, + "learning_rate": 1.8217762912826956e-05, + "loss": 1.0053, "step": 4615 }, { - "epoch": 0.9679178024743132, - "grad_norm": 7.833608984245218, - "learning_rate": 1.5835831888238057e-05, - "loss": 1.7102, + "epoch": 0.6517932787348206, + "grad_norm": 4.250109656551829, + "learning_rate": 1.821689416841307e-05, + "loss": 1.1588, "step": 4616 }, { - "epoch": 0.9681274900398407, - "grad_norm": 7.079612115190486, - "learning_rate": 1.5833993343609725e-05, - "loss": 1.8071, + "epoch": 0.6519344817848065, + "grad_norm": 3.5752164322423194, + "learning_rate": 1.821602523304211e-05, + "loss": 1.105, "step": 4617 }, { - "epoch": 0.968337177605368, - "grad_norm": 8.97197578130918, - "learning_rate": 1.583215449997911e-05, - "loss": 1.9485, + "epoch": 0.6520756848347924, + "grad_norm": 4.682130443328893, + "learning_rate": 1.8215156106734274e-05, + "loss": 1.1023, "step": 4618 }, { - "epoch": 0.9685468651708954, - "grad_norm": 6.643944754498032, - "learning_rate": 1.5830315357440464e-05, - "loss": 1.165, + "epoch": 0.6522168878847783, + "grad_norm": 3.5833662404807067, + "learning_rate": 1.8214286789509755e-05, + "loss": 0.9021, "step": 4619 }, { - "epoch": 0.9687565527364227, - "grad_norm": 7.5221030348114395, - "learning_rate": 1.5828475916088044e-05, - "loss": 1.7425, + "epoch": 0.6523580909347642, + "grad_norm": 3.9118971844220756, + "learning_rate": 1.821341728138876e-05, + "loss": 1.0303, "step": 4620 }, { - "epoch": 0.9689662403019501, - "grad_norm": 6.110224733321276, - "learning_rate": 1.5826636176016123e-05, - "loss": 1.5333, + "epoch": 0.65249929398475, + "grad_norm": 3.5423796957880898, + "learning_rate": 1.8212547582391497e-05, + "loss": 1.0113, "step": 4621 }, { - "epoch": 0.9691759278674774, - "grad_norm": 7.336909225708022, - "learning_rate": 1.5824796137318993e-05, - "loss": 1.8197, + "epoch": 0.6526404970347359, + "grad_norm": 3.5313099655981723, + "learning_rate": 1.821167769253817e-05, + "loss": 1.0141, "step": 4622 }, { - "epoch": 0.9693856154330048, - "grad_norm": 6.596170542005773, - "learning_rate": 1.582295580009096e-05, - "loss": 1.5548, + "epoch": 0.6527817000847218, + "grad_norm": 3.431586762782378, + "learning_rate": 1.8210807611849002e-05, + "loss": 1.0405, "step": 4623 }, { - "epoch": 0.9695953029985321, - "grad_norm": 6.548505595951554, - "learning_rate": 1.582111516442634e-05, - "loss": 1.5336, + "epoch": 0.6529229031347077, + "grad_norm": 3.93252851326111, + "learning_rate": 1.820993734034421e-05, + "loss": 1.1038, "step": 4624 }, { - "epoch": 0.9698049905640596, - "grad_norm": 5.972803844713978, - "learning_rate": 1.5819274230419472e-05, - "loss": 1.2644, + "epoch": 0.6530641061846936, + "grad_norm": 3.859233873508907, + "learning_rate": 1.8209066878044025e-05, + "loss": 1.2042, "step": 4625 }, { - "epoch": 0.9700146781295869, - "grad_norm": 5.857470228695838, - "learning_rate": 1.581743299816471e-05, - "loss": 1.2006, + "epoch": 0.6532053092346795, + "grad_norm": 3.8734210535665943, + "learning_rate": 1.8208196224968663e-05, + "loss": 1.2707, "step": 4626 }, { - "epoch": 0.9702243656951143, - "grad_norm": 6.334808805287599, - "learning_rate": 1.5815591467756412e-05, - "loss": 1.5085, + "epoch": 0.6533465122846653, + "grad_norm": 3.1039815283465058, + "learning_rate": 1.820732538113837e-05, + "loss": 0.8601, "step": 4627 }, { - "epoch": 0.9704340532606417, - "grad_norm": 6.813430107317001, - "learning_rate": 1.5813749639288967e-05, - "loss": 1.4082, + "epoch": 0.6534877153346512, + "grad_norm": 3.542434455116299, + "learning_rate": 1.820645434657338e-05, + "loss": 0.9444, "step": 4628 }, { - "epoch": 0.970643740826169, - "grad_norm": 7.846657462999528, - "learning_rate": 1.581190751285677e-05, - "loss": 1.9396, + "epoch": 0.6536289183846371, + "grad_norm": 3.0512050082119524, + "learning_rate": 1.8205583121293936e-05, + "loss": 0.8367, "step": 4629 }, { - "epoch": 0.9708534283916964, - "grad_norm": 6.147825416776932, - "learning_rate": 1.5810065088554232e-05, - "loss": 1.464, + "epoch": 0.653770121434623, + "grad_norm": 3.7902289317954536, + "learning_rate": 1.8204711705320282e-05, + "loss": 1.0869, "step": 4630 }, { - "epoch": 0.9710631159572237, - "grad_norm": 5.531721340604371, - "learning_rate": 1.5808222366475783e-05, - "loss": 1.5946, + "epoch": 0.6539113244846089, + "grad_norm": 3.6035631993689297, + "learning_rate": 1.820384009867267e-05, + "loss": 0.8744, "step": 4631 }, { - "epoch": 0.9712728035227511, - "grad_norm": 8.293966409640001, - "learning_rate": 1.580637934671586e-05, - "loss": 1.8766, + "epoch": 0.6540525275345948, + "grad_norm": 3.198058640850185, + "learning_rate": 1.820296830137136e-05, + "loss": 0.8624, "step": 4632 }, { - "epoch": 0.9714824910882784, - "grad_norm": 7.463534787353004, - "learning_rate": 1.580453602936893e-05, - "loss": 1.3587, + "epoch": 0.6541937305845806, + "grad_norm": 3.1074222694897107, + "learning_rate": 1.820209631343661e-05, + "loss": 0.7617, "step": 4633 }, { - "epoch": 0.9716921786538059, - "grad_norm": 7.020277913397013, - "learning_rate": 1.5802692414529456e-05, - "loss": 2.1281, + "epoch": 0.6543349336345665, + "grad_norm": 3.1027927781208935, + "learning_rate": 1.820122413488868e-05, + "loss": 0.9057, "step": 4634 }, { - "epoch": 0.9719018662193332, - "grad_norm": 5.7496402752023466, - "learning_rate": 1.5800848502291935e-05, - "loss": 1.281, + "epoch": 0.6544761366845524, + "grad_norm": 3.4057336540790657, + "learning_rate": 1.8200351765747846e-05, + "loss": 0.9429, "step": 4635 }, { - "epoch": 0.9721115537848606, - "grad_norm": 6.473100760349967, - "learning_rate": 1.5799004292750866e-05, - "loss": 1.6311, + "epoch": 0.6546173397345383, + "grad_norm": 3.1630887201071007, + "learning_rate": 1.8199479206034374e-05, + "loss": 0.8692, "step": 4636 }, { - "epoch": 0.9723212413503879, - "grad_norm": 8.12287168595937, - "learning_rate": 1.5797159786000767e-05, - "loss": 1.7443, + "epoch": 0.6547585427845242, + "grad_norm": 3.4564615527659965, + "learning_rate": 1.819860645576855e-05, + "loss": 0.9472, "step": 4637 }, { - "epoch": 0.9725309289159153, - "grad_norm": 7.530576394355859, - "learning_rate": 1.579531498213618e-05, - "loss": 1.851, + "epoch": 0.65489974583451, + "grad_norm": 3.374162776184678, + "learning_rate": 1.8197733514970655e-05, + "loss": 0.8467, "step": 4638 }, { - "epoch": 0.9727406164814426, - "grad_norm": 7.158474886576736, - "learning_rate": 1.579346988125164e-05, - "loss": 1.657, + "epoch": 0.6550409488844959, + "grad_norm": 3.9076348143316912, + "learning_rate": 1.819686038366097e-05, + "loss": 1.0205, "step": 4639 }, { - "epoch": 0.97295030404697, - "grad_norm": 6.788429752325326, - "learning_rate": 1.579162448344173e-05, - "loss": 1.5973, + "epoch": 0.6551821519344818, + "grad_norm": 3.372368585320994, + "learning_rate": 1.819598706185979e-05, + "loss": 0.8872, "step": 4640 }, { - "epoch": 0.9731599916124973, - "grad_norm": 8.254831359054881, - "learning_rate": 1.5789778788801013e-05, - "loss": 1.7952, + "epoch": 0.6553233549844677, + "grad_norm": 4.017091323041263, + "learning_rate": 1.819511354958741e-05, + "loss": 1.0744, "step": 4641 }, { - "epoch": 0.9733696791780247, - "grad_norm": 5.829114624100832, - "learning_rate": 1.5787932797424094e-05, - "loss": 1.5307, + "epoch": 0.6554645580344536, + "grad_norm": 4.437701355038319, + "learning_rate": 1.8194239846864133e-05, + "loss": 1.2738, "step": 4642 }, { - "epoch": 0.973579366743552, - "grad_norm": 8.938174126033726, - "learning_rate": 1.5786086509405584e-05, - "loss": 1.4481, + "epoch": 0.6556057610844395, + "grad_norm": 4.095355965809119, + "learning_rate": 1.8193365953710257e-05, + "loss": 1.0416, "step": 4643 }, { - "epoch": 0.9737890543090795, - "grad_norm": 6.163980375886287, - "learning_rate": 1.5784239924840097e-05, - "loss": 1.347, + "epoch": 0.6557469641344253, + "grad_norm": 3.3131417486233237, + "learning_rate": 1.81924918701461e-05, + "loss": 1.1107, "step": 4644 }, { - "epoch": 0.9739987418746069, - "grad_norm": 7.584073575320734, - "learning_rate": 1.5782393043822287e-05, - "loss": 1.5172, + "epoch": 0.6558881671844112, + "grad_norm": 4.420830292414085, + "learning_rate": 1.8191617596191963e-05, + "loss": 1.1637, "step": 4645 }, { - "epoch": 0.9742084294401342, - "grad_norm": 5.582625614393856, - "learning_rate": 1.5780545866446802e-05, - "loss": 1.6059, + "epoch": 0.6560293702343971, + "grad_norm": 3.910580543913456, + "learning_rate": 1.8190743131868176e-05, + "loss": 1.0787, "step": 4646 }, { - "epoch": 0.9744181170056616, - "grad_norm": 7.523203143265562, - "learning_rate": 1.5778698392808317e-05, - "loss": 1.8865, + "epoch": 0.656170573284383, + "grad_norm": 4.895015583266957, + "learning_rate": 1.818986847719505e-05, + "loss": 1.1761, "step": 4647 }, { - "epoch": 0.9746278045711889, - "grad_norm": 6.36718586983104, - "learning_rate": 1.577685062300152e-05, - "loss": 1.6981, + "epoch": 0.6563117763343688, + "grad_norm": 3.122385716415178, + "learning_rate": 1.8188993632192926e-05, + "loss": 0.8288, "step": 4648 }, { - "epoch": 0.9748374921367163, - "grad_norm": 5.694735695367508, - "learning_rate": 1.5775002557121104e-05, - "loss": 1.4363, + "epoch": 0.6564529793843547, + "grad_norm": 3.0130979949093777, + "learning_rate": 1.818811859688212e-05, + "loss": 0.8368, "step": 4649 }, { - "epoch": 0.9750471797022436, - "grad_norm": 6.021705808041867, - "learning_rate": 1.5773154195261793e-05, - "loss": 1.5108, + "epoch": 0.6565941824343405, + "grad_norm": 3.4756726689541737, + "learning_rate": 1.8187243371282976e-05, + "loss": 0.95, "step": 4650 }, { - "epoch": 0.975256867267771, - "grad_norm": 7.272458829094439, - "learning_rate": 1.5771305537518316e-05, - "loss": 1.7476, + "epoch": 0.6567353854843264, + "grad_norm": 3.090300048298037, + "learning_rate": 1.8186367955415833e-05, + "loss": 0.8527, "step": 4651 }, { - "epoch": 0.9754665548332984, - "grad_norm": 7.365379028599278, - "learning_rate": 1.576945658398542e-05, - "loss": 2.0312, + "epoch": 0.6568765885343123, + "grad_norm": 3.44735902907066, + "learning_rate": 1.8185492349301035e-05, + "loss": 0.9747, "step": 4652 }, { - "epoch": 0.9756762423988258, - "grad_norm": 5.925481968503258, - "learning_rate": 1.5767607334757867e-05, - "loss": 1.2852, + "epoch": 0.6570177915842982, + "grad_norm": 3.3898378037844004, + "learning_rate": 1.8184616552958926e-05, + "loss": 0.9305, "step": 4653 }, { - "epoch": 0.9758859299643531, - "grad_norm": 6.671681388741526, - "learning_rate": 1.5765757789930434e-05, - "loss": 1.7179, + "epoch": 0.6571589946342841, + "grad_norm": 3.298769520854058, + "learning_rate": 1.8183740566409867e-05, + "loss": 0.9132, "step": 4654 }, { - "epoch": 0.9760956175298805, - "grad_norm": 6.304346432997182, - "learning_rate": 1.576390794959792e-05, - "loss": 1.9099, + "epoch": 0.65730019768427, + "grad_norm": 3.8820296016745792, + "learning_rate": 1.8182864389674207e-05, + "loss": 1.1299, "step": 4655 }, { - "epoch": 0.9763053050954078, - "grad_norm": 6.321280161926331, - "learning_rate": 1.576205781385512e-05, - "loss": 1.6482, + "epoch": 0.6574414007342558, + "grad_norm": 3.588698581002136, + "learning_rate": 1.8181988022772315e-05, + "loss": 1.0762, "step": 4656 }, { - "epoch": 0.9765149926609352, - "grad_norm": 6.978277251728305, - "learning_rate": 1.5760207382796867e-05, - "loss": 1.3028, + "epoch": 0.6575826037842417, + "grad_norm": 3.045969057374051, + "learning_rate": 1.8181111465724554e-05, + "loss": 0.7468, "step": 4657 }, { - "epoch": 0.9767246802264625, - "grad_norm": 7.211551033650958, - "learning_rate": 1.5758356656517993e-05, - "loss": 1.456, + "epoch": 0.6577238068342276, + "grad_norm": 3.759168538342847, + "learning_rate": 1.81802347185513e-05, + "loss": 1.0298, "step": 4658 }, { - "epoch": 0.9769343677919899, - "grad_norm": 7.4160608843310705, - "learning_rate": 1.575650563511335e-05, - "loss": 1.7712, + "epoch": 0.6578650098842135, + "grad_norm": 3.3440842806922304, + "learning_rate": 1.817935778127292e-05, + "loss": 0.974, "step": 4659 }, { - "epoch": 0.9771440553575172, - "grad_norm": 6.518884928478189, - "learning_rate": 1.5754654318677814e-05, - "loss": 1.8428, + "epoch": 0.6580062129341994, + "grad_norm": 4.643981061042282, + "learning_rate": 1.8178480653909795e-05, + "loss": 1.3181, "step": 4660 }, { - "epoch": 0.9773537429230447, - "grad_norm": 7.707212910732268, - "learning_rate": 1.575280270730626e-05, - "loss": 1.7144, + "epoch": 0.6581474159841852, + "grad_norm": 3.5715994510089573, + "learning_rate": 1.8177603336482315e-05, + "loss": 1.0588, "step": 4661 }, { - "epoch": 0.9775634304885721, - "grad_norm": 6.501589529580172, - "learning_rate": 1.5750950801093592e-05, - "loss": 1.6494, + "epoch": 0.6582886190341711, + "grad_norm": 3.612075824056748, + "learning_rate": 1.8176725829010868e-05, + "loss": 1.1072, "step": 4662 }, { - "epoch": 0.9777731180540994, - "grad_norm": 7.6504116768994095, - "learning_rate": 1.574909860013472e-05, - "loss": 1.9984, + "epoch": 0.658429822084157, + "grad_norm": 3.64374437676882, + "learning_rate": 1.817584813151584e-05, + "loss": 1.0264, "step": 4663 }, { - "epoch": 0.9779828056196268, - "grad_norm": 6.1782548361359035, - "learning_rate": 1.5747246104524574e-05, - "loss": 1.957, + "epoch": 0.6585710251341429, + "grad_norm": 3.2892141835326596, + "learning_rate": 1.817497024401763e-05, + "loss": 0.9023, "step": 4664 }, { - "epoch": 0.9781924931851541, - "grad_norm": 6.233855322639774, - "learning_rate": 1.57453933143581e-05, - "loss": 1.3755, + "epoch": 0.6587122281841288, + "grad_norm": 3.2971175944963345, + "learning_rate": 1.8174092166536646e-05, + "loss": 0.9194, "step": 4665 }, { - "epoch": 0.9784021807506815, - "grad_norm": 7.4175380207094355, - "learning_rate": 1.5743540229730245e-05, - "loss": 2.101, + "epoch": 0.6588534312341147, + "grad_norm": 3.2932146963095668, + "learning_rate": 1.817321389909329e-05, + "loss": 0.9572, "step": 4666 }, { - "epoch": 0.9786118683162088, - "grad_norm": 7.017750218503154, - "learning_rate": 1.5741686850735996e-05, - "loss": 1.6182, + "epoch": 0.6589946342841005, + "grad_norm": 3.4346237562271855, + "learning_rate": 1.817233544170797e-05, + "loss": 1.0022, "step": 4667 }, { - "epoch": 0.9788215558817362, - "grad_norm": 6.109148405615685, - "learning_rate": 1.5739833177470338e-05, - "loss": 1.7079, + "epoch": 0.6591358373340864, + "grad_norm": 3.4097256785502537, + "learning_rate": 1.817145679440111e-05, + "loss": 1.1754, "step": 4668 }, { - "epoch": 0.9790312434472636, - "grad_norm": 6.773429746029386, - "learning_rate": 1.5737979210028277e-05, - "loss": 1.8969, + "epoch": 0.6592770403840723, + "grad_norm": 4.416772184062021, + "learning_rate": 1.8170577957193115e-05, + "loss": 1.199, "step": 4669 }, { - "epoch": 0.979240931012791, - "grad_norm": 6.06593542688359, - "learning_rate": 1.5736124948504827e-05, - "loss": 1.6872, + "epoch": 0.6594182434340582, + "grad_norm": 4.123528459243989, + "learning_rate": 1.816969893010442e-05, + "loss": 1.067, "step": 4670 }, { - "epoch": 0.9794506185783183, - "grad_norm": 5.456591584844584, - "learning_rate": 1.5734270392995024e-05, - "loss": 1.2133, + "epoch": 0.6595594464840441, + "grad_norm": 3.841921627395763, + "learning_rate": 1.8168819713155453e-05, + "loss": 0.9206, "step": 4671 }, { - "epoch": 0.9796603061438457, - "grad_norm": 6.542448073494205, - "learning_rate": 1.573241554359392e-05, - "loss": 1.748, + "epoch": 0.65970064953403, + "grad_norm": 3.7253958761317705, + "learning_rate": 1.8167940306366642e-05, + "loss": 1.0666, "step": 4672 }, { - "epoch": 0.979869993709373, - "grad_norm": 5.458869267173071, - "learning_rate": 1.5730560400396572e-05, - "loss": 1.628, + "epoch": 0.6598418525840158, + "grad_norm": 4.415109649298847, + "learning_rate": 1.8167060709758425e-05, + "loss": 1.3117, "step": 4673 }, { - "epoch": 0.9800796812749004, - "grad_norm": 5.624290066860508, - "learning_rate": 1.572870496349807e-05, - "loss": 1.6756, + "epoch": 0.6599830556340017, + "grad_norm": 4.4215949538484445, + "learning_rate": 1.8166180923351244e-05, + "loss": 1.102, "step": 4674 }, { - "epoch": 0.9802893688404277, - "grad_norm": 6.646962163372272, - "learning_rate": 1.5726849232993503e-05, - "loss": 2.0415, + "epoch": 0.6601242586839876, + "grad_norm": 3.3355497891831765, + "learning_rate": 1.8165300947165544e-05, + "loss": 0.8919, "step": 4675 }, { - "epoch": 0.9804990564059551, - "grad_norm": 6.278787587664155, - "learning_rate": 1.5724993208977973e-05, - "loss": 1.8056, + "epoch": 0.6602654617339735, + "grad_norm": 4.068710837204282, + "learning_rate": 1.8164420781221777e-05, + "loss": 1.249, "step": 4676 }, { - "epoch": 0.9807087439714824, - "grad_norm": 5.599934607581942, - "learning_rate": 1.5723136891546616e-05, - "loss": 1.311, + "epoch": 0.6604066647839594, + "grad_norm": 3.2622439655953523, + "learning_rate": 1.81635404255404e-05, + "loss": 0.9239, "step": 4677 }, { - "epoch": 0.9809184315370099, - "grad_norm": 6.713989285555248, - "learning_rate": 1.572128028079457e-05, - "loss": 1.9383, + "epoch": 0.6605478678339453, + "grad_norm": 3.8161659707197395, + "learning_rate": 1.8162659880141865e-05, + "loss": 1.2931, "step": 4678 }, { - "epoch": 0.9811281191025372, - "grad_norm": 6.585273285797768, - "learning_rate": 1.5719423376816983e-05, - "loss": 1.7311, + "epoch": 0.6606890708839311, + "grad_norm": 3.1943028110608314, + "learning_rate": 1.8161779145046642e-05, + "loss": 0.8951, "step": 4679 }, { - "epoch": 0.9813378066680646, - "grad_norm": 6.981889839176656, - "learning_rate": 1.5717566179709028e-05, - "loss": 1.3141, + "epoch": 0.660830273933917, + "grad_norm": 3.6976607956718346, + "learning_rate": 1.8160898220275196e-05, + "loss": 1.1755, "step": 4680 }, { - "epoch": 0.981547494233592, - "grad_norm": 7.482025653531907, - "learning_rate": 1.5715708689565895e-05, - "loss": 1.8212, + "epoch": 0.6609714769839029, + "grad_norm": 3.236632876944451, + "learning_rate": 1.8160017105848e-05, + "loss": 0.8717, "step": 4681 }, { - "epoch": 0.9817571817991193, - "grad_norm": 7.479972131518916, - "learning_rate": 1.571385090648277e-05, - "loss": 1.6145, + "epoch": 0.6611126800338887, + "grad_norm": 3.596044533880722, + "learning_rate": 1.815913580178553e-05, + "loss": 0.9898, "step": 4682 }, { - "epoch": 0.9819668693646467, - "grad_norm": 6.511970499706646, - "learning_rate": 1.571199283055488e-05, - "loss": 1.683, + "epoch": 0.6612538830838746, + "grad_norm": 3.385224742606756, + "learning_rate": 1.8158254308108264e-05, + "loss": 0.9642, "step": 4683 }, { - "epoch": 0.982176556930174, - "grad_norm": 8.46534380142125, - "learning_rate": 1.5710134461877454e-05, - "loss": 1.8409, + "epoch": 0.6613950861338604, + "grad_norm": 3.5255333387281045, + "learning_rate": 1.81573726248367e-05, + "loss": 1.0195, "step": 4684 }, { - "epoch": 0.9823862444957014, - "grad_norm": 5.746535991179911, - "learning_rate": 1.5708275800545724e-05, - "loss": 1.5957, + "epoch": 0.6615362891838463, + "grad_norm": 3.896605729789775, + "learning_rate": 1.815649075199131e-05, + "loss": 0.9732, "step": 4685 }, { - "epoch": 0.9825959320612287, - "grad_norm": 6.090738886487232, - "learning_rate": 1.5706416846654967e-05, - "loss": 1.7316, + "epoch": 0.6616774922338322, + "grad_norm": 3.675805447545512, + "learning_rate": 1.8155608689592604e-05, + "loss": 0.9367, "step": 4686 }, { - "epoch": 0.9828056196267562, - "grad_norm": 6.283248584151236, - "learning_rate": 1.5704557600300447e-05, - "loss": 1.4384, + "epoch": 0.6618186952838181, + "grad_norm": 4.920856744816194, + "learning_rate": 1.815472643766107e-05, + "loss": 1.0818, "step": 4687 }, { - "epoch": 0.9830153071922835, - "grad_norm": 5.536807851806472, - "learning_rate": 1.570269806157745e-05, - "loss": 1.4214, + "epoch": 0.661959898333804, + "grad_norm": 4.356999492060913, + "learning_rate": 1.815384399621722e-05, + "loss": 1.0801, "step": 4688 }, { - "epoch": 0.9832249947578109, - "grad_norm": 6.575722458961724, - "learning_rate": 1.5700838230581295e-05, - "loss": 1.4482, + "epoch": 0.6621011013837899, + "grad_norm": 3.396975648266339, + "learning_rate": 1.8152961365281553e-05, + "loss": 1.0822, "step": 4689 }, { - "epoch": 0.9834346823233382, - "grad_norm": 6.924901059445336, - "learning_rate": 1.569897810740729e-05, - "loss": 1.7714, + "epoch": 0.6622423044337757, + "grad_norm": 4.1227904246854, + "learning_rate": 1.8152078544874582e-05, + "loss": 1.1275, "step": 4690 }, { - "epoch": 0.9836443698888656, - "grad_norm": 6.706251417751598, - "learning_rate": 1.5697117692150773e-05, - "loss": 1.7371, + "epoch": 0.6623835074837616, + "grad_norm": 3.23330422351804, + "learning_rate": 1.815119553501683e-05, + "loss": 0.8303, "step": 4691 }, { - "epoch": 0.9838540574543929, - "grad_norm": 6.7450677192777215, - "learning_rate": 1.5695256984907094e-05, - "loss": 1.5486, + "epoch": 0.6625247105337475, + "grad_norm": 4.492672864303307, + "learning_rate": 1.8150312335728818e-05, + "loss": 1.1039, "step": 4692 }, { - "epoch": 0.9840637450199203, - "grad_norm": 6.895724100984174, - "learning_rate": 1.5693395985771615e-05, - "loss": 1.8585, + "epoch": 0.6626659135837334, + "grad_norm": 3.3019735844623197, + "learning_rate": 1.814942894703106e-05, + "loss": 1.0844, "step": 4693 }, { - "epoch": 0.9842734325854476, - "grad_norm": 7.150128314685859, - "learning_rate": 1.5691534694839714e-05, - "loss": 1.7073, + "epoch": 0.6628071166337193, + "grad_norm": 4.0414694710092665, + "learning_rate": 1.8148545368944096e-05, + "loss": 1.2137, "step": 4694 }, { - "epoch": 0.984483120150975, - "grad_norm": 5.175895080754177, - "learning_rate": 1.5689673112206793e-05, - "loss": 1.4491, + "epoch": 0.6629483196837052, + "grad_norm": 3.8366277613489346, + "learning_rate": 1.814766160148846e-05, + "loss": 0.8661, "step": 4695 }, { - "epoch": 0.9846928077165024, - "grad_norm": 6.223516134523837, - "learning_rate": 1.5687811237968254e-05, - "loss": 1.5606, + "epoch": 0.663089522733691, + "grad_norm": 3.859848802374575, + "learning_rate": 1.814677764468468e-05, + "loss": 1.2148, "step": 4696 }, { - "epoch": 0.9849024952820298, - "grad_norm": 6.561371979098763, - "learning_rate": 1.568594907221953e-05, - "loss": 1.6077, + "epoch": 0.6632307257836769, + "grad_norm": 4.284039735137657, + "learning_rate": 1.8145893498553313e-05, + "loss": 0.9882, "step": 4697 }, { - "epoch": 0.9851121828475572, - "grad_norm": 7.842256970049425, - "learning_rate": 1.5684086615056047e-05, - "loss": 1.3531, + "epoch": 0.6633719288336628, + "grad_norm": 3.9422573045588254, + "learning_rate": 1.8145009163114894e-05, + "loss": 1.11, "step": 4698 }, { - "epoch": 0.9853218704130845, - "grad_norm": 7.06878644138483, - "learning_rate": 1.568222386657327e-05, - "loss": 1.53, + "epoch": 0.6635131318836487, + "grad_norm": 3.769067108200693, + "learning_rate": 1.814412463838998e-05, + "loss": 0.8708, "step": 4699 }, { - "epoch": 0.9855315579786119, - "grad_norm": 7.100400552780994, - "learning_rate": 1.5680360826866662e-05, - "loss": 1.8881, + "epoch": 0.6636543349336346, + "grad_norm": 3.7481965732626343, + "learning_rate": 1.8143239924399127e-05, + "loss": 1.0826, "step": 4700 }, { - "epoch": 0.9857412455441392, - "grad_norm": 7.003120317100073, - "learning_rate": 1.5678497496031714e-05, - "loss": 1.8249, + "epoch": 0.6637955379836205, + "grad_norm": 3.763426290397732, + "learning_rate": 1.8142355021162896e-05, + "loss": 1.0971, "step": 4701 }, { - "epoch": 0.9859509331096666, - "grad_norm": 7.579406378747845, - "learning_rate": 1.5676633874163916e-05, - "loss": 1.731, + "epoch": 0.6639367410336063, + "grad_norm": 3.5463667396049354, + "learning_rate": 1.8141469928701852e-05, + "loss": 1.0123, "step": 4702 }, { - "epoch": 0.9861606206751939, - "grad_norm": 5.992297313752019, - "learning_rate": 1.567476996135879e-05, - "loss": 1.5333, + "epoch": 0.6640779440835922, + "grad_norm": 3.6445530707877674, + "learning_rate": 1.814058464703656e-05, + "loss": 1.1036, "step": 4703 }, { - "epoch": 0.9863703082407214, - "grad_norm": 7.128370011162646, - "learning_rate": 1.5672905757711857e-05, - "loss": 1.3557, + "epoch": 0.6642191471335781, + "grad_norm": 3.3180508079938855, + "learning_rate": 1.81396991761876e-05, + "loss": 0.8006, "step": 4704 }, { - "epoch": 0.9865799958062487, - "grad_norm": 8.58932128755457, - "learning_rate": 1.5671041263318665e-05, - "loss": 1.5978, + "epoch": 0.664360350183564, + "grad_norm": 3.4741113454864565, + "learning_rate": 1.813881351617554e-05, + "loss": 1.1895, "step": 4705 }, { - "epoch": 0.9867896833717761, - "grad_norm": 7.108877637254589, - "learning_rate": 1.5669176478274775e-05, - "loss": 1.688, + "epoch": 0.6645015532335499, + "grad_norm": 3.9110491193051224, + "learning_rate": 1.8137927667020975e-05, + "loss": 1.1032, "step": 4706 }, { - "epoch": 0.9869993709373034, - "grad_norm": 6.23929333269962, - "learning_rate": 1.5667311402675758e-05, - "loss": 1.6544, + "epoch": 0.6646427562835358, + "grad_norm": 3.2135479742135233, + "learning_rate": 1.8137041628744484e-05, + "loss": 0.7431, "step": 4707 }, { - "epoch": 0.9872090585028308, - "grad_norm": 6.506240507750133, - "learning_rate": 1.56654460366172e-05, - "loss": 1.7887, + "epoch": 0.6647839593335216, + "grad_norm": 3.180459684122676, + "learning_rate": 1.8136155401366658e-05, + "loss": 0.785, "step": 4708 }, { - "epoch": 0.9874187460683581, - "grad_norm": 5.937561033901428, - "learning_rate": 1.5663580380194708e-05, - "loss": 1.5289, + "epoch": 0.6649251623835075, + "grad_norm": 2.923757329219554, + "learning_rate": 1.8135268984908096e-05, + "loss": 0.8922, "step": 4709 }, { - "epoch": 0.9876284336338855, - "grad_norm": 6.28247731839628, - "learning_rate": 1.56617144335039e-05, - "loss": 1.2513, + "epoch": 0.6650663654334934, + "grad_norm": 3.920675770943473, + "learning_rate": 1.8134382379389396e-05, + "loss": 1.2585, "step": 4710 }, { - "epoch": 0.9878381211994128, - "grad_norm": 7.625473899361198, - "learning_rate": 1.5659848196640403e-05, - "loss": 1.6971, + "epoch": 0.6652075684834793, + "grad_norm": 3.3123937070912604, + "learning_rate": 1.8133495584831162e-05, + "loss": 0.8287, "step": 4711 }, { - "epoch": 0.9880478087649402, - "grad_norm": 6.204804902850047, - "learning_rate": 1.5657981669699874e-05, - "loss": 1.3682, + "epoch": 0.6653487715334652, + "grad_norm": 3.7887501415193308, + "learning_rate": 1.8132608601254003e-05, + "loss": 0.7796, "step": 4712 }, { - "epoch": 0.9882574963304676, - "grad_norm": 7.00849610229715, - "learning_rate": 1.5656114852777968e-05, - "loss": 1.8106, + "epoch": 0.665489974583451, + "grad_norm": 3.6513554320498702, + "learning_rate": 1.813172142867853e-05, + "loss": 0.8681, "step": 4713 }, { - "epoch": 0.988467183895995, - "grad_norm": 7.232532497026672, - "learning_rate": 1.565424774597037e-05, - "loss": 2.1506, + "epoch": 0.6656311776334369, + "grad_norm": 3.8061712385153887, + "learning_rate": 1.8130834067125368e-05, + "loss": 1.0904, "step": 4714 }, { - "epoch": 0.9886768714615223, - "grad_norm": 6.126261473716029, - "learning_rate": 1.565238034937276e-05, - "loss": 1.5816, + "epoch": 0.6657723806834228, + "grad_norm": 4.708096852592885, + "learning_rate": 1.8129946516615132e-05, + "loss": 1.3727, "step": 4715 }, { - "epoch": 0.9888865590270497, - "grad_norm": 7.29537048186078, - "learning_rate": 1.5650512663080862e-05, - "loss": 1.7248, + "epoch": 0.6659135837334086, + "grad_norm": 3.844778511070779, + "learning_rate": 1.8129058777168447e-05, + "loss": 1.1557, "step": 4716 }, { - "epoch": 0.9890962465925771, - "grad_norm": 6.54284687184934, - "learning_rate": 1.5648644687190388e-05, - "loss": 1.5076, + "epoch": 0.6660547867833945, + "grad_norm": 3.7033263031726245, + "learning_rate": 1.8128170848805948e-05, + "loss": 1.0681, "step": 4717 }, { - "epoch": 0.9893059341581044, - "grad_norm": 7.560713721326173, - "learning_rate": 1.5646776421797073e-05, - "loss": 1.8843, + "epoch": 0.6661959898333804, + "grad_norm": 3.642178480920516, + "learning_rate": 1.812728273154827e-05, + "loss": 0.9452, "step": 4718 }, { - "epoch": 0.9895156217236318, - "grad_norm": 6.141870452737586, - "learning_rate": 1.5644907866996676e-05, - "loss": 1.7725, + "epoch": 0.6663371928833662, + "grad_norm": 3.7812079580389284, + "learning_rate": 1.812639442541605e-05, + "loss": 0.8593, "step": 4719 }, { - "epoch": 0.9897253092891591, - "grad_norm": 7.456775166813868, - "learning_rate": 1.564303902288496e-05, - "loss": 1.7688, + "epoch": 0.6664783959333521, + "grad_norm": 3.3106297504726476, + "learning_rate": 1.8125505930429936e-05, + "loss": 0.8577, "step": 4720 }, { - "epoch": 0.9899349968546866, - "grad_norm": 5.829284499255656, - "learning_rate": 1.5641169889557705e-05, - "loss": 1.5737, + "epoch": 0.666619598983338, + "grad_norm": 3.9747291825481637, + "learning_rate": 1.812461724661057e-05, + "loss": 1.086, "step": 4721 }, { - "epoch": 0.9901446844202139, - "grad_norm": 6.595997551041795, - "learning_rate": 1.5639300467110706e-05, - "loss": 1.6998, + "epoch": 0.6667608020333239, + "grad_norm": 4.0402096432963335, + "learning_rate": 1.812372837397861e-05, + "loss": 1.1567, "step": 4722 }, { - "epoch": 0.9903543719857413, - "grad_norm": 6.998664899999526, - "learning_rate": 1.5637430755639782e-05, - "loss": 1.698, + "epoch": 0.6669020050833098, + "grad_norm": 3.856774707620448, + "learning_rate": 1.812283931255471e-05, + "loss": 1.1974, "step": 4723 }, { - "epoch": 0.9905640595512686, - "grad_norm": 6.110093213570619, - "learning_rate": 1.5635560755240753e-05, - "loss": 1.4847, + "epoch": 0.6670432081332957, + "grad_norm": 3.2283315080963897, + "learning_rate": 1.812195006235953e-05, + "loss": 0.7623, "step": 4724 }, { - "epoch": 0.990773747116796, - "grad_norm": 7.483475634816272, - "learning_rate": 1.5633690466009462e-05, - "loss": 1.8429, + "epoch": 0.6671844111832815, + "grad_norm": 3.5052753435558444, + "learning_rate": 1.812106062341374e-05, + "loss": 0.8269, "step": 4725 }, { - "epoch": 0.9909834346823233, - "grad_norm": 7.636147999959991, - "learning_rate": 1.5631819888041763e-05, - "loss": 1.482, + "epoch": 0.6673256142332674, + "grad_norm": 3.8187666575054298, + "learning_rate": 1.812017099573801e-05, + "loss": 0.9153, "step": 4726 }, { - "epoch": 0.9911931222478507, - "grad_norm": 7.459828034141028, - "learning_rate": 1.5629949021433527e-05, - "loss": 1.7581, + "epoch": 0.6674668172832533, + "grad_norm": 3.8672699831054844, + "learning_rate": 1.8119281179353012e-05, + "loss": 1.0505, "step": 4727 }, { - "epoch": 0.991402809813378, - "grad_norm": 7.035740233990961, - "learning_rate": 1.5628077866280637e-05, - "loss": 1.5578, + "epoch": 0.6676080203332392, + "grad_norm": 5.965859385275775, + "learning_rate": 1.8118391174279426e-05, + "loss": 1.3257, "step": 4728 }, { - "epoch": 0.9916124973789054, - "grad_norm": 6.84929234695881, - "learning_rate": 1.5626206422678995e-05, - "loss": 1.5546, + "epoch": 0.6677492233832251, + "grad_norm": 3.564251006849823, + "learning_rate": 1.811750098053793e-05, + "loss": 0.9496, "step": 4729 }, { - "epoch": 0.9918221849444327, - "grad_norm": 6.529377440298712, - "learning_rate": 1.5624334690724518e-05, - "loss": 1.5729, + "epoch": 0.667890426433211, + "grad_norm": 3.3011830896753755, + "learning_rate": 1.8116610598149225e-05, + "loss": 0.8024, "step": 4730 }, { - "epoch": 0.9920318725099602, - "grad_norm": 6.4583840153164225, - "learning_rate": 1.5622462670513132e-05, - "loss": 1.7093, + "epoch": 0.6680316294831968, + "grad_norm": 3.607815564240608, + "learning_rate": 1.811572002713399e-05, + "loss": 0.8944, "step": 4731 }, { - "epoch": 0.9922415600754875, - "grad_norm": 7.8356550577953685, - "learning_rate": 1.562059036214078e-05, - "loss": 1.73, + "epoch": 0.6681728325331827, + "grad_norm": 4.119697025101832, + "learning_rate": 1.811482926751293e-05, + "loss": 1.2629, "step": 4732 }, { - "epoch": 0.9924512476410149, - "grad_norm": 6.777204223262935, - "learning_rate": 1.561871776570343e-05, - "loss": 1.9414, + "epoch": 0.6683140355831686, + "grad_norm": 4.658072246054126, + "learning_rate": 1.8113938319306742e-05, + "loss": 0.9771, "step": 4733 }, { - "epoch": 0.9926609352065423, - "grad_norm": 6.165735943166361, - "learning_rate": 1.5616844881297046e-05, - "loss": 1.5659, + "epoch": 0.6684552386331545, + "grad_norm": 3.5058724391364984, + "learning_rate": 1.8113047182536128e-05, + "loss": 1.1764, "step": 4734 }, { - "epoch": 0.9928706227720696, - "grad_norm": 5.974788351642358, - "learning_rate": 1.5614971709017615e-05, - "loss": 1.77, + "epoch": 0.6685964416831404, + "grad_norm": 3.3339197772764546, + "learning_rate": 1.8112155857221804e-05, + "loss": 0.9223, "step": 4735 }, { - "epoch": 0.993080310337597, - "grad_norm": 7.163448647259221, - "learning_rate": 1.5613098248961153e-05, - "loss": 1.8803, + "epoch": 0.6687376447331262, + "grad_norm": 2.9983833803004, + "learning_rate": 1.811126434338448e-05, + "loss": 0.8341, "step": 4736 }, { - "epoch": 0.9932899979031243, - "grad_norm": 5.74652630865415, - "learning_rate": 1.5611224501223662e-05, - "loss": 1.624, + "epoch": 0.6688788477831121, + "grad_norm": 3.1666045022221163, + "learning_rate": 1.8110372641044877e-05, + "loss": 0.808, "step": 4737 }, { - "epoch": 0.9934996854686517, - "grad_norm": 6.302638991479064, - "learning_rate": 1.5609350465901192e-05, - "loss": 1.5766, + "epoch": 0.669020050833098, + "grad_norm": 2.747128193298705, + "learning_rate": 1.8109480750223714e-05, + "loss": 0.8157, "step": 4738 }, { - "epoch": 0.993709373034179, - "grad_norm": 6.0251103955830425, - "learning_rate": 1.5607476143089777e-05, - "loss": 1.7713, + "epoch": 0.6691612538830839, + "grad_norm": 3.991494124470312, + "learning_rate": 1.810858867094172e-05, + "loss": 1.0451, "step": 4739 }, { - "epoch": 0.9939190605997065, - "grad_norm": 5.991077590894774, - "learning_rate": 1.5605601532885484e-05, - "loss": 1.707, + "epoch": 0.6693024569330698, + "grad_norm": 3.8759447533167375, + "learning_rate": 1.810769640321963e-05, + "loss": 0.9854, "step": 4740 }, { - "epoch": 0.9941287481652338, - "grad_norm": 5.574971282067392, - "learning_rate": 1.560372663538439e-05, - "loss": 1.4232, + "epoch": 0.6694436599830557, + "grad_norm": 3.6382780142886215, + "learning_rate": 1.8106803947078176e-05, + "loss": 0.9664, "step": 4741 }, { - "epoch": 0.9943384357307612, - "grad_norm": 6.935163289653459, - "learning_rate": 1.560185145068259e-05, - "loss": 1.6434, + "epoch": 0.6695848630330415, + "grad_norm": 3.5736061001178645, + "learning_rate": 1.81059113025381e-05, + "loss": 0.8898, "step": 4742 }, { - "epoch": 0.9945481232962885, - "grad_norm": 6.841965116816619, - "learning_rate": 1.559997597887618e-05, - "loss": 1.6611, + "epoch": 0.6697260660830274, + "grad_norm": 4.107436665773372, + "learning_rate": 1.8105018469620148e-05, + "loss": 1.0438, "step": 4743 }, { - "epoch": 0.9947578108618159, - "grad_norm": 6.521858204046354, - "learning_rate": 1.5598100220061297e-05, - "loss": 1.5034, + "epoch": 0.6698672691330133, + "grad_norm": 3.9626905472942693, + "learning_rate": 1.8104125448345066e-05, + "loss": 0.962, "step": 4744 }, { - "epoch": 0.9949674984273432, - "grad_norm": 5.902121992098352, - "learning_rate": 1.5596224174334062e-05, - "loss": 1.7083, + "epoch": 0.6700084721829992, + "grad_norm": 3.389731162657271, + "learning_rate": 1.8103232238733604e-05, + "loss": 0.9988, "step": 4745 }, { - "epoch": 0.9951771859928706, - "grad_norm": 6.624911567062908, - "learning_rate": 1.5594347841790636e-05, - "loss": 1.6365, + "epoch": 0.6701496752329851, + "grad_norm": 3.592591468334822, + "learning_rate": 1.810233884080653e-05, + "loss": 0.9002, "step": 4746 }, { - "epoch": 0.9953868735583979, - "grad_norm": 6.787391084546669, - "learning_rate": 1.5592471222527177e-05, - "loss": 1.8257, + "epoch": 0.670290878282971, + "grad_norm": 3.4309640671956956, + "learning_rate": 1.8101445254584598e-05, + "loss": 0.9629, "step": 4747 }, { - "epoch": 0.9955965611239254, - "grad_norm": 7.896686393093549, - "learning_rate": 1.5590594316639872e-05, - "loss": 1.872, + "epoch": 0.6704320813329568, + "grad_norm": 3.668855648686981, + "learning_rate": 1.8100551480088574e-05, + "loss": 1.0746, "step": 4748 }, { - "epoch": 0.9958062486894527, - "grad_norm": 7.2389605839494315, - "learning_rate": 1.558871712422491e-05, - "loss": 1.6712, + "epoch": 0.6705732843829427, + "grad_norm": 4.275233030252731, + "learning_rate": 1.8099657517339238e-05, + "loss": 1.1161, "step": 4749 }, { - "epoch": 0.9960159362549801, - "grad_norm": 6.077597398922661, - "learning_rate": 1.55868396453785e-05, - "loss": 1.82, + "epoch": 0.6707144874329285, + "grad_norm": 3.278405843435245, + "learning_rate": 1.8098763366357354e-05, + "loss": 0.9256, "step": 4750 }, { - "epoch": 0.9962256238205074, - "grad_norm": 7.557189163290166, - "learning_rate": 1.5584961880196873e-05, - "loss": 1.9075, + "epoch": 0.6708556904829144, + "grad_norm": 3.7703346705322565, + "learning_rate": 1.809786902716371e-05, + "loss": 1.0297, "step": 4751 }, { - "epoch": 0.9964353113860348, - "grad_norm": 6.98272421469055, - "learning_rate": 1.5583083828776257e-05, - "loss": 1.6065, + "epoch": 0.6709968935329003, + "grad_norm": 3.917623566421039, + "learning_rate": 1.8096974499779084e-05, + "loss": 1.1661, "step": 4752 }, { - "epoch": 0.9966449989515622, - "grad_norm": 8.54891787340911, - "learning_rate": 1.5581205491212915e-05, - "loss": 1.5479, + "epoch": 0.6711380965828861, + "grad_norm": 3.0268611824431746, + "learning_rate": 1.809607978422427e-05, + "loss": 0.8715, "step": 4753 }, { - "epoch": 0.9968546865170895, - "grad_norm": 5.647946622961197, - "learning_rate": 1.557932686760311e-05, - "loss": 1.8178, + "epoch": 0.671279299632872, + "grad_norm": 3.0041851383344675, + "learning_rate": 1.8095184880520058e-05, + "loss": 1.0014, "step": 4754 }, { - "epoch": 0.9970643740826169, - "grad_norm": 6.763389138666307, - "learning_rate": 1.5577447958043127e-05, - "loss": 1.8128, + "epoch": 0.6714205026828579, + "grad_norm": 4.355659125118408, + "learning_rate": 1.8094289788687245e-05, + "loss": 1.2732, "step": 4755 }, { - "epoch": 0.9972740616481442, - "grad_norm": 5.655368189708778, - "learning_rate": 1.5575568762629265e-05, - "loss": 1.4167, + "epoch": 0.6715617057328438, + "grad_norm": 3.8891004624817835, + "learning_rate": 1.8093394508746635e-05, + "loss": 0.9823, "step": 4756 }, { - "epoch": 0.9974837492136717, - "grad_norm": 6.129613806477013, - "learning_rate": 1.557368928145783e-05, - "loss": 1.8118, + "epoch": 0.6717029087828297, + "grad_norm": 3.687737187095739, + "learning_rate": 1.809249904071903e-05, + "loss": 1.0306, "step": 4757 }, { - "epoch": 0.997693436779199, - "grad_norm": 7.183523443011787, - "learning_rate": 1.5571809514625154e-05, - "loss": 1.6398, + "epoch": 0.6718441118328156, + "grad_norm": 4.320731686852281, + "learning_rate": 1.8091603384625243e-05, + "loss": 1.2813, "step": 4758 }, { - "epoch": 0.9979031243447264, - "grad_norm": 6.0028636404971225, - "learning_rate": 1.5569929462227578e-05, - "loss": 1.4096, + "epoch": 0.6719853148828014, + "grad_norm": 3.4446081881025883, + "learning_rate": 1.8090707540486084e-05, + "loss": 1.016, "step": 4759 }, { - "epoch": 0.9981128119102537, - "grad_norm": 7.583668733915585, - "learning_rate": 1.556804912436145e-05, - "loss": 2.1504, + "epoch": 0.6721265179327873, + "grad_norm": 3.4882347161783613, + "learning_rate": 1.8089811508322382e-05, + "loss": 0.9352, "step": 4760 }, { - "epoch": 0.9983224994757811, - "grad_norm": 6.538615120859601, - "learning_rate": 1.5566168501123153e-05, - "loss": 1.444, + "epoch": 0.6722677209827732, + "grad_norm": 3.992682276576138, + "learning_rate": 1.808891528815495e-05, + "loss": 1.1728, "step": 4761 }, { - "epoch": 0.9985321870413084, - "grad_norm": 7.002487362281974, - "learning_rate": 1.5564287592609065e-05, - "loss": 1.7205, + "epoch": 0.6724089240327591, + "grad_norm": 3.780274392650913, + "learning_rate": 1.8088018880004622e-05, + "loss": 0.9996, "step": 4762 }, { - "epoch": 0.9987418746068358, - "grad_norm": 6.740027524260676, - "learning_rate": 1.556240639891559e-05, - "loss": 1.4106, + "epoch": 0.672550127082745, + "grad_norm": 4.836477736250618, + "learning_rate": 1.8087122283892225e-05, + "loss": 1.2319, "step": 4763 }, { - "epoch": 0.9989515621723631, - "grad_norm": 7.469900794982689, - "learning_rate": 1.5560524920139135e-05, - "loss": 1.4109, + "epoch": 0.6726913301327309, + "grad_norm": 4.326133942304628, + "learning_rate": 1.8086225499838602e-05, + "loss": 1.2238, "step": 4764 }, { - "epoch": 0.9991612497378906, - "grad_norm": 7.930028683270726, - "learning_rate": 1.5558643156376138e-05, - "loss": 1.6216, + "epoch": 0.6728325331827167, + "grad_norm": 3.688863732220834, + "learning_rate": 1.808532852786459e-05, + "loss": 1.0643, "step": 4765 }, { - "epoch": 0.9993709373034179, - "grad_norm": 7.080676572829301, - "learning_rate": 1.5556761107723034e-05, - "loss": 1.7556, + "epoch": 0.6729737362327026, + "grad_norm": 3.7866731790223276, + "learning_rate": 1.8084431367991032e-05, + "loss": 1.0984, "step": 4766 }, { - "epoch": 0.9995806248689453, - "grad_norm": 6.68080725151864, - "learning_rate": 1.555487877427629e-05, - "loss": 1.576, + "epoch": 0.6731149392826885, + "grad_norm": 4.195853810018272, + "learning_rate": 1.8083534020238785e-05, + "loss": 1.38, "step": 4767 }, { - "epoch": 0.9997903124344726, - "grad_norm": 6.608495204505117, - "learning_rate": 1.5552996156132368e-05, - "loss": 1.7584, + "epoch": 0.6732561423326744, + "grad_norm": 4.415064308465423, + "learning_rate": 1.80826364846287e-05, + "loss": 1.3547, "step": 4768 }, { - "epoch": 1.0, - "grad_norm": 6.347222962776922, - "learning_rate": 1.5551113253387765e-05, - "loss": 1.6321, + "epoch": 0.6733973453826603, + "grad_norm": 3.960475445166232, + "learning_rate": 1.8081738761181625e-05, + "loss": 1.1361, "step": 4769 }, { - "epoch": 1.0002096875655273, - "grad_norm": 6.160992356998679, - "learning_rate": 1.554923006613898e-05, - "loss": 0.7599, + "epoch": 0.6735385484326462, + "grad_norm": 3.985634674969819, + "learning_rate": 1.808084084991844e-05, + "loss": 1.0786, "step": 4770 }, { - "epoch": 1.0004193751310548, - "grad_norm": 7.003062246576398, - "learning_rate": 1.5547346594482527e-05, - "loss": 0.955, + "epoch": 0.673679751482632, + "grad_norm": 3.179007587443989, + "learning_rate": 1.8079942750859997e-05, + "loss": 0.8858, "step": 4771 }, { - "epoch": 1.0006290626965821, - "grad_norm": 6.141910853326404, - "learning_rate": 1.5545462838514945e-05, - "loss": 0.9296, + "epoch": 0.6738209545326179, + "grad_norm": 4.370074151945814, + "learning_rate": 1.8079044464027177e-05, + "loss": 1.1869, "step": 4772 }, { - "epoch": 1.0008387502621094, - "grad_norm": 5.597492055554543, - "learning_rate": 1.554357879833277e-05, - "loss": 0.9148, + "epoch": 0.6739621575826038, + "grad_norm": 3.636154697813383, + "learning_rate": 1.8078145989440855e-05, + "loss": 1.0024, "step": 4773 }, { - "epoch": 1.0010484378276368, - "grad_norm": 5.528091974130121, - "learning_rate": 1.554169447403257e-05, - "loss": 0.8728, + "epoch": 0.6741033606325897, + "grad_norm": 3.572663990130696, + "learning_rate": 1.8077247327121904e-05, + "loss": 0.9112, "step": 4774 }, { - "epoch": 1.0012581253931643, - "grad_norm": 6.762864367165756, - "learning_rate": 1.5539809865710913e-05, - "loss": 1.016, + "epoch": 0.6742445636825756, + "grad_norm": 3.1465112482158464, + "learning_rate": 1.8076348477091216e-05, + "loss": 0.7324, "step": 4775 }, { - "epoch": 1.0014678129586916, - "grad_norm": 6.127952313172415, - "learning_rate": 1.5537924973464397e-05, - "loss": 1.0382, + "epoch": 0.6743857667325615, + "grad_norm": 4.741794133349117, + "learning_rate": 1.8075449439369678e-05, + "loss": 1.1829, "step": 4776 }, { - "epoch": 1.0016775005242189, - "grad_norm": 6.0177848211495695, - "learning_rate": 1.553603979738962e-05, - "loss": 0.9296, + "epoch": 0.6745269697825473, + "grad_norm": 3.7614585064282946, + "learning_rate": 1.8074550213978183e-05, + "loss": 0.9857, "step": 4777 }, { - "epoch": 1.0018871880897462, - "grad_norm": 6.430524103152137, - "learning_rate": 1.5534154337583198e-05, - "loss": 0.9276, + "epoch": 0.6746681728325332, + "grad_norm": 3.74063270546666, + "learning_rate": 1.8073650800937627e-05, + "loss": 0.8813, "step": 4778 }, { - "epoch": 1.0020968756552737, - "grad_norm": 6.902758896448298, - "learning_rate": 1.5532268594141767e-05, - "loss": 0.8018, + "epoch": 0.6748093758825191, + "grad_norm": 3.723534993405122, + "learning_rate": 1.8072751200268913e-05, + "loss": 0.8022, "step": 4779 }, { - "epoch": 1.002306563220801, - "grad_norm": 7.0240480556193985, - "learning_rate": 1.553038256716198e-05, - "loss": 0.8344, + "epoch": 0.674950578932505, + "grad_norm": 3.233225088954693, + "learning_rate": 1.8071851411992948e-05, + "loss": 1.0225, "step": 4780 }, { - "epoch": 1.0025162507863283, - "grad_norm": 6.741271948755893, - "learning_rate": 1.5528496256740492e-05, - "loss": 0.8489, + "epoch": 0.6750917819824909, + "grad_norm": 3.716678899999008, + "learning_rate": 1.807095143613064e-05, + "loss": 1.023, "step": 4781 }, { - "epoch": 1.0027259383518556, - "grad_norm": 7.318285709842115, - "learning_rate": 1.552660966297398e-05, - "loss": 0.8857, + "epoch": 0.6752329850324768, + "grad_norm": 3.7037170452978416, + "learning_rate": 1.8070051272702905e-05, + "loss": 0.951, "step": 4782 }, { - "epoch": 1.0029356259173832, - "grad_norm": 6.400814764801574, - "learning_rate": 1.5524722785959142e-05, - "loss": 0.7388, + "epoch": 0.6753741880824626, + "grad_norm": 3.5794030332401214, + "learning_rate": 1.8069150921730668e-05, + "loss": 0.9104, "step": 4783 }, { - "epoch": 1.0031453134829105, - "grad_norm": 8.782317484212461, - "learning_rate": 1.5522835625792674e-05, - "loss": 0.8743, + "epoch": 0.6755153911324484, + "grad_norm": 3.9408589207242652, + "learning_rate": 1.806825038323485e-05, + "loss": 1.1243, "step": 4784 }, { - "epoch": 1.0033550010484378, - "grad_norm": 8.458458066706296, - "learning_rate": 1.5520948182571305e-05, - "loss": 1.0624, + "epoch": 0.6756565941824343, + "grad_norm": 3.3504929115028776, + "learning_rate": 1.8067349657236374e-05, + "loss": 0.8142, "step": 4785 }, { - "epoch": 1.003564688613965, - "grad_norm": 9.598469956844733, - "learning_rate": 1.5519060456391763e-05, - "loss": 0.9172, + "epoch": 0.6757977972324202, + "grad_norm": 3.3313176633658457, + "learning_rate": 1.8066448743756173e-05, + "loss": 0.8381, "step": 4786 }, { - "epoch": 1.0037743761794926, - "grad_norm": 9.449742846720936, - "learning_rate": 1.5517172447350806e-05, - "loss": 1.0205, + "epoch": 0.675939000282406, + "grad_norm": 3.7392803961403454, + "learning_rate": 1.806554764281519e-05, + "loss": 0.9539, "step": 4787 }, { - "epoch": 1.00398406374502, - "grad_norm": 7.628697144396201, - "learning_rate": 1.5515284155545187e-05, - "loss": 0.9323, + "epoch": 0.6760802033323919, + "grad_norm": 3.6274335435429, + "learning_rate": 1.8064646354434363e-05, + "loss": 1.0429, "step": 4788 }, { - "epoch": 1.0041937513105472, - "grad_norm": 10.023282079242447, - "learning_rate": 1.5513395581071692e-05, - "loss": 1.0517, + "epoch": 0.6762214063823778, + "grad_norm": 3.0727636329367645, + "learning_rate": 1.806374487863464e-05, + "loss": 0.8502, "step": 4789 }, { - "epoch": 1.0044034388760748, - "grad_norm": 11.597535446234218, - "learning_rate": 1.551150672402711e-05, - "loss": 1.3654, + "epoch": 0.6763626094323637, + "grad_norm": 3.580258509023351, + "learning_rate": 1.8062843215436965e-05, + "loss": 1.1466, "step": 4790 }, { - "epoch": 1.004613126441602, - "grad_norm": 7.89602725031278, - "learning_rate": 1.550961758450825e-05, - "loss": 0.8884, + "epoch": 0.6765038124823496, + "grad_norm": 3.904707926386714, + "learning_rate": 1.8061941364862298e-05, + "loss": 1.2405, "step": 4791 }, { - "epoch": 1.0048228140071294, - "grad_norm": 7.472540163553277, - "learning_rate": 1.550772816261193e-05, - "loss": 0.8363, + "epoch": 0.6766450155323355, + "grad_norm": 3.3796818144992864, + "learning_rate": 1.8061039326931592e-05, + "loss": 0.8945, "step": 4792 }, { - "epoch": 1.0050325015726567, - "grad_norm": 7.979102850120129, - "learning_rate": 1.550583845843499e-05, - "loss": 1.0372, + "epoch": 0.6767862185823214, + "grad_norm": 4.139585113864387, + "learning_rate": 1.806013710166582e-05, + "loss": 1.0117, "step": 4793 }, { - "epoch": 1.0052421891381842, - "grad_norm": 8.134542192639802, - "learning_rate": 1.550394847207428e-05, - "loss": 0.927, + "epoch": 0.6769274216323072, + "grad_norm": 4.219950537132137, + "learning_rate": 1.8059234689085935e-05, + "loss": 1.3829, "step": 4794 }, { - "epoch": 1.0054518767037115, - "grad_norm": 6.66423236170934, - "learning_rate": 1.5502058203626663e-05, - "loss": 0.858, + "epoch": 0.6770686246822931, + "grad_norm": 4.118136651398588, + "learning_rate": 1.805833208921292e-05, + "loss": 1.1909, "step": 4795 }, { - "epoch": 1.0056615642692388, - "grad_norm": 6.1002395239030855, - "learning_rate": 1.5500167653189024e-05, - "loss": 0.7533, + "epoch": 0.677209827732279, + "grad_norm": 4.4347400580385505, + "learning_rate": 1.8057429302067748e-05, + "loss": 1.0705, "step": 4796 }, { - "epoch": 1.0058712518347661, - "grad_norm": 6.1888208626094965, - "learning_rate": 1.5498276820858255e-05, - "loss": 0.7957, + "epoch": 0.6773510307822649, + "grad_norm": 3.6787284403167524, + "learning_rate": 1.8056526327671396e-05, + "loss": 1.0412, "step": 4797 }, { - "epoch": 1.0060809394002936, - "grad_norm": 8.430796112383831, - "learning_rate": 1.5496385706731255e-05, - "loss": 1.2532, + "epoch": 0.6774922338322508, + "grad_norm": 3.2283748757895254, + "learning_rate": 1.8055623166044855e-05, + "loss": 0.8246, "step": 4798 }, { - "epoch": 1.006290626965821, - "grad_norm": 7.171822195544356, - "learning_rate": 1.549449431090496e-05, - "loss": 1.1588, + "epoch": 0.6776334368822367, + "grad_norm": 3.4977745205903323, + "learning_rate": 1.8054719817209107e-05, + "loss": 1.0895, "step": 4799 }, { - "epoch": 1.0065003145313483, - "grad_norm": 7.101904096560574, - "learning_rate": 1.54926026334763e-05, - "loss": 0.7991, + "epoch": 0.6777746399322225, + "grad_norm": 2.8660882965646595, + "learning_rate": 1.8053816281185154e-05, + "loss": 0.8597, "step": 4800 }, { - "epoch": 1.0067100020968756, - "grad_norm": 7.172039592377217, - "learning_rate": 1.5490710674542227e-05, - "loss": 0.9349, + "epoch": 0.6779158429822084, + "grad_norm": 3.14123087654651, + "learning_rate": 1.8052912557993983e-05, + "loss": 1.05, "step": 4801 }, { - "epoch": 1.006919689662403, - "grad_norm": 7.570033783058164, - "learning_rate": 1.5488818434199713e-05, - "loss": 1.0008, + "epoch": 0.6780570460321943, + "grad_norm": 3.4273673691330493, + "learning_rate": 1.8052008647656605e-05, + "loss": 0.9943, "step": 4802 }, { - "epoch": 1.0071293772279304, - "grad_norm": 6.4288680238348705, - "learning_rate": 1.5486925912545732e-05, - "loss": 0.6979, + "epoch": 0.6781982490821802, + "grad_norm": 3.5600421480646767, + "learning_rate": 1.8051104550194024e-05, + "loss": 0.9313, "step": 4803 }, { - "epoch": 1.0073390647934577, - "grad_norm": 6.733630736384756, - "learning_rate": 1.548503310967728e-05, - "loss": 1.0022, + "epoch": 0.6783394521321661, + "grad_norm": 3.259273142044434, + "learning_rate": 1.8050200265627247e-05, + "loss": 0.8943, "step": 4804 }, { - "epoch": 1.0075487523589852, - "grad_norm": 6.710651848201421, - "learning_rate": 1.548314002569137e-05, - "loss": 1.038, + "epoch": 0.678480655182152, + "grad_norm": 3.758498833701552, + "learning_rate": 1.8049295793977294e-05, + "loss": 1.1804, "step": 4805 }, { - "epoch": 1.0077584399245125, - "grad_norm": 7.017493004074286, - "learning_rate": 1.5481246660685024e-05, - "loss": 0.9196, + "epoch": 0.6786218582321378, + "grad_norm": 3.8291999656663824, + "learning_rate": 1.8048391135265185e-05, + "loss": 1.2403, "step": 4806 }, { - "epoch": 1.0079681274900398, - "grad_norm": 7.356488083523028, - "learning_rate": 1.5479353014755278e-05, - "loss": 0.9708, + "epoch": 0.6787630612821237, + "grad_norm": 3.3367429905978456, + "learning_rate": 1.804748628951194e-05, + "loss": 0.9607, "step": 4807 }, { - "epoch": 1.0081778150555671, - "grad_norm": 8.866333547929878, - "learning_rate": 1.5477459087999188e-05, - "loss": 0.9884, + "epoch": 0.6789042643321096, + "grad_norm": 3.529891021686482, + "learning_rate": 1.8046581256738592e-05, + "loss": 1.0931, "step": 4808 }, { - "epoch": 1.0083875026210947, - "grad_norm": 6.163757802439368, - "learning_rate": 1.547556488051382e-05, - "loss": 0.8365, + "epoch": 0.6790454673820955, + "grad_norm": 3.6847509995865155, + "learning_rate": 1.8045676036966168e-05, + "loss": 0.9374, "step": 4809 }, { - "epoch": 1.008597190186622, - "grad_norm": 8.021070729056651, - "learning_rate": 1.5473670392396255e-05, - "loss": 1.0373, + "epoch": 0.6791866704320814, + "grad_norm": 3.7895070392050436, + "learning_rate": 1.8044770630215706e-05, + "loss": 0.953, "step": 4810 }, { - "epoch": 1.0088068777521493, - "grad_norm": 7.068433734458395, - "learning_rate": 1.5471775623743588e-05, - "loss": 0.8432, + "epoch": 0.6793278734820672, + "grad_norm": 3.6749260737117457, + "learning_rate": 1.804386503650825e-05, + "loss": 1.0148, "step": 4811 }, { - "epoch": 1.0090165653176766, - "grad_norm": 9.830581462857863, - "learning_rate": 1.546988057465293e-05, - "loss": 1.0924, + "epoch": 0.6794690765320531, + "grad_norm": 3.461074273487788, + "learning_rate": 1.8042959255864846e-05, + "loss": 1.0565, "step": 4812 }, { - "epoch": 1.0092262528832041, - "grad_norm": 7.394931313614123, - "learning_rate": 1.5467985245221406e-05, - "loss": 0.8874, + "epoch": 0.679610279582039, + "grad_norm": 3.3270221471902586, + "learning_rate": 1.804205328830654e-05, + "loss": 1.0843, "step": 4813 }, { - "epoch": 1.0094359404487314, - "grad_norm": 7.538013906143377, - "learning_rate": 1.5466089635546155e-05, - "loss": 1.0055, + "epoch": 0.6797514826320249, + "grad_norm": 3.85241320846476, + "learning_rate": 1.804114713385439e-05, + "loss": 1.1383, "step": 4814 }, { - "epoch": 1.0096456280142587, - "grad_norm": 6.232427846893156, - "learning_rate": 1.5464193745724325e-05, - "loss": 0.6984, + "epoch": 0.6798926856820108, + "grad_norm": 4.012760611467689, + "learning_rate": 1.8040240792529457e-05, + "loss": 1.027, "step": 4815 }, { - "epoch": 1.009855315579786, - "grad_norm": 5.877638188346615, - "learning_rate": 1.546229757585309e-05, - "loss": 0.7757, + "epoch": 0.6800338887319967, + "grad_norm": 3.2400781618899206, + "learning_rate": 1.8039334264352794e-05, + "loss": 0.9966, "step": 4816 }, { - "epoch": 1.0100650031453136, - "grad_norm": 7.489816565219068, - "learning_rate": 1.5460401126029637e-05, - "loss": 0.9767, + "epoch": 0.6801750917819825, + "grad_norm": 3.2432741102507756, + "learning_rate": 1.803842754934548e-05, + "loss": 1.0201, "step": 4817 }, { - "epoch": 1.0102746907108409, - "grad_norm": 7.792365557288659, - "learning_rate": 1.545850439635115e-05, - "loss": 1.0158, + "epoch": 0.6803162948319683, + "grad_norm": 3.688204072523597, + "learning_rate": 1.8037520647528576e-05, + "loss": 0.9485, "step": 4818 }, { - "epoch": 1.0104843782763682, - "grad_norm": 8.341728234282202, - "learning_rate": 1.5456607386914845e-05, - "loss": 0.8665, + "epoch": 0.6804574978819542, + "grad_norm": 4.155285121958546, + "learning_rate": 1.803661355892317e-05, + "loss": 1.1593, "step": 4819 }, { - "epoch": 1.0106940658418955, - "grad_norm": 8.47319788320541, - "learning_rate": 1.545471009781795e-05, - "loss": 0.984, + "epoch": 0.6805987009319401, + "grad_norm": 3.792562006503575, + "learning_rate": 1.803570628355033e-05, + "loss": 1.0216, "step": 4820 }, { - "epoch": 1.010903753407423, - "grad_norm": 8.941107616003826, - "learning_rate": 1.54528125291577e-05, - "loss": 0.7165, + "epoch": 0.680739903981926, + "grad_norm": 3.2203572360239714, + "learning_rate": 1.8034798821431146e-05, + "loss": 0.9205, "step": 4821 }, { - "epoch": 1.0111134409729503, - "grad_norm": 11.124118945174159, - "learning_rate": 1.5450914681031352e-05, - "loss": 1.2522, + "epoch": 0.6808811070319118, + "grad_norm": 3.249799980212703, + "learning_rate": 1.803389117258671e-05, + "loss": 1.0793, "step": 4822 }, { - "epoch": 1.0113231285384776, - "grad_norm": 6.265448431014745, - "learning_rate": 1.5449016553536168e-05, - "loss": 0.7148, + "epoch": 0.6810223100818977, + "grad_norm": 3.566213009754105, + "learning_rate": 1.803298333703811e-05, + "loss": 1.0183, "step": 4823 }, { - "epoch": 1.0115328161040051, - "grad_norm": 7.978689641467229, - "learning_rate": 1.544711814676944e-05, - "loss": 0.9815, + "epoch": 0.6811635131318836, + "grad_norm": 3.560328288195111, + "learning_rate": 1.803207531480645e-05, + "loss": 1.0015, "step": 4824 }, { - "epoch": 1.0117425036695324, - "grad_norm": 7.397987623192461, - "learning_rate": 1.544521946082846e-05, - "loss": 0.8813, + "epoch": 0.6813047161818695, + "grad_norm": 4.145151558755745, + "learning_rate": 1.8031167105912828e-05, + "loss": 1.3682, "step": 4825 }, { - "epoch": 1.0119521912350598, - "grad_norm": 7.171100212561379, - "learning_rate": 1.5443320495810535e-05, - "loss": 0.8061, + "epoch": 0.6814459192318554, + "grad_norm": 3.2218094598860354, + "learning_rate": 1.8030258710378348e-05, + "loss": 0.9294, "step": 4826 }, { - "epoch": 1.012161878800587, - "grad_norm": 8.045166625457925, - "learning_rate": 1.5441421251812993e-05, - "loss": 0.9499, + "epoch": 0.6815871222818413, + "grad_norm": 3.1631731361180213, + "learning_rate": 1.8029350128224126e-05, + "loss": 1.0385, "step": 4827 }, { - "epoch": 1.0123715663661146, - "grad_norm": 7.651080839085174, - "learning_rate": 1.5439521728933177e-05, - "loss": 1.0635, + "epoch": 0.6817283253318271, + "grad_norm": 4.3486490094319645, + "learning_rate": 1.8028441359471273e-05, + "loss": 0.9646, "step": 4828 }, { - "epoch": 1.012581253931642, - "grad_norm": 9.857127619093347, - "learning_rate": 1.5437621927268435e-05, - "loss": 0.9477, + "epoch": 0.681869528381813, + "grad_norm": 3.304591147931252, + "learning_rate": 1.8027532404140914e-05, + "loss": 1.1562, "step": 4829 }, { - "epoch": 1.0127909414971692, - "grad_norm": 7.729377406431391, - "learning_rate": 1.5435721846916137e-05, - "loss": 0.9344, + "epoch": 0.6820107314317989, + "grad_norm": 3.811938130683063, + "learning_rate": 1.8026623262254164e-05, + "loss": 0.9833, "step": 4830 }, { - "epoch": 1.0130006290626965, - "grad_norm": 6.78144221566299, - "learning_rate": 1.5433821487973674e-05, - "loss": 0.8084, + "epoch": 0.6821519344817848, + "grad_norm": 3.590591899478524, + "learning_rate": 1.802571393383216e-05, + "loss": 0.9524, "step": 4831 }, { - "epoch": 1.013210316628224, - "grad_norm": 6.752617376832929, - "learning_rate": 1.5431920850538432e-05, - "loss": 0.9478, + "epoch": 0.6822931375317707, + "grad_norm": 3.472513492817397, + "learning_rate": 1.8024804418896026e-05, + "loss": 0.9623, "step": 4832 }, { - "epoch": 1.0134200041937513, - "grad_norm": 5.9980789959754555, - "learning_rate": 1.543001993470782e-05, - "loss": 0.6817, + "epoch": 0.6824343405817566, + "grad_norm": 3.1760262048708388, + "learning_rate": 1.8023894717466904e-05, + "loss": 0.9801, "step": 4833 }, { - "epoch": 1.0136296917592786, - "grad_norm": 6.577932642380443, - "learning_rate": 1.5428118740579274e-05, - "loss": 0.7125, + "epoch": 0.6825755436317424, + "grad_norm": 3.5500072584261333, + "learning_rate": 1.8022984829565935e-05, + "loss": 1.1531, "step": 4834 }, { - "epoch": 1.013839379324806, - "grad_norm": 5.711699780954579, - "learning_rate": 1.5426217268250226e-05, - "loss": 0.6333, + "epoch": 0.6827167466817283, + "grad_norm": 4.15453036117576, + "learning_rate": 1.8022074755214264e-05, + "loss": 0.8229, "step": 4835 }, { - "epoch": 1.0140490668903335, - "grad_norm": 8.122745473699483, - "learning_rate": 1.5424315517818134e-05, - "loss": 1.0306, + "epoch": 0.6828579497317142, + "grad_norm": 3.579126912745341, + "learning_rate": 1.8021164494433038e-05, + "loss": 1.1329, "step": 4836 }, { - "epoch": 1.0142587544558608, - "grad_norm": 5.818445579124551, - "learning_rate": 1.542241348938046e-05, - "loss": 0.8214, + "epoch": 0.6829991527817001, + "grad_norm": 3.826680535979476, + "learning_rate": 1.8020254047243413e-05, + "loss": 1.0305, "step": 4837 }, { - "epoch": 1.014468442021388, - "grad_norm": 7.497099912494813, - "learning_rate": 1.5420511183034695e-05, - "loss": 0.899, + "epoch": 0.683140355831686, + "grad_norm": 3.4828094514436496, + "learning_rate": 1.801934341366655e-05, + "loss": 1.0038, "step": 4838 }, { - "epoch": 1.0146781295869154, - "grad_norm": 7.833021591244792, - "learning_rate": 1.5418608598878325e-05, - "loss": 0.8644, + "epoch": 0.6832815588816719, + "grad_norm": 4.320111056414389, + "learning_rate": 1.801843259372361e-05, + "loss": 1.0814, "step": 4839 }, { - "epoch": 1.014887817152443, - "grad_norm": 7.221951250666633, - "learning_rate": 1.5416705737008868e-05, - "loss": 0.9763, + "epoch": 0.6834227619316577, + "grad_norm": 3.549533724743314, + "learning_rate": 1.801752158743576e-05, + "loss": 1.0644, "step": 4840 }, { - "epoch": 1.0150975047179702, - "grad_norm": 7.060329361357559, - "learning_rate": 1.541480259752385e-05, - "loss": 0.8822, + "epoch": 0.6835639649816436, + "grad_norm": 3.3937231726407053, + "learning_rate": 1.8016610394824168e-05, + "loss": 1.0014, "step": 4841 }, { - "epoch": 1.0153071922834975, - "grad_norm": 6.607377770868969, - "learning_rate": 1.5412899180520804e-05, - "loss": 0.7601, + "epoch": 0.6837051680316295, + "grad_norm": 3.846986015963456, + "learning_rate": 1.801569901591001e-05, + "loss": 1.0466, "step": 4842 }, { - "epoch": 1.015516879849025, - "grad_norm": 7.128108755993761, - "learning_rate": 1.541099548609729e-05, - "loss": 0.8568, + "epoch": 0.6838463710816154, + "grad_norm": 3.5676368238464677, + "learning_rate": 1.801478745071447e-05, + "loss": 0.935, "step": 4843 }, { - "epoch": 1.0157265674145524, - "grad_norm": 7.7933385718225106, - "learning_rate": 1.540909151435087e-05, - "loss": 0.9918, + "epoch": 0.6839875741316013, + "grad_norm": 3.739033956686776, + "learning_rate": 1.8013875699258738e-05, + "loss": 1.0882, "step": 4844 }, { - "epoch": 1.0159362549800797, - "grad_norm": 7.842364843020797, - "learning_rate": 1.5407187265379127e-05, - "loss": 0.9293, + "epoch": 0.6841287771815872, + "grad_norm": 3.808747480885693, + "learning_rate": 1.801296376156399e-05, + "loss": 1.1735, "step": 4845 }, { - "epoch": 1.016145942545607, - "grad_norm": 9.015812971669483, - "learning_rate": 1.540528273927966e-05, - "loss": 0.9142, + "epoch": 0.684269980231573, + "grad_norm": 4.424774924232497, + "learning_rate": 1.8012051637651423e-05, + "loss": 1.1288, "step": 4846 }, { - "epoch": 1.0163556301111345, - "grad_norm": 6.600221295929714, - "learning_rate": 1.5403377936150076e-05, - "loss": 0.602, + "epoch": 0.6844111832815589, + "grad_norm": 3.590957629098163, + "learning_rate": 1.8011139327542238e-05, + "loss": 0.9503, "step": 4847 }, { - "epoch": 1.0165653176766618, - "grad_norm": 9.039132498764376, - "learning_rate": 1.5401472856088006e-05, - "loss": 1.0537, + "epoch": 0.6845523863315448, + "grad_norm": 3.27030680117124, + "learning_rate": 1.8010226831257636e-05, + "loss": 0.9983, "step": 4848 }, { - "epoch": 1.0167750052421891, - "grad_norm": 7.79865091391446, - "learning_rate": 1.539956749919108e-05, - "loss": 0.9747, + "epoch": 0.6846935893815307, + "grad_norm": 2.8577112140751058, + "learning_rate": 1.8009314148818822e-05, + "loss": 0.8771, "step": 4849 }, { - "epoch": 1.0169846928077164, - "grad_norm": 7.603094653958017, - "learning_rate": 1.539766186555695e-05, - "loss": 0.9491, + "epoch": 0.6848347924315166, + "grad_norm": 3.306867257306951, + "learning_rate": 1.8008401280247005e-05, + "loss": 1.0379, "step": 4850 }, { - "epoch": 1.017194380373244, - "grad_norm": 7.652528116781066, - "learning_rate": 1.5395755955283297e-05, - "loss": 0.8649, + "epoch": 0.6849759954815025, + "grad_norm": 3.8980972002258016, + "learning_rate": 1.80074882255634e-05, + "loss": 1.0437, "step": 4851 }, { - "epoch": 1.0174040679387713, - "grad_norm": 6.456569485212163, - "learning_rate": 1.5393849768467788e-05, - "loss": 0.8898, + "epoch": 0.6851171985314882, + "grad_norm": 3.732492234526865, + "learning_rate": 1.8006574984789226e-05, + "loss": 1.1157, "step": 4852 }, { - "epoch": 1.0176137555042986, - "grad_norm": 6.809272595758615, - "learning_rate": 1.5391943305208124e-05, - "loss": 0.7431, + "epoch": 0.6852584015814741, + "grad_norm": 3.8633955724280953, + "learning_rate": 1.800566155794571e-05, + "loss": 0.9841, "step": 4853 }, { - "epoch": 1.0178234430698259, - "grad_norm": 8.01741550480431, - "learning_rate": 1.539003656560201e-05, - "loss": 0.993, + "epoch": 0.68539960463146, + "grad_norm": 3.7733800500700596, + "learning_rate": 1.8004747945054076e-05, + "loss": 1.0379, "step": 4854 }, { - "epoch": 1.0180331306353534, - "grad_norm": 7.77373667146071, - "learning_rate": 1.538812954974718e-05, - "loss": 0.9156, + "epoch": 0.6855408076814459, + "grad_norm": 3.325498523459098, + "learning_rate": 1.8003834146135557e-05, + "loss": 0.9438, "step": 4855 }, { - "epoch": 1.0182428182008807, - "grad_norm": 6.572133287398302, - "learning_rate": 1.538622225774136e-05, - "loss": 0.7831, + "epoch": 0.6856820107314318, + "grad_norm": 3.6169162193298456, + "learning_rate": 1.8002920161211384e-05, + "loss": 1.0583, "step": 4856 }, { - "epoch": 1.018452505766408, - "grad_norm": 7.216682451758032, - "learning_rate": 1.5384314689682313e-05, - "loss": 0.8784, + "epoch": 0.6858232137814176, + "grad_norm": 3.845935905359987, + "learning_rate": 1.800200599030281e-05, + "loss": 1.2527, "step": 4857 }, { - "epoch": 1.0186621933319353, - "grad_norm": 7.243893114131416, - "learning_rate": 1.5382406845667794e-05, - "loss": 1.0223, + "epoch": 0.6859644168314035, + "grad_norm": 3.3436245075169473, + "learning_rate": 1.800109163343107e-05, + "loss": 0.8502, "step": 4858 }, { - "epoch": 1.0188718808974628, - "grad_norm": 7.749479472701532, - "learning_rate": 1.538049872579559e-05, - "loss": 0.9865, + "epoch": 0.6861056198813894, + "grad_norm": 3.796254835586533, + "learning_rate": 1.8000177090617416e-05, + "loss": 1.0515, "step": 4859 }, { - "epoch": 1.0190815684629901, - "grad_norm": 7.213357088359016, - "learning_rate": 1.53785903301635e-05, - "loss": 0.9423, + "epoch": 0.6862468229313753, + "grad_norm": 3.9392811400000984, + "learning_rate": 1.7999262361883102e-05, + "loss": 1.1274, "step": 4860 }, { - "epoch": 1.0192912560285174, - "grad_norm": 7.7311774524613135, - "learning_rate": 1.537668165886932e-05, - "loss": 1.037, + "epoch": 0.6863880259813612, + "grad_norm": 3.730663995315453, + "learning_rate": 1.799834744724938e-05, + "loss": 0.9405, "step": 4861 }, { - "epoch": 1.019500943594045, - "grad_norm": 6.939071631903374, - "learning_rate": 1.5374772712010878e-05, - "loss": 0.7832, + "epoch": 0.686529229031347, + "grad_norm": 4.319885924397974, + "learning_rate": 1.7997432346737524e-05, + "loss": 1.1759, "step": 4862 }, { - "epoch": 1.0197106311595723, - "grad_norm": 7.304979051799655, - "learning_rate": 1.5372863489686022e-05, - "loss": 1.0671, + "epoch": 0.6866704320813329, + "grad_norm": 4.045438380914125, + "learning_rate": 1.7996517060368793e-05, + "loss": 0.9222, "step": 4863 }, { - "epoch": 1.0199203187250996, - "grad_norm": 7.393542107705038, - "learning_rate": 1.5370953991992584e-05, - "loss": 1.0416, + "epoch": 0.6868116351313188, + "grad_norm": 3.189527000501543, + "learning_rate": 1.7995601588164456e-05, + "loss": 0.961, "step": 4864 }, { - "epoch": 1.020130006290627, - "grad_norm": 9.137503390076736, - "learning_rate": 1.5369044219028444e-05, - "loss": 1.1535, + "epoch": 0.6869528381813047, + "grad_norm": 3.763511417082652, + "learning_rate": 1.7994685930145793e-05, + "loss": 1.1567, "step": 4865 }, { - "epoch": 1.0203396938561544, - "grad_norm": 6.654196987292526, - "learning_rate": 1.5367134170891472e-05, - "loss": 0.8538, + "epoch": 0.6870940412312906, + "grad_norm": 3.594409764039015, + "learning_rate": 1.7993770086334082e-05, + "loss": 1.0028, "step": 4866 }, { - "epoch": 1.0205493814216817, - "grad_norm": 8.293336701776512, - "learning_rate": 1.5365223847679568e-05, - "loss": 0.901, + "epoch": 0.6872352442812765, + "grad_norm": 3.493799892916875, + "learning_rate": 1.7992854056750604e-05, + "loss": 0.9926, "step": 4867 }, { - "epoch": 1.020759068987209, - "grad_norm": 6.231442627086459, - "learning_rate": 1.5363313249490635e-05, - "loss": 0.7209, + "epoch": 0.6873764473312624, + "grad_norm": 3.134414188298595, + "learning_rate": 1.7991937841416652e-05, + "loss": 0.854, "step": 4868 }, { - "epoch": 1.0209687565527363, - "grad_norm": 6.380867152388836, - "learning_rate": 1.5361402376422594e-05, - "loss": 0.7363, + "epoch": 0.6875176503812482, + "grad_norm": 3.8320882077153144, + "learning_rate": 1.7991021440353515e-05, + "loss": 1.3191, "step": 4869 }, { - "epoch": 1.0211784441182639, - "grad_norm": 5.782538980568519, - "learning_rate": 1.5359491228573385e-05, - "loss": 0.6333, + "epoch": 0.6876588534312341, + "grad_norm": 3.734186464785118, + "learning_rate": 1.7990104853582494e-05, + "loss": 1.0545, "step": 4870 }, { - "epoch": 1.0213881316837912, - "grad_norm": 6.8108533350524, - "learning_rate": 1.5357579806040953e-05, - "loss": 0.9333, + "epoch": 0.68780005648122, + "grad_norm": 4.560120936341732, + "learning_rate": 1.7989188081124883e-05, + "loss": 1.1339, "step": 4871 }, { - "epoch": 1.0215978192493185, - "grad_norm": 6.561139136800119, - "learning_rate": 1.5355668108923265e-05, - "loss": 0.8766, + "epoch": 0.6879412595312059, + "grad_norm": 3.342683470989466, + "learning_rate": 1.7988271123001994e-05, + "loss": 0.9153, "step": 4872 }, { - "epoch": 1.0218075068148458, - "grad_norm": 7.207667521170825, - "learning_rate": 1.53537561373183e-05, - "loss": 1.0119, + "epoch": 0.6880824625811918, + "grad_norm": 3.548955463691675, + "learning_rate": 1.798735397923513e-05, + "loss": 0.9469, "step": 4873 }, { - "epoch": 1.0220171943803733, - "grad_norm": 6.879222371421032, - "learning_rate": 1.5351843891324045e-05, - "loss": 0.9621, + "epoch": 0.6882236656311777, + "grad_norm": 3.7711465742839034, + "learning_rate": 1.798643664984561e-05, + "loss": 1.1507, "step": 4874 }, { - "epoch": 1.0222268819459006, - "grad_norm": 8.863317670280777, - "learning_rate": 1.534993137103851e-05, - "loss": 0.891, + "epoch": 0.6883648686811635, + "grad_norm": 3.0601005189804953, + "learning_rate": 1.7985519134854755e-05, + "loss": 0.9641, "step": 4875 }, { - "epoch": 1.022436569511428, - "grad_norm": 7.539894066789852, - "learning_rate": 1.5348018576559707e-05, - "loss": 0.7907, + "epoch": 0.6885060717311494, + "grad_norm": 3.472981350550807, + "learning_rate": 1.798460143428388e-05, + "loss": 1.0574, "step": 4876 }, { - "epoch": 1.0226462570769554, - "grad_norm": 7.723878179152712, - "learning_rate": 1.534610550798568e-05, - "loss": 0.909, + "epoch": 0.6886472747811353, + "grad_norm": 3.0069917516150113, + "learning_rate": 1.7983683548154318e-05, + "loss": 0.8841, "step": 4877 }, { - "epoch": 1.0228559446424828, - "grad_norm": 6.255393201148589, - "learning_rate": 1.534419216541447e-05, - "loss": 0.655, + "epoch": 0.6887884778311212, + "grad_norm": 3.772836179279812, + "learning_rate": 1.7982765476487398e-05, + "loss": 1.0526, "step": 4878 }, { - "epoch": 1.02306563220801, - "grad_norm": 5.5256679205084565, - "learning_rate": 1.534227854894415e-05, - "loss": 0.8273, + "epoch": 0.6889296808811071, + "grad_norm": 3.252576219679041, + "learning_rate": 1.7981847219304456e-05, + "loss": 0.972, "step": 4879 }, { - "epoch": 1.0232753197735374, - "grad_norm": 8.1422508065986, - "learning_rate": 1.5340364658672787e-05, - "loss": 0.998, + "epoch": 0.689070883931093, + "grad_norm": 3.5598209124002707, + "learning_rate": 1.7980928776626833e-05, + "loss": 1.0249, "step": 4880 }, { - "epoch": 1.023485007339065, - "grad_norm": 7.899056320304972, - "learning_rate": 1.533845049469847e-05, - "loss": 0.6781, + "epoch": 0.6892120869810788, + "grad_norm": 3.6909074570822145, + "learning_rate": 1.7980010148475868e-05, + "loss": 1.158, "step": 4881 }, { - "epoch": 1.0236946949045922, - "grad_norm": 5.98551241869694, - "learning_rate": 1.5336536057119307e-05, - "loss": 0.7395, + "epoch": 0.6893532900310647, + "grad_norm": 3.8880888594016634, + "learning_rate": 1.7979091334872915e-05, + "loss": 1.3167, "step": 4882 }, { - "epoch": 1.0239043824701195, - "grad_norm": 7.06497172629204, - "learning_rate": 1.533462134603341e-05, - "loss": 1.0321, + "epoch": 0.6894944930810506, + "grad_norm": 3.333804720538366, + "learning_rate": 1.7978172335839324e-05, + "loss": 1.0112, "step": 4883 }, { - "epoch": 1.0241140700356468, - "grad_norm": 5.940506712153749, - "learning_rate": 1.5332706361538927e-05, - "loss": 0.7395, + "epoch": 0.6896356961310365, + "grad_norm": 3.1728299871942185, + "learning_rate": 1.797725315139646e-05, + "loss": 0.8991, "step": 4884 }, { - "epoch": 1.0243237576011743, - "grad_norm": 5.973792579108355, - "learning_rate": 1.5330791103733985e-05, - "loss": 0.7007, + "epoch": 0.6897768991810224, + "grad_norm": 4.031043572413333, + "learning_rate": 1.7976333781565672e-05, + "loss": 0.8381, "step": 4885 }, { - "epoch": 1.0245334451667016, - "grad_norm": 6.682918348080621, - "learning_rate": 1.532887557271676e-05, - "loss": 0.9773, + "epoch": 0.6899181022310081, + "grad_norm": 3.37456968412429, + "learning_rate": 1.7975414226368334e-05, + "loss": 0.9592, "step": 4886 }, { - "epoch": 1.024743132732229, - "grad_norm": 6.170331379094081, - "learning_rate": 1.5326959768585413e-05, - "loss": 0.6962, + "epoch": 0.690059305280994, + "grad_norm": 3.902029201000711, + "learning_rate": 1.7974494485825812e-05, + "loss": 1.0123, "step": 4887 }, { - "epoch": 1.0249528202977563, - "grad_norm": 8.344971099761546, - "learning_rate": 1.5325043691438142e-05, - "loss": 1.1157, + "epoch": 0.6902005083309799, + "grad_norm": 3.518613387923976, + "learning_rate": 1.797357455995948e-05, + "loss": 0.9706, "step": 4888 }, { - "epoch": 1.0251625078632838, - "grad_norm": 6.937852039995552, - "learning_rate": 1.5323127341373146e-05, - "loss": 0.8421, + "epoch": 0.6903417113809658, + "grad_norm": 3.598395742373168, + "learning_rate": 1.7972654448790723e-05, + "loss": 0.967, "step": 4889 }, { - "epoch": 1.025372195428811, - "grad_norm": 8.392026438345647, - "learning_rate": 1.5321210718488642e-05, - "loss": 0.9602, + "epoch": 0.6904829144309517, + "grad_norm": 4.119492894180726, + "learning_rate": 1.7971734152340918e-05, + "loss": 1.2623, "step": 4890 }, { - "epoch": 1.0255818829943384, - "grad_norm": 6.307493919128374, - "learning_rate": 1.531929382288286e-05, - "loss": 1.0047, + "epoch": 0.6906241174809375, + "grad_norm": 3.9999078769706573, + "learning_rate": 1.7970813670631454e-05, + "loss": 1.0526, "step": 4891 }, { - "epoch": 1.0257915705598657, - "grad_norm": 7.381614471042405, - "learning_rate": 1.5317376654654043e-05, - "loss": 1.0528, + "epoch": 0.6907653205309234, + "grad_norm": 3.7720480674761525, + "learning_rate": 1.7969893003683726e-05, + "loss": 0.9604, "step": 4892 }, { - "epoch": 1.0260012581253932, - "grad_norm": 8.015427527495586, - "learning_rate": 1.5315459213900448e-05, - "loss": 0.8527, + "epoch": 0.6909065235809093, + "grad_norm": 3.1130275854445393, + "learning_rate": 1.796897215151912e-05, + "loss": 0.8214, "step": 4893 }, { - "epoch": 1.0262109456909205, - "grad_norm": 7.079622282020579, - "learning_rate": 1.531354150072035e-05, - "loss": 0.9815, + "epoch": 0.6910477266308952, + "grad_norm": 3.6919640978968475, + "learning_rate": 1.7968051114159046e-05, + "loss": 1.044, "step": 4894 }, { - "epoch": 1.0264206332564478, - "grad_norm": 5.875508104929852, - "learning_rate": 1.5311623515212036e-05, - "loss": 0.6933, + "epoch": 0.6911889296808811, + "grad_norm": 3.5024666374190843, + "learning_rate": 1.7967129891624907e-05, + "loss": 0.9204, "step": 4895 }, { - "epoch": 1.0266303208219754, - "grad_norm": 8.498549735591824, - "learning_rate": 1.53097052574738e-05, - "loss": 1.0195, + "epoch": 0.691330132730867, + "grad_norm": 4.039641906444027, + "learning_rate": 1.7966208483938108e-05, + "loss": 1.2647, "step": 4896 }, { - "epoch": 1.0268400083875027, - "grad_norm": 6.673638914952235, - "learning_rate": 1.5307786727603962e-05, - "loss": 0.7539, + "epoch": 0.6914713357808528, + "grad_norm": 3.1430245958419274, + "learning_rate": 1.7965286891120064e-05, + "loss": 0.7836, "step": 4897 }, { - "epoch": 1.02704969595303, - "grad_norm": 6.593148830091223, - "learning_rate": 1.5305867925700853e-05, - "loss": 0.7908, + "epoch": 0.6916125388308387, + "grad_norm": 3.0189861197600996, + "learning_rate": 1.7964365113192195e-05, + "loss": 0.9178, "step": 4898 }, { - "epoch": 1.0272593835185573, - "grad_norm": 9.085220206714018, - "learning_rate": 1.5303948851862806e-05, - "loss": 1.2864, + "epoch": 0.6917537418808246, + "grad_norm": 3.778769813900629, + "learning_rate": 1.7963443150175915e-05, + "loss": 1.1918, "step": 4899 }, { - "epoch": 1.0274690710840848, - "grad_norm": 6.637584637932477, - "learning_rate": 1.530202950618818e-05, - "loss": 0.8352, + "epoch": 0.6918949449308105, + "grad_norm": 3.46060760335112, + "learning_rate": 1.796252100209266e-05, + "loss": 0.8881, "step": 4900 }, { - "epoch": 1.0276787586496121, - "grad_norm": 6.194410226825367, - "learning_rate": 1.5300109888775348e-05, - "loss": 0.5937, + "epoch": 0.6920361479807964, + "grad_norm": 3.791675125173452, + "learning_rate": 1.796159866896386e-05, + "loss": 1.0193, "step": 4901 }, { - "epoch": 1.0278884462151394, - "grad_norm": 7.7949109501524365, - "learning_rate": 1.5298189999722693e-05, - "loss": 0.8478, + "epoch": 0.6921773510307823, + "grad_norm": 3.618495650277027, + "learning_rate": 1.7960676150810937e-05, + "loss": 0.9839, "step": 4902 }, { - "epoch": 1.0280981337806667, - "grad_norm": 6.422156473358355, - "learning_rate": 1.5296269839128612e-05, - "loss": 0.7847, + "epoch": 0.6923185540807681, + "grad_norm": 3.5567501844874982, + "learning_rate": 1.795975344765534e-05, + "loss": 1.1792, "step": 4903 }, { - "epoch": 1.0283078213461943, - "grad_norm": 8.206784981133389, - "learning_rate": 1.529434940709151e-05, - "loss": 0.9669, + "epoch": 0.692459757130754, + "grad_norm": 3.882062105729809, + "learning_rate": 1.7958830559518513e-05, + "loss": 1.1627, "step": 4904 }, { - "epoch": 1.0285175089117216, - "grad_norm": 8.8965749407282, - "learning_rate": 1.5292428703709825e-05, - "loss": 0.9365, + "epoch": 0.6926009601807399, + "grad_norm": 3.1158292523688784, + "learning_rate": 1.7957907486421896e-05, + "loss": 0.798, "step": 4905 }, { - "epoch": 1.0287271964772489, - "grad_norm": 6.954027681744386, - "learning_rate": 1.529050772908198e-05, - "loss": 0.7532, + "epoch": 0.6927421632307258, + "grad_norm": 3.6847868615351644, + "learning_rate": 1.795698422838695e-05, + "loss": 1.0689, "step": 4906 }, { - "epoch": 1.0289368840427762, - "grad_norm": 6.951945879429214, - "learning_rate": 1.528858648330645e-05, - "loss": 0.9188, + "epoch": 0.6928833662807117, + "grad_norm": 3.248669508406251, + "learning_rate": 1.7956060785435125e-05, + "loss": 1.0676, "step": 4907 }, { - "epoch": 1.0291465716083037, - "grad_norm": 7.285777060926327, - "learning_rate": 1.528666496648168e-05, - "loss": 0.8034, + "epoch": 0.6930245693306976, + "grad_norm": 3.283982429965398, + "learning_rate": 1.7955137157587886e-05, + "loss": 1.0636, "step": 4908 }, { - "epoch": 1.029356259173831, - "grad_norm": 8.242924612766094, - "learning_rate": 1.5284743178706168e-05, - "loss": 0.9133, + "epoch": 0.6931657723806834, + "grad_norm": 3.6653384614291715, + "learning_rate": 1.7954213344866694e-05, + "loss": 1.2765, "step": 4909 }, { - "epoch": 1.0295659467393583, - "grad_norm": 6.4857903543043225, - "learning_rate": 1.52828211200784e-05, - "loss": 0.7224, + "epoch": 0.6933069754306693, + "grad_norm": 3.2693588718239797, + "learning_rate": 1.795328934729302e-05, + "loss": 0.9708, "step": 4910 }, { - "epoch": 1.0297756343048856, - "grad_norm": 7.632058495847685, - "learning_rate": 1.5280898790696887e-05, - "loss": 1.0222, + "epoch": 0.6934481784806552, + "grad_norm": 3.2894821939712315, + "learning_rate": 1.795236516488833e-05, + "loss": 1.0232, "step": 4911 }, { - "epoch": 1.0299853218704131, - "grad_norm": 8.241533774173002, - "learning_rate": 1.5278976190660152e-05, - "loss": 0.9351, + "epoch": 0.6935893815306411, + "grad_norm": 5.904686601776154, + "learning_rate": 1.7951440797674117e-05, + "loss": 1.1616, "step": 4912 }, { - "epoch": 1.0301950094359404, - "grad_norm": 6.900869870056787, - "learning_rate": 1.527705332006673e-05, - "loss": 0.797, + "epoch": 0.693730584580627, + "grad_norm": 3.20207256483985, + "learning_rate": 1.7950516245671848e-05, + "loss": 0.9678, "step": 4913 }, { - "epoch": 1.0304046970014678, - "grad_norm": 7.728788265724365, - "learning_rate": 1.5275130179015173e-05, - "loss": 0.9883, + "epoch": 0.6938717876306129, + "grad_norm": 3.573764548442737, + "learning_rate": 1.7949591508903016e-05, + "loss": 1.1044, "step": 4914 }, { - "epoch": 1.0306143845669953, - "grad_norm": 7.829491061975332, - "learning_rate": 1.5273206767604047e-05, - "loss": 0.9612, + "epoch": 0.6940129906805987, + "grad_norm": 3.5595335588285884, + "learning_rate": 1.7948666587389112e-05, + "loss": 0.9634, "step": 4915 }, { - "epoch": 1.0308240721325226, - "grad_norm": 6.348711659796047, - "learning_rate": 1.5271283085931927e-05, - "loss": 0.807, + "epoch": 0.6941541937305846, + "grad_norm": 3.1314588481850394, + "learning_rate": 1.7947741481151628e-05, + "loss": 0.8725, "step": 4916 }, { - "epoch": 1.03103375969805, - "grad_norm": 7.119047448839702, - "learning_rate": 1.526935913409741e-05, - "loss": 0.8866, + "epoch": 0.6942953967805705, + "grad_norm": 3.975707036517665, + "learning_rate": 1.7946816190212063e-05, + "loss": 1.1297, "step": 4917 }, { - "epoch": 1.0312434472635772, - "grad_norm": 7.073621453726036, - "learning_rate": 1.526743491219909e-05, - "loss": 0.7166, + "epoch": 0.6944365998305564, + "grad_norm": 3.354867228416297, + "learning_rate": 1.7945890714591926e-05, + "loss": 1.0578, "step": 4918 }, { - "epoch": 1.0314531348291047, - "grad_norm": 6.2562974434917695, - "learning_rate": 1.52655104203356e-05, - "loss": 0.6898, + "epoch": 0.6945778028805423, + "grad_norm": 3.8502550493035628, + "learning_rate": 1.7944965054312718e-05, + "loss": 0.9862, "step": 4919 }, { - "epoch": 1.031662822394632, - "grad_norm": 7.45145563132765, - "learning_rate": 1.526358565860557e-05, - "loss": 0.7589, + "epoch": 0.694719005930528, + "grad_norm": 4.158313788612613, + "learning_rate": 1.794403920939595e-05, + "loss": 1.1441, "step": 4920 }, { - "epoch": 1.0318725099601593, - "grad_norm": 7.163729357019478, - "learning_rate": 1.5261660627107643e-05, - "loss": 0.8901, + "epoch": 0.6948602089805139, + "grad_norm": 3.622695532047422, + "learning_rate": 1.7943113179863147e-05, + "loss": 1.0328, "step": 4921 }, { - "epoch": 1.0320821975256866, - "grad_norm": 7.969289223341199, - "learning_rate": 1.5259735325940482e-05, - "loss": 1.0353, + "epoch": 0.6950014120304998, + "grad_norm": 4.7711040374408205, + "learning_rate": 1.794218696573582e-05, + "loss": 1.3909, "step": 4922 }, { - "epoch": 1.0322918850912142, - "grad_norm": 8.35151287533368, - "learning_rate": 1.5257809755202766e-05, - "loss": 0.9723, + "epoch": 0.6951426150804857, + "grad_norm": 4.129471713708771, + "learning_rate": 1.7941260567035498e-05, + "loss": 0.973, "step": 4923 }, { - "epoch": 1.0325015726567415, - "grad_norm": 8.57390297153983, - "learning_rate": 1.525588391499318e-05, - "loss": 1.0289, + "epoch": 0.6952838181304716, + "grad_norm": 4.097047556802675, + "learning_rate": 1.7940333983783715e-05, + "loss": 1.2203, "step": 4924 }, { - "epoch": 1.0327112602222688, - "grad_norm": 5.166400977438753, - "learning_rate": 1.5253957805410427e-05, - "loss": 0.6506, + "epoch": 0.6954250211804575, + "grad_norm": 3.4975179725561465, + "learning_rate": 1.7939407216001993e-05, + "loss": 1.0387, "step": 4925 }, { - "epoch": 1.032920947787796, - "grad_norm": 6.28417191108513, - "learning_rate": 1.5252031426553223e-05, - "loss": 0.7086, + "epoch": 0.6955662242304433, + "grad_norm": 3.809718589561135, + "learning_rate": 1.793848026371188e-05, + "loss": 1.0785, "step": 4926 }, { - "epoch": 1.0331306353533236, - "grad_norm": 6.016081697449043, - "learning_rate": 1.52501047785203e-05, - "loss": 0.8106, + "epoch": 0.6957074272804292, + "grad_norm": 3.6016494986125687, + "learning_rate": 1.793755312693491e-05, + "loss": 0.9856, "step": 4927 }, { - "epoch": 1.033340322918851, - "grad_norm": 7.938133083908658, - "learning_rate": 1.5248177861410401e-05, - "loss": 0.9777, + "epoch": 0.6958486303304151, + "grad_norm": 3.76952232717789, + "learning_rate": 1.793662580569264e-05, + "loss": 0.9489, "step": 4928 }, { - "epoch": 1.0335500104843782, - "grad_norm": 6.2390497542837915, - "learning_rate": 1.5246250675322282e-05, - "loss": 0.8393, + "epoch": 0.695989833380401, + "grad_norm": 3.822543454458155, + "learning_rate": 1.793569830000661e-05, + "loss": 1.0408, "step": 4929 }, { - "epoch": 1.0337596980499058, - "grad_norm": 7.9152307318907935, - "learning_rate": 1.5244323220354722e-05, - "loss": 1.0305, + "epoch": 0.6961310364303869, + "grad_norm": 3.95015309461034, + "learning_rate": 1.7934770609898377e-05, + "loss": 1.1794, "step": 4930 }, { - "epoch": 1.033969385615433, - "grad_norm": 6.408397883786978, - "learning_rate": 1.5242395496606497e-05, - "loss": 0.8316, + "epoch": 0.6962722394803728, + "grad_norm": 3.8317423000082336, + "learning_rate": 1.7933842735389505e-05, + "loss": 1.0468, "step": 4931 }, { - "epoch": 1.0341790731809604, - "grad_norm": 8.912013223084658, - "learning_rate": 1.5240467504176413e-05, - "loss": 1.0824, + "epoch": 0.6964134425303586, + "grad_norm": 3.437606999973978, + "learning_rate": 1.7932914676501553e-05, + "loss": 1.02, "step": 4932 }, { - "epoch": 1.0343887607464877, - "grad_norm": 6.611544571155025, - "learning_rate": 1.5238539243163277e-05, - "loss": 0.8369, + "epoch": 0.6965546455803445, + "grad_norm": 3.458834077509404, + "learning_rate": 1.7931986433256088e-05, + "loss": 1.1071, "step": 4933 }, { - "epoch": 1.0345984483120152, - "grad_norm": 7.142271040354011, - "learning_rate": 1.5236610713665922e-05, - "loss": 0.8512, + "epoch": 0.6966958486303304, + "grad_norm": 3.4634637098113426, + "learning_rate": 1.7931058005674687e-05, + "loss": 0.8652, "step": 4934 }, { - "epoch": 1.0348081358775425, - "grad_norm": 7.02304870942379, - "learning_rate": 1.5234681915783182e-05, - "loss": 0.938, + "epoch": 0.6968370516803163, + "grad_norm": 3.3396845596011304, + "learning_rate": 1.793012939377892e-05, + "loss": 0.8847, "step": 4935 }, { - "epoch": 1.0350178234430698, - "grad_norm": 8.259218607351727, - "learning_rate": 1.5232752849613919e-05, - "loss": 1.0997, + "epoch": 0.6969782547303022, + "grad_norm": 4.933366992658467, + "learning_rate": 1.7929200597590375e-05, + "loss": 1.3717, "step": 4936 }, { - "epoch": 1.0352275110085971, - "grad_norm": 7.699991968792437, - "learning_rate": 1.5230823515256994e-05, - "loss": 0.9936, + "epoch": 0.697119457780288, + "grad_norm": 3.9696486945254668, + "learning_rate": 1.7928271617130628e-05, + "loss": 1.0222, "step": 4937 }, { - "epoch": 1.0354371985741246, - "grad_norm": 7.373855366620911, - "learning_rate": 1.5228893912811293e-05, - "loss": 0.7628, + "epoch": 0.6972606608302739, + "grad_norm": 3.6088150640999417, + "learning_rate": 1.7927342452421275e-05, + "loss": 1.0508, "step": 4938 }, { - "epoch": 1.035646886139652, - "grad_norm": 7.3137595967106925, - "learning_rate": 1.522696404237571e-05, - "loss": 0.8644, + "epoch": 0.6974018638802598, + "grad_norm": 4.641155354114272, + "learning_rate": 1.7926413103483903e-05, + "loss": 1.0084, "step": 4939 }, { - "epoch": 1.0358565737051793, - "grad_norm": 7.19687470729279, - "learning_rate": 1.5225033904049152e-05, - "loss": 0.8256, + "epoch": 0.6975430669302457, + "grad_norm": 3.760359463129475, + "learning_rate": 1.7925483570340118e-05, + "loss": 1.1653, "step": 4940 }, { - "epoch": 1.0360662612707066, - "grad_norm": 6.3141301400210885, - "learning_rate": 1.5223103497930546e-05, - "loss": 0.636, + "epoch": 0.6976842699802316, + "grad_norm": 3.9725904150012608, + "learning_rate": 1.792455385301152e-05, + "loss": 1.1374, "step": 4941 }, { - "epoch": 1.036275948836234, - "grad_norm": 7.178556473291898, - "learning_rate": 1.5221172824118827e-05, - "loss": 0.9722, + "epoch": 0.6978254730302175, + "grad_norm": 3.1563758725642237, + "learning_rate": 1.7923623951519708e-05, + "loss": 0.9656, "step": 4942 }, { - "epoch": 1.0364856364017614, - "grad_norm": 6.369711047600969, - "learning_rate": 1.521924188271294e-05, - "loss": 0.828, + "epoch": 0.6979666760802034, + "grad_norm": 3.7690353501607183, + "learning_rate": 1.79226938658863e-05, + "loss": 1.0548, "step": 4943 }, { - "epoch": 1.0366953239672887, - "grad_norm": 7.155809870583605, - "learning_rate": 1.521731067381186e-05, - "loss": 0.8976, + "epoch": 0.6981078791301892, + "grad_norm": 4.060368879548283, + "learning_rate": 1.7921763596132905e-05, + "loss": 1.0353, "step": 4944 }, { - "epoch": 1.036905011532816, - "grad_norm": 5.71419247433343, - "learning_rate": 1.5215379197514559e-05, - "loss": 0.6153, + "epoch": 0.6982490821801751, + "grad_norm": 3.2512862030613716, + "learning_rate": 1.7920833142281145e-05, + "loss": 0.9495, "step": 4945 }, { - "epoch": 1.0371146990983435, - "grad_norm": 7.841874080434669, - "learning_rate": 1.5213447453920024e-05, - "loss": 1.0807, + "epoch": 0.698390285230161, + "grad_norm": 4.037250239064881, + "learning_rate": 1.7919902504352646e-05, + "loss": 1.0587, "step": 4946 }, { - "epoch": 1.0373243866638708, - "grad_norm": 6.8902263484140125, - "learning_rate": 1.5211515443127268e-05, - "loss": 0.8236, + "epoch": 0.6985314882801469, + "grad_norm": 4.1272527856074825, + "learning_rate": 1.7918971682369034e-05, + "loss": 1.1974, "step": 4947 }, { - "epoch": 1.0375340742293981, - "grad_norm": 5.459616391100047, - "learning_rate": 1.5209583165235307e-05, - "loss": 0.6613, + "epoch": 0.6986726913301328, + "grad_norm": 3.8741226736112573, + "learning_rate": 1.791804067635194e-05, + "loss": 0.9565, "step": 4948 }, { - "epoch": 1.0377437617949257, - "grad_norm": 8.009252446666222, - "learning_rate": 1.5207650620343173e-05, - "loss": 1.0604, + "epoch": 0.6988138943801186, + "grad_norm": 3.837766735449058, + "learning_rate": 1.7917109486322997e-05, + "loss": 0.9729, "step": 4949 }, { - "epoch": 1.037953449360453, - "grad_norm": 6.631908663650191, - "learning_rate": 1.5205717808549908e-05, - "loss": 0.8408, + "epoch": 0.6989550974301045, + "grad_norm": 3.842350594919303, + "learning_rate": 1.791617811230385e-05, + "loss": 1.3002, "step": 4950 }, { - "epoch": 1.0381631369259803, - "grad_norm": 7.460614510230662, - "learning_rate": 1.5203784729954584e-05, - "loss": 1.0238, + "epoch": 0.6990963004800904, + "grad_norm": 3.448583000639665, + "learning_rate": 1.7915246554316145e-05, + "loss": 0.9534, "step": 4951 }, { - "epoch": 1.0383728244915076, - "grad_norm": 7.773727793281137, - "learning_rate": 1.520185138465626e-05, - "loss": 0.978, + "epoch": 0.6992375035300763, + "grad_norm": 3.639507876968727, + "learning_rate": 1.7914314812381524e-05, + "loss": 0.9045, "step": 4952 }, { - "epoch": 1.0385825120570351, - "grad_norm": 6.929464349115824, - "learning_rate": 1.5199917772754036e-05, - "loss": 0.8351, + "epoch": 0.6993787065800622, + "grad_norm": 3.4731095447808484, + "learning_rate": 1.7913382886521648e-05, + "loss": 1.0376, "step": 4953 }, { - "epoch": 1.0387921996225624, - "grad_norm": 7.353387446001933, - "learning_rate": 1.5197983894347004e-05, - "loss": 0.9884, + "epoch": 0.699519909630048, + "grad_norm": 3.1854397052921097, + "learning_rate": 1.791245077675817e-05, + "loss": 0.9268, "step": 4954 }, { - "epoch": 1.0390018871880897, - "grad_norm": 7.122582879420277, - "learning_rate": 1.5196049749534285e-05, - "loss": 1.0697, + "epoch": 0.6996611126800338, + "grad_norm": 3.767447903031259, + "learning_rate": 1.7911518483112752e-05, + "loss": 0.9883, "step": 4955 }, { - "epoch": 1.039211574753617, - "grad_norm": 7.801533730077766, - "learning_rate": 1.5194115338415e-05, - "loss": 0.8123, + "epoch": 0.6998023157300197, + "grad_norm": 3.6923455642089755, + "learning_rate": 1.7910586005607063e-05, + "loss": 0.8594, "step": 4956 }, { - "epoch": 1.0394212623191446, - "grad_norm": 6.425233416553472, - "learning_rate": 1.51921806610883e-05, - "loss": 0.6701, + "epoch": 0.6999435187800056, + "grad_norm": 3.6448247612937488, + "learning_rate": 1.790965334426277e-05, + "loss": 1.1476, "step": 4957 }, { - "epoch": 1.0396309498846719, - "grad_norm": 7.047847522101738, - "learning_rate": 1.5190245717653332e-05, - "loss": 0.8372, + "epoch": 0.7000847218299915, + "grad_norm": 3.5810133998126537, + "learning_rate": 1.7908720499101552e-05, + "loss": 1.1233, "step": 4958 }, { - "epoch": 1.0398406374501992, - "grad_norm": 6.395206387957378, - "learning_rate": 1.5188310508209273e-05, - "loss": 0.8802, + "epoch": 0.7002259248799774, + "grad_norm": 4.027853212120315, + "learning_rate": 1.790778747014508e-05, + "loss": 1.2545, "step": 4959 }, { - "epoch": 1.0400503250157265, - "grad_norm": 5.875925723213028, - "learning_rate": 1.5186375032855296e-05, - "loss": 0.8079, + "epoch": 0.7003671279299633, + "grad_norm": 2.8741132307290016, + "learning_rate": 1.7906854257415048e-05, + "loss": 0.8527, "step": 4960 }, { - "epoch": 1.040260012581254, - "grad_norm": 6.694159306339121, - "learning_rate": 1.5184439291690606e-05, - "loss": 0.7786, + "epoch": 0.7005083309799491, + "grad_norm": 3.3094312860045125, + "learning_rate": 1.7905920860933136e-05, + "loss": 0.9097, "step": 4961 }, { - "epoch": 1.0404697001467813, - "grad_norm": 6.794040026490486, - "learning_rate": 1.5182503284814409e-05, - "loss": 0.9839, + "epoch": 0.700649534029935, + "grad_norm": 4.3227834235897875, + "learning_rate": 1.7904987280721037e-05, + "loss": 1.1097, "step": 4962 }, { - "epoch": 1.0406793877123086, - "grad_norm": 5.94054012017211, - "learning_rate": 1.5180567012325935e-05, - "loss": 0.8323, + "epoch": 0.7007907370799209, + "grad_norm": 4.319445616616094, + "learning_rate": 1.7904053516800448e-05, + "loss": 1.2249, "step": 4963 }, { - "epoch": 1.040889075277836, - "grad_norm": 5.641050102965108, - "learning_rate": 1.517863047432441e-05, - "loss": 0.7168, + "epoch": 0.7009319401299068, + "grad_norm": 3.011031964933952, + "learning_rate": 1.7903119569193066e-05, + "loss": 0.8272, "step": 4964 }, { - "epoch": 1.0410987628433634, - "grad_norm": 8.21345482293305, - "learning_rate": 1.5176693670909096e-05, - "loss": 0.9992, + "epoch": 0.7010731431798927, + "grad_norm": 3.5753962019663765, + "learning_rate": 1.7902185437920603e-05, + "loss": 1.0165, "step": 4965 }, { - "epoch": 1.0413084504088908, - "grad_norm": 7.375461630597876, - "learning_rate": 1.517475660217925e-05, - "loss": 0.787, + "epoch": 0.7012143462298785, + "grad_norm": 3.8001370096272042, + "learning_rate": 1.790125112300476e-05, + "loss": 0.9125, "step": 4966 }, { - "epoch": 1.041518137974418, - "grad_norm": 6.789936193276466, - "learning_rate": 1.5172819268234155e-05, - "loss": 0.8067, + "epoch": 0.7013555492798644, + "grad_norm": 3.476174346380946, + "learning_rate": 1.7900316624467254e-05, + "loss": 0.9976, "step": 4967 }, { - "epoch": 1.0417278255399456, - "grad_norm": 6.168768720045257, - "learning_rate": 1.5170881669173102e-05, - "loss": 0.7556, + "epoch": 0.7014967523298503, + "grad_norm": 3.6753109904495953, + "learning_rate": 1.78993819423298e-05, + "loss": 0.9658, "step": 4968 }, { - "epoch": 1.041937513105473, - "grad_norm": 7.442692624941551, - "learning_rate": 1.5168943805095391e-05, - "loss": 0.8909, + "epoch": 0.7016379553798362, + "grad_norm": 3.881491751430802, + "learning_rate": 1.7898447076614123e-05, + "loss": 0.9928, "step": 4969 }, { - "epoch": 1.0421472006710002, - "grad_norm": 6.987253192762509, - "learning_rate": 1.5167005676100346e-05, - "loss": 0.9609, + "epoch": 0.7017791584298221, + "grad_norm": 3.2837016528548935, + "learning_rate": 1.7897512027341945e-05, + "loss": 0.8974, "step": 4970 }, { - "epoch": 1.0423568882365275, - "grad_norm": 6.68226057430912, - "learning_rate": 1.51650672822873e-05, - "loss": 0.8657, + "epoch": 0.701920361479808, + "grad_norm": 3.7316128013845646, + "learning_rate": 1.7896576794535002e-05, + "loss": 1.0198, "step": 4971 }, { - "epoch": 1.042566575802055, - "grad_norm": 10.187132487220923, - "learning_rate": 1.5163128623755596e-05, - "loss": 1.0124, + "epoch": 0.7020615645297938, + "grad_norm": 3.0379806003765197, + "learning_rate": 1.789564137821502e-05, + "loss": 0.9311, "step": 4972 }, { - "epoch": 1.0427762633675823, - "grad_norm": 6.933568706735866, - "learning_rate": 1.5161189700604598e-05, - "loss": 0.8859, + "epoch": 0.7022027675797797, + "grad_norm": 3.8890201402248343, + "learning_rate": 1.7894705778403746e-05, + "loss": 1.2104, "step": 4973 }, { - "epoch": 1.0429859509331096, - "grad_norm": 6.709281871479803, - "learning_rate": 1.5159250512933672e-05, - "loss": 0.5756, + "epoch": 0.7023439706297656, + "grad_norm": 3.3544549652210276, + "learning_rate": 1.7893769995122916e-05, + "loss": 0.8811, "step": 4974 }, { - "epoch": 1.043195638498637, - "grad_norm": 6.880010295983627, - "learning_rate": 1.515731106084221e-05, - "loss": 0.7291, + "epoch": 0.7024851736797515, + "grad_norm": 3.670011825573082, + "learning_rate": 1.7892834028394285e-05, + "loss": 1.0226, "step": 4975 }, { - "epoch": 1.0434053260641645, - "grad_norm": 7.2003832373081655, - "learning_rate": 1.5155371344429612e-05, - "loss": 0.6924, + "epoch": 0.7026263767297374, + "grad_norm": 4.956363878007812, + "learning_rate": 1.7891897878239595e-05, + "loss": 1.2145, "step": 4976 }, { - "epoch": 1.0436150136296918, - "grad_norm": 6.840324106674076, - "learning_rate": 1.5153431363795292e-05, - "loss": 0.806, + "epoch": 0.7027675797797233, + "grad_norm": 4.070265763567169, + "learning_rate": 1.789096154468061e-05, + "loss": 1.4238, "step": 4977 }, { - "epoch": 1.043824701195219, - "grad_norm": 8.050995520785525, - "learning_rate": 1.5151491119038672e-05, - "loss": 0.8851, + "epoch": 0.7029087828297091, + "grad_norm": 3.687184915341469, + "learning_rate": 1.7890025027739084e-05, + "loss": 0.8404, "step": 4978 }, { - "epoch": 1.0440343887607464, - "grad_norm": 8.003452055421375, - "learning_rate": 1.5149550610259203e-05, - "loss": 1.0227, + "epoch": 0.703049985879695, + "grad_norm": 3.407299740962904, + "learning_rate": 1.7889088327436783e-05, + "loss": 1.0777, "step": 4979 }, { - "epoch": 1.044244076326274, - "grad_norm": 8.623397406152913, - "learning_rate": 1.5147609837556334e-05, - "loss": 1.0489, + "epoch": 0.7031911889296809, + "grad_norm": 3.3293450041563113, + "learning_rate": 1.7888151443795478e-05, + "loss": 0.8899, "step": 4980 }, { - "epoch": 1.0444537638918012, - "grad_norm": 7.591865887265669, - "learning_rate": 1.5145668801029528e-05, - "loss": 0.8448, + "epoch": 0.7033323919796668, + "grad_norm": 3.407754301552625, + "learning_rate": 1.788721437683694e-05, + "loss": 0.949, "step": 4981 }, { - "epoch": 1.0446634514573285, - "grad_norm": 6.125270430642908, - "learning_rate": 1.5143727500778273e-05, - "loss": 0.7292, + "epoch": 0.7034735950296527, + "grad_norm": 3.5328900152966285, + "learning_rate": 1.7886277126582947e-05, + "loss": 1.1086, "step": 4982 }, { - "epoch": 1.0448731390228558, - "grad_norm": 9.900133971716045, - "learning_rate": 1.5141785936902062e-05, - "loss": 0.9333, + "epoch": 0.7036147980796386, + "grad_norm": 3.296893849798958, + "learning_rate": 1.7885339693055276e-05, + "loss": 0.9262, "step": 4983 }, { - "epoch": 1.0450828265883834, - "grad_norm": 6.635919598445679, - "learning_rate": 1.5139844109500405e-05, - "loss": 0.8179, + "epoch": 0.7037560011296244, + "grad_norm": 3.5264997601460455, + "learning_rate": 1.7884402076275723e-05, + "loss": 1.1062, "step": 4984 }, { - "epoch": 1.0452925141539107, - "grad_norm": 7.8794737090461995, - "learning_rate": 1.5137902018672823e-05, - "loss": 0.8866, + "epoch": 0.7038972041796103, + "grad_norm": 3.157365329339565, + "learning_rate": 1.7883464276266064e-05, + "loss": 0.9598, "step": 4985 }, { - "epoch": 1.045502201719438, - "grad_norm": 7.954706874316916, - "learning_rate": 1.5135959664518853e-05, - "loss": 1.1873, + "epoch": 0.7040384072295962, + "grad_norm": 3.517264938511991, + "learning_rate": 1.7882526293048102e-05, + "loss": 1.1385, "step": 4986 }, { - "epoch": 1.0457118892849655, - "grad_norm": 6.260871602935669, - "learning_rate": 1.5134017047138038e-05, - "loss": 0.7074, + "epoch": 0.7041796102795821, + "grad_norm": 3.556560858212387, + "learning_rate": 1.7881588126643632e-05, + "loss": 0.9751, "step": 4987 }, { - "epoch": 1.0459215768504928, - "grad_norm": 7.47829953257803, - "learning_rate": 1.5132074166629949e-05, - "loss": 0.834, + "epoch": 0.704320813329568, + "grad_norm": 3.8115058338522463, + "learning_rate": 1.788064977707446e-05, + "loss": 1.0367, "step": 4988 }, { - "epoch": 1.0461312644160201, - "grad_norm": 7.103226589199328, - "learning_rate": 1.5130131023094154e-05, - "loss": 0.7806, + "epoch": 0.7044620163795537, + "grad_norm": 3.5583813298827756, + "learning_rate": 1.787971124436239e-05, + "loss": 1.1144, "step": 4989 }, { - "epoch": 1.0463409519815474, - "grad_norm": 7.026415806032829, - "learning_rate": 1.5128187616630248e-05, - "loss": 0.7655, + "epoch": 0.7046032194295396, + "grad_norm": 3.791512971049549, + "learning_rate": 1.7878772528529232e-05, + "loss": 1.2657, "step": 4990 }, { - "epoch": 1.046550639547075, - "grad_norm": 7.111847419815197, - "learning_rate": 1.5126243947337834e-05, - "loss": 0.8572, + "epoch": 0.7047444224795255, + "grad_norm": 3.8455394478153235, + "learning_rate": 1.7877833629596805e-05, + "loss": 1.0708, "step": 4991 }, { - "epoch": 1.0467603271126023, - "grad_norm": 7.387592909986293, - "learning_rate": 1.5124300015316524e-05, - "loss": 0.7374, + "epoch": 0.7048856255295114, + "grad_norm": 3.971044506253799, + "learning_rate": 1.7876894547586924e-05, + "loss": 1.2719, "step": 4992 }, { - "epoch": 1.0469700146781296, - "grad_norm": 8.384145261677098, - "learning_rate": 1.512235582066595e-05, - "loss": 0.8726, + "epoch": 0.7050268285794973, + "grad_norm": 4.185304546464157, + "learning_rate": 1.787595528252142e-05, + "loss": 1.4954, "step": 4993 }, { - "epoch": 1.0471797022436569, - "grad_norm": 10.48463549979645, - "learning_rate": 1.5120411363485758e-05, - "loss": 1.1321, + "epoch": 0.7051680316294832, + "grad_norm": 2.917869695438468, + "learning_rate": 1.7875015834422113e-05, + "loss": 0.9402, "step": 4994 }, { - "epoch": 1.0473893898091844, - "grad_norm": 7.726540855829647, - "learning_rate": 1.5118466643875603e-05, - "loss": 0.6866, + "epoch": 0.705309234679469, + "grad_norm": 4.054121132892206, + "learning_rate": 1.787407620331084e-05, + "loss": 1.1962, "step": 4995 }, { - "epoch": 1.0475990773747117, - "grad_norm": 7.58518523649189, - "learning_rate": 1.5116521661935153e-05, - "loss": 0.9859, + "epoch": 0.7054504377294549, + "grad_norm": 3.1935513773725073, + "learning_rate": 1.7873136389209435e-05, + "loss": 0.9174, "step": 4996 }, { - "epoch": 1.047808764940239, - "grad_norm": 5.823361326959982, - "learning_rate": 1.5114576417764095e-05, - "loss": 0.6076, + "epoch": 0.7055916407794408, + "grad_norm": 5.998002559019988, + "learning_rate": 1.7872196392139745e-05, + "loss": 1.3476, "step": 4997 }, { - "epoch": 1.0480184525057663, - "grad_norm": 6.635570726572421, - "learning_rate": 1.5112630911462124e-05, - "loss": 0.7668, + "epoch": 0.7057328438294267, + "grad_norm": 2.8882888103723654, + "learning_rate": 1.7871256212123605e-05, + "loss": 0.8645, "step": 4998 }, { - "epoch": 1.0482281400712938, - "grad_norm": 7.485108338377767, - "learning_rate": 1.511068514312895e-05, - "loss": 0.8497, + "epoch": 0.7058740468794126, + "grad_norm": 3.430810723438634, + "learning_rate": 1.7870315849182874e-05, + "loss": 1.0197, "step": 4999 }, { - "epoch": 1.0484378276368211, - "grad_norm": 7.858796476170127, - "learning_rate": 1.5108739112864301e-05, - "loss": 0.9937, + "epoch": 0.7060152499293985, + "grad_norm": 3.5172910882865307, + "learning_rate": 1.78693753033394e-05, + "loss": 1.0992, "step": 5000 }, { - "epoch": 1.0486475152023484, - "grad_norm": 6.761089314386591, - "learning_rate": 1.510679282076791e-05, - "loss": 0.736, + "epoch": 0.7061564529793843, + "grad_norm": 4.414330373432635, + "learning_rate": 1.7868434574615042e-05, + "loss": 1.0446, "step": 5001 }, { - "epoch": 1.0488572027678758, - "grad_norm": 8.09648809458204, - "learning_rate": 1.5104846266939532e-05, - "loss": 0.9433, + "epoch": 0.7062976560293702, + "grad_norm": 3.9425613439416622, + "learning_rate": 1.7867493663031664e-05, + "loss": 1.3116, "step": 5002 }, { - "epoch": 1.0490668903334033, - "grad_norm": 5.799159783945289, - "learning_rate": 1.510289945147893e-05, - "loss": 0.7523, + "epoch": 0.7064388590793561, + "grad_norm": 3.934442679748222, + "learning_rate": 1.7866552568611132e-05, + "loss": 1.0943, "step": 5003 }, { - "epoch": 1.0492765778989306, - "grad_norm": 7.207518715900732, - "learning_rate": 1.5100952374485877e-05, - "loss": 0.9175, + "epoch": 0.706580062129342, + "grad_norm": 3.9271576494670213, + "learning_rate": 1.7865611291375313e-05, + "loss": 1.1351, "step": 5004 }, { - "epoch": 1.049486265464458, - "grad_norm": 6.233520566678287, - "learning_rate": 1.509900503606017e-05, - "loss": 0.849, + "epoch": 0.7067212651793279, + "grad_norm": 3.4389659743332732, + "learning_rate": 1.7864669831346084e-05, + "loss": 0.8963, "step": 5005 }, { - "epoch": 1.0496959530299854, - "grad_norm": 6.7338206586902265, - "learning_rate": 1.509705743630161e-05, - "loss": 0.6924, + "epoch": 0.7068624682293138, + "grad_norm": 4.212484468729581, + "learning_rate": 1.7863728188545326e-05, + "loss": 1.0828, "step": 5006 }, { - "epoch": 1.0499056405955127, - "grad_norm": 6.656545986727053, - "learning_rate": 1.5095109575310014e-05, - "loss": 0.9097, + "epoch": 0.7070036712792996, + "grad_norm": 4.963815904634519, + "learning_rate": 1.7862786362994922e-05, + "loss": 1.3729, "step": 5007 }, { - "epoch": 1.05011532816104, - "grad_norm": 6.4659190885520355, - "learning_rate": 1.5093161453185219e-05, - "loss": 0.8672, + "epoch": 0.7071448743292855, + "grad_norm": 3.357223101408097, + "learning_rate": 1.7861844354716757e-05, + "loss": 1.1, "step": 5008 }, { - "epoch": 1.0503250157265673, - "grad_norm": 7.655913405658616, - "learning_rate": 1.5091213070027064e-05, - "loss": 0.9757, + "epoch": 0.7072860773792714, + "grad_norm": 3.505451631658313, + "learning_rate": 1.7860902163732725e-05, + "loss": 1.0501, "step": 5009 }, { - "epoch": 1.0505347032920949, - "grad_norm": 6.638035837193669, - "learning_rate": 1.5089264425935408e-05, - "loss": 0.8311, + "epoch": 0.7074272804292573, + "grad_norm": 3.6379377163761486, + "learning_rate": 1.7859959790064723e-05, + "loss": 1.145, "step": 5010 }, { - "epoch": 1.0507443908576222, - "grad_norm": 7.331557028874784, - "learning_rate": 1.5087315521010127e-05, - "loss": 0.9073, + "epoch": 0.7075684834792432, + "grad_norm": 3.488825591389287, + "learning_rate": 1.785901723373465e-05, + "loss": 0.9841, "step": 5011 }, { - "epoch": 1.0509540784231495, - "grad_norm": 6.625284771376989, - "learning_rate": 1.5085366355351099e-05, - "loss": 0.7779, + "epoch": 0.707709686529229, + "grad_norm": 3.9156291433782573, + "learning_rate": 1.7858074494764406e-05, + "loss": 1.0521, "step": 5012 }, { - "epoch": 1.0511637659886768, - "grad_norm": 6.222845317999916, - "learning_rate": 1.5083416929058223e-05, - "loss": 0.8391, + "epoch": 0.7078508895792149, + "grad_norm": 4.380465896517571, + "learning_rate": 1.7857131573175906e-05, + "loss": 1.0925, "step": 5013 }, { - "epoch": 1.0513734535542043, - "grad_norm": 7.473935522760192, - "learning_rate": 1.5081467242231415e-05, - "loss": 0.8763, + "epoch": 0.7079920926292008, + "grad_norm": 4.086376363375203, + "learning_rate": 1.7856188468991064e-05, + "loss": 1.1517, "step": 5014 }, { - "epoch": 1.0515831411197316, - "grad_norm": 6.795897657731762, - "learning_rate": 1.5079517294970601e-05, - "loss": 0.7929, + "epoch": 0.7081332956791867, + "grad_norm": 3.6495049561481427, + "learning_rate": 1.7855245182231794e-05, + "loss": 0.906, "step": 5015 }, { - "epoch": 1.051792828685259, - "grad_norm": 6.486017232070313, - "learning_rate": 1.5077567087375708e-05, - "loss": 0.939, + "epoch": 0.7082744987291726, + "grad_norm": 3.635346614516239, + "learning_rate": 1.785430171292002e-05, + "loss": 1.1495, "step": 5016 }, { - "epoch": 1.0520025162507862, - "grad_norm": 7.552929790130162, - "learning_rate": 1.5075616619546702e-05, - "loss": 0.9303, + "epoch": 0.7084157017791585, + "grad_norm": 3.0679473220294065, + "learning_rate": 1.785335806107766e-05, + "loss": 0.852, "step": 5017 }, { - "epoch": 1.0522122038163138, - "grad_norm": 7.005488479592218, - "learning_rate": 1.5073665891583538e-05, - "loss": 0.8501, + "epoch": 0.7085569048291444, + "grad_norm": 3.2887875834953566, + "learning_rate": 1.7852414226726654e-05, + "loss": 1.0946, "step": 5018 }, { - "epoch": 1.052421891381841, - "grad_norm": 6.447850865274571, - "learning_rate": 1.5071714903586196e-05, - "loss": 0.9588, + "epoch": 0.7086981078791302, + "grad_norm": 3.1590412233372263, + "learning_rate": 1.7851470209888938e-05, + "loss": 1.006, "step": 5019 }, { - "epoch": 1.0526315789473684, - "grad_norm": 7.881188345711636, - "learning_rate": 1.5069763655654672e-05, - "loss": 0.9151, + "epoch": 0.7088393109291161, + "grad_norm": 3.057353740719704, + "learning_rate": 1.7850526010586437e-05, + "loss": 0.7995, "step": 5020 }, { - "epoch": 1.052841266512896, - "grad_norm": 8.856306952193773, - "learning_rate": 1.5067812147888967e-05, - "loss": 0.9757, + "epoch": 0.708980513979102, + "grad_norm": 2.941570959866705, + "learning_rate": 1.7849581628841106e-05, + "loss": 0.7424, "step": 5021 }, { - "epoch": 1.0530509540784232, - "grad_norm": 7.510488000659729, - "learning_rate": 1.50658603803891e-05, - "loss": 0.8956, + "epoch": 0.7091217170290879, + "grad_norm": 3.415629161590476, + "learning_rate": 1.7848637064674887e-05, + "loss": 0.8821, "step": 5022 }, { - "epoch": 1.0532606416439505, - "grad_norm": 6.994093836212675, - "learning_rate": 1.50639083532551e-05, - "loss": 0.9696, + "epoch": 0.7092629200790737, + "grad_norm": 3.351485111044392, + "learning_rate": 1.7847692318109732e-05, + "loss": 0.8917, "step": 5023 }, { - "epoch": 1.0534703292094778, - "grad_norm": 8.651994042616476, - "learning_rate": 1.5061956066587015e-05, - "loss": 1.1563, + "epoch": 0.7094041231290595, + "grad_norm": 3.2219728283150033, + "learning_rate": 1.78467473891676e-05, + "loss": 0.8681, "step": 5024 }, { - "epoch": 1.0536800167750053, - "grad_norm": 7.38758974472631, - "learning_rate": 1.5060003520484903e-05, - "loss": 0.8339, + "epoch": 0.7095453261790454, + "grad_norm": 4.686022726707394, + "learning_rate": 1.7845802277870442e-05, + "loss": 1.4153, "step": 5025 }, { - "epoch": 1.0538897043405326, - "grad_norm": 7.112098953810162, - "learning_rate": 1.5058050715048831e-05, - "loss": 0.8215, + "epoch": 0.7096865292290313, + "grad_norm": 3.587896315770786, + "learning_rate": 1.784485698424023e-05, + "loss": 0.9595, "step": 5026 }, { - "epoch": 1.05409939190606, - "grad_norm": 6.159410725172554, - "learning_rate": 1.5056097650378888e-05, - "loss": 0.663, + "epoch": 0.7098277322790172, + "grad_norm": 4.563153552415818, + "learning_rate": 1.784391150829893e-05, + "loss": 1.1391, "step": 5027 }, { - "epoch": 1.0543090794715873, - "grad_norm": 6.532512547897546, - "learning_rate": 1.5054144326575175e-05, - "loss": 0.6908, + "epoch": 0.7099689353290031, + "grad_norm": 3.99692112190469, + "learning_rate": 1.784296585006851e-05, + "loss": 1.2172, "step": 5028 }, { - "epoch": 1.0545187670371148, - "grad_norm": 7.456324328260314, - "learning_rate": 1.5052190743737795e-05, - "loss": 0.785, + "epoch": 0.710110138378989, + "grad_norm": 4.741025411681182, + "learning_rate": 1.7842020009570955e-05, + "loss": 1.4888, "step": 5029 }, { - "epoch": 1.054728454602642, - "grad_norm": 7.585509049714016, - "learning_rate": 1.5050236901966883e-05, - "loss": 0.8609, + "epoch": 0.7102513414289748, + "grad_norm": 3.7659797914817594, + "learning_rate": 1.784107398682824e-05, + "loss": 1.1441, "step": 5030 }, { - "epoch": 1.0549381421681694, - "grad_norm": 7.14029506906107, - "learning_rate": 1.5048282801362564e-05, - "loss": 0.8689, + "epoch": 0.7103925444789607, + "grad_norm": 3.534957265534807, + "learning_rate": 1.7840127781862354e-05, + "loss": 1.0008, "step": 5031 }, { - "epoch": 1.0551478297336967, - "grad_norm": 6.972872239996281, - "learning_rate": 1.5046328442025e-05, - "loss": 0.9703, + "epoch": 0.7105337475289466, + "grad_norm": 3.524731622798178, + "learning_rate": 1.7839181394695285e-05, + "loss": 0.8784, "step": 5032 }, { - "epoch": 1.0553575172992242, - "grad_norm": 6.417710484806855, - "learning_rate": 1.5044373824054348e-05, - "loss": 0.6173, + "epoch": 0.7106749505789325, + "grad_norm": 2.9918341177888887, + "learning_rate": 1.7838234825349023e-05, + "loss": 0.971, "step": 5033 }, { - "epoch": 1.0555672048647515, - "grad_norm": 7.509771623100593, - "learning_rate": 1.504241894755079e-05, - "loss": 1.0538, + "epoch": 0.7108161536289184, + "grad_norm": 3.190053295666956, + "learning_rate": 1.7837288073845566e-05, + "loss": 0.8066, "step": 5034 }, { - "epoch": 1.0557768924302788, - "grad_norm": 6.664900168344534, - "learning_rate": 1.5040463812614514e-05, - "loss": 0.7029, + "epoch": 0.7109573566789043, + "grad_norm": 3.6128173505441077, + "learning_rate": 1.7836341140206924e-05, + "loss": 1.1073, "step": 5035 }, { - "epoch": 1.0559865799958064, - "grad_norm": 8.417358870883497, - "learning_rate": 1.5038508419345726e-05, - "loss": 0.9586, + "epoch": 0.7110985597288901, + "grad_norm": 3.2123705674930005, + "learning_rate": 1.7835394024455097e-05, + "loss": 0.9778, "step": 5036 }, { - "epoch": 1.0561962675613337, - "grad_norm": 8.533497464112534, - "learning_rate": 1.5036552767844641e-05, - "loss": 0.9426, + "epoch": 0.711239762778876, + "grad_norm": 3.4135099829700737, + "learning_rate": 1.78344467266121e-05, + "loss": 1.0221, "step": 5037 }, { - "epoch": 1.056405955126861, - "grad_norm": 8.999010077149382, - "learning_rate": 1.5034596858211496e-05, - "loss": 1.0272, + "epoch": 0.7113809658288619, + "grad_norm": 3.2736195231841223, + "learning_rate": 1.783349924669994e-05, + "loss": 0.9635, "step": 5038 }, { - "epoch": 1.0566156426923883, - "grad_norm": 7.897554840193791, - "learning_rate": 1.5032640690546524e-05, - "loss": 0.9727, + "epoch": 0.7115221688788478, + "grad_norm": 3.2241616495603385, + "learning_rate": 1.783255158474064e-05, + "loss": 0.9324, "step": 5039 }, { - "epoch": 1.0568253302579158, - "grad_norm": 6.860248678936019, - "learning_rate": 1.5030684264949993e-05, - "loss": 0.9517, + "epoch": 0.7116633719288337, + "grad_norm": 4.154404588747539, + "learning_rate": 1.7831603740756223e-05, + "loss": 1.1938, "step": 5040 }, { - "epoch": 1.0570350178234431, - "grad_norm": 10.069039612628313, - "learning_rate": 1.5028727581522167e-05, - "loss": 1.3065, + "epoch": 0.7118045749788195, + "grad_norm": 3.3759124875802358, + "learning_rate": 1.7830655714768717e-05, + "loss": 0.9166, "step": 5041 }, { - "epoch": 1.0572447053889704, - "grad_norm": 6.314865799990083, - "learning_rate": 1.5026770640363328e-05, - "loss": 0.7154, + "epoch": 0.7119457780288054, + "grad_norm": 5.793011529970844, + "learning_rate": 1.7829707506800157e-05, + "loss": 1.1978, "step": 5042 }, { - "epoch": 1.0574543929544977, - "grad_norm": 8.158003975188912, - "learning_rate": 1.5024813441573777e-05, - "loss": 1.1046, + "epoch": 0.7120869810787913, + "grad_norm": 3.522288964839806, + "learning_rate": 1.7828759116872575e-05, + "loss": 0.9847, "step": 5043 }, { - "epoch": 1.0576640805200253, - "grad_norm": 6.532942462505225, - "learning_rate": 1.5022855985253823e-05, - "loss": 0.8287, + "epoch": 0.7122281841287772, + "grad_norm": 3.7627963890101475, + "learning_rate": 1.782781054500801e-05, + "loss": 1.1176, "step": 5044 }, { - "epoch": 1.0578737680855526, - "grad_norm": 8.8281936400201, - "learning_rate": 1.5020898271503785e-05, - "loss": 1.0699, + "epoch": 0.7123693871787631, + "grad_norm": 3.4483716001159066, + "learning_rate": 1.7826861791228516e-05, + "loss": 0.9441, "step": 5045 }, { - "epoch": 1.0580834556510799, - "grad_norm": 7.351426957154842, - "learning_rate": 1.5018940300424005e-05, - "loss": 0.8466, + "epoch": 0.712510590228749, + "grad_norm": 3.541461830271571, + "learning_rate": 1.782591285555613e-05, + "loss": 1.0603, "step": 5046 }, { - "epoch": 1.0582931432166072, - "grad_norm": 8.57503969611497, - "learning_rate": 1.501698207211483e-05, - "loss": 1.0748, + "epoch": 0.7126517932787348, + "grad_norm": 3.8626755324588897, + "learning_rate": 1.7824963738012907e-05, + "loss": 1.1205, "step": 5047 }, { - "epoch": 1.0585028307821347, - "grad_norm": 5.502480170194385, - "learning_rate": 1.5015023586676621e-05, - "loss": 0.6201, + "epoch": 0.7127929963287207, + "grad_norm": 3.5992067680540565, + "learning_rate": 1.7824014438620906e-05, + "loss": 0.83, "step": 5048 }, { - "epoch": 1.058712518347662, - "grad_norm": 6.358821766921522, - "learning_rate": 1.5013064844209754e-05, - "loss": 0.8283, + "epoch": 0.7129341993787066, + "grad_norm": 3.1195219912610805, + "learning_rate": 1.782306495740219e-05, + "loss": 0.8612, "step": 5049 }, { - "epoch": 1.0589222059131893, - "grad_norm": 6.7537490273196426, - "learning_rate": 1.5011105844814622e-05, - "loss": 0.7816, + "epoch": 0.7130754024286925, + "grad_norm": 3.7175836391658903, + "learning_rate": 1.7822115294378824e-05, + "loss": 1.3601, "step": 5050 }, { - "epoch": 1.0591318934787166, - "grad_norm": 6.905860820329714, - "learning_rate": 1.5009146588591619e-05, - "loss": 1.0077, + "epoch": 0.7132166054786784, + "grad_norm": 3.7838242310272285, + "learning_rate": 1.7821165449572873e-05, + "loss": 1.1783, "step": 5051 }, { - "epoch": 1.0593415810442441, - "grad_norm": 6.843285441417043, - "learning_rate": 1.5007187075641172e-05, - "loss": 0.8985, + "epoch": 0.7133578085286643, + "grad_norm": 3.1669302770651684, + "learning_rate": 1.7820215423006418e-05, + "loss": 0.9582, "step": 5052 }, { - "epoch": 1.0595512686097714, - "grad_norm": 6.686383296745393, - "learning_rate": 1.5005227306063699e-05, - "loss": 0.9319, + "epoch": 0.7134990115786501, + "grad_norm": 4.404494648621697, + "learning_rate": 1.7819265214701532e-05, + "loss": 1.13, "step": 5053 }, { - "epoch": 1.0597609561752988, - "grad_norm": 7.239350534445047, - "learning_rate": 1.5003267279959646e-05, - "loss": 0.7826, + "epoch": 0.713640214628636, + "grad_norm": 5.172759099502781, + "learning_rate": 1.78183148246803e-05, + "loss": 1.1508, "step": 5054 }, { - "epoch": 1.0599706437408263, - "grad_norm": 7.479503429119001, - "learning_rate": 1.5001306997429467e-05, - "loss": 0.8067, + "epoch": 0.7137814176786219, + "grad_norm": 3.2643971305389443, + "learning_rate": 1.781736425296481e-05, + "loss": 0.9786, "step": 5055 }, { - "epoch": 1.0601803313063536, - "grad_norm": 8.055862482582052, - "learning_rate": 1.4999346458573629e-05, - "loss": 0.7961, + "epoch": 0.7139226207286078, + "grad_norm": 3.671970175994379, + "learning_rate": 1.7816413499577146e-05, + "loss": 1.2046, "step": 5056 }, { - "epoch": 1.060390018871881, - "grad_norm": 6.9712515815905265, - "learning_rate": 1.4997385663492616e-05, - "loss": 0.7604, + "epoch": 0.7140638237785936, + "grad_norm": 3.259658493142142, + "learning_rate": 1.781546256453941e-05, + "loss": 0.8722, "step": 5057 }, { - "epoch": 1.0605997064374082, - "grad_norm": 7.230681709654834, - "learning_rate": 1.499542461228692e-05, - "loss": 1.0907, + "epoch": 0.7142050268285794, + "grad_norm": 3.291482008584362, + "learning_rate": 1.78145114478737e-05, + "loss": 0.9473, "step": 5058 }, { - "epoch": 1.0608093940029357, - "grad_norm": 7.024082197874813, - "learning_rate": 1.4993463305057048e-05, - "loss": 0.8435, + "epoch": 0.7143462298785653, + "grad_norm": 3.75874963431193, + "learning_rate": 1.781356014960212e-05, + "loss": 1.1589, "step": 5059 }, { - "epoch": 1.061019081568463, - "grad_norm": 6.697058648888024, - "learning_rate": 1.4991501741903519e-05, - "loss": 0.7056, + "epoch": 0.7144874329285512, + "grad_norm": 3.442225654403889, + "learning_rate": 1.7812608669746774e-05, + "loss": 0.9354, "step": 5060 }, { - "epoch": 1.0612287691339903, - "grad_norm": 6.932821685224161, - "learning_rate": 1.4989539922926872e-05, - "loss": 0.8605, + "epoch": 0.7146286359785371, + "grad_norm": 3.284929141986392, + "learning_rate": 1.7811657008329776e-05, + "loss": 1.0478, "step": 5061 }, { - "epoch": 1.0614384566995176, - "grad_norm": 6.357173665353515, - "learning_rate": 1.4987577848227646e-05, - "loss": 0.7406, + "epoch": 0.714769839028523, + "grad_norm": 3.6133507544518886, + "learning_rate": 1.7810705165373245e-05, + "loss": 0.9145, "step": 5062 }, { - "epoch": 1.0616481442650452, - "grad_norm": 6.3688478718411945, - "learning_rate": 1.4985615517906409e-05, - "loss": 0.599, + "epoch": 0.7149110420785089, + "grad_norm": 3.218780051137876, + "learning_rate": 1.78097531408993e-05, + "loss": 0.8709, "step": 5063 }, { - "epoch": 1.0618578318305725, - "grad_norm": 6.7535861735399285, - "learning_rate": 1.4983652932063728e-05, - "loss": 0.7684, + "epoch": 0.7150522451284947, + "grad_norm": 3.558911364493531, + "learning_rate": 1.7808800934930062e-05, + "loss": 1.0947, "step": 5064 }, { - "epoch": 1.0620675193960998, - "grad_norm": 7.118259871648339, - "learning_rate": 1.4981690090800189e-05, - "loss": 0.8294, + "epoch": 0.7151934481784806, + "grad_norm": 3.5387865257996407, + "learning_rate": 1.7807848547487664e-05, + "loss": 1.1608, "step": 5065 }, { - "epoch": 1.062277206961627, - "grad_norm": 7.4079827978135295, - "learning_rate": 1.4979726994216395e-05, - "loss": 0.8515, + "epoch": 0.7153346512284665, + "grad_norm": 3.8036642634771423, + "learning_rate": 1.7806895978594237e-05, + "loss": 1.1467, "step": 5066 }, { - "epoch": 1.0624868945271546, - "grad_norm": 5.6860573798892275, - "learning_rate": 1.4977763642412955e-05, - "loss": 0.7886, + "epoch": 0.7154758542784524, + "grad_norm": 3.458201833981553, + "learning_rate": 1.780594322827192e-05, + "loss": 0.9128, "step": 5067 }, { - "epoch": 1.062696582092682, - "grad_norm": 7.193377330104058, - "learning_rate": 1.4975800035490492e-05, - "loss": 0.9114, + "epoch": 0.7156170573284383, + "grad_norm": 2.9247954961783695, + "learning_rate": 1.7804990296542856e-05, + "loss": 0.8129, "step": 5068 }, { - "epoch": 1.0629062696582092, - "grad_norm": 6.107500476685139, - "learning_rate": 1.4973836173549652e-05, - "loss": 0.902, + "epoch": 0.7157582603784242, + "grad_norm": 3.881738686810293, + "learning_rate": 1.7804037183429185e-05, + "loss": 1.1715, "step": 5069 }, { - "epoch": 1.0631159572237365, - "grad_norm": 6.679559077595515, - "learning_rate": 1.497187205669108e-05, - "loss": 0.7374, + "epoch": 0.71589946342841, + "grad_norm": 3.8123525999979977, + "learning_rate": 1.7803083888953058e-05, + "loss": 1.1961, "step": 5070 }, { - "epoch": 1.063325644789264, - "grad_norm": 7.884939057637599, - "learning_rate": 1.4969907685015439e-05, - "loss": 0.9913, + "epoch": 0.7160406664783959, + "grad_norm": 3.636348554253174, + "learning_rate": 1.7802130413136636e-05, + "loss": 1.093, "step": 5071 }, { - "epoch": 1.0635353323547914, - "grad_norm": 7.17073178191822, - "learning_rate": 1.4967943058623412e-05, - "loss": 0.8556, + "epoch": 0.7161818695283818, + "grad_norm": 2.995315705611547, + "learning_rate": 1.780117675600207e-05, + "loss": 0.8119, "step": 5072 }, { - "epoch": 1.0637450199203187, - "grad_norm": 8.670535533144832, - "learning_rate": 1.4965978177615686e-05, - "loss": 0.9672, + "epoch": 0.7163230725783677, + "grad_norm": 3.1185258477851607, + "learning_rate": 1.7800222917571526e-05, + "loss": 0.7732, "step": 5073 }, { - "epoch": 1.0639547074858462, - "grad_norm": 7.605005697878998, - "learning_rate": 1.4964013042092962e-05, - "loss": 0.7535, + "epoch": 0.7164642756283536, + "grad_norm": 3.0746745441981016, + "learning_rate": 1.779926889786717e-05, + "loss": 0.7486, "step": 5074 }, { - "epoch": 1.0641643950513735, - "grad_norm": 7.131441873556847, - "learning_rate": 1.4962047652155964e-05, - "loss": 0.916, + "epoch": 0.7166054786783395, + "grad_norm": 4.174964883702475, + "learning_rate": 1.779831469691117e-05, + "loss": 1.2721, "step": 5075 }, { - "epoch": 1.0643740826169008, - "grad_norm": 6.243887917670286, - "learning_rate": 1.4960082007905416e-05, - "loss": 0.8018, + "epoch": 0.7167466817283253, + "grad_norm": 3.1536227369832046, + "learning_rate": 1.7797360314725707e-05, + "loss": 0.9134, "step": 5076 }, { - "epoch": 1.0645837701824281, - "grad_norm": 8.721087733050263, - "learning_rate": 1.4958116109442063e-05, - "loss": 1.2262, + "epoch": 0.7168878847783112, + "grad_norm": 3.2854668208042233, + "learning_rate": 1.779640575133296e-05, + "loss": 0.8518, "step": 5077 }, { - "epoch": 1.0647934577479556, - "grad_norm": 6.6928629166504505, - "learning_rate": 1.4956149956866659e-05, - "loss": 0.9322, + "epoch": 0.7170290878282971, + "grad_norm": 3.4837145114725363, + "learning_rate": 1.779545100675511e-05, + "loss": 1.1309, "step": 5078 }, { - "epoch": 1.065003145313483, - "grad_norm": 8.22266428509861, - "learning_rate": 1.4954183550279975e-05, - "loss": 0.901, + "epoch": 0.717170290878283, + "grad_norm": 3.7979437586953613, + "learning_rate": 1.779449608101434e-05, + "loss": 1.2562, "step": 5079 }, { - "epoch": 1.0652128328790103, - "grad_norm": 9.054091447716381, - "learning_rate": 1.4952216889782791e-05, - "loss": 0.9547, + "epoch": 0.7173114939282689, + "grad_norm": 4.0271371633795345, + "learning_rate": 1.779354097413285e-05, + "loss": 1.2333, "step": 5080 }, { - "epoch": 1.0654225204445376, - "grad_norm": 6.729182647674046, - "learning_rate": 1.4950249975475904e-05, - "loss": 0.7237, + "epoch": 0.7174526969782548, + "grad_norm": 3.6100887282124137, + "learning_rate": 1.7792585686132837e-05, + "loss": 1.0177, "step": 5081 }, { - "epoch": 1.065632208010065, - "grad_norm": 7.454025735448186, - "learning_rate": 1.4948282807460116e-05, - "loss": 0.9305, + "epoch": 0.7175939000282406, + "grad_norm": 3.9297868860480603, + "learning_rate": 1.7791630217036492e-05, + "loss": 1.0506, "step": 5082 }, { - "epoch": 1.0658418955755924, - "grad_norm": 8.24670161997894, - "learning_rate": 1.4946315385836256e-05, - "loss": 0.9521, + "epoch": 0.7177351030782265, + "grad_norm": 3.2609509996038835, + "learning_rate": 1.779067456686603e-05, + "loss": 1.0649, "step": 5083 }, { - "epoch": 1.0660515831411197, - "grad_norm": 6.56928389236455, - "learning_rate": 1.4944347710705151e-05, - "loss": 0.797, + "epoch": 0.7178763061282124, + "grad_norm": 3.5556084944592437, + "learning_rate": 1.7789718735643655e-05, + "loss": 1.162, "step": 5084 }, { - "epoch": 1.066261270706647, - "grad_norm": 7.482313168282947, - "learning_rate": 1.494237978216765e-05, - "loss": 0.9292, + "epoch": 0.7180175091781983, + "grad_norm": 3.2958968160034234, + "learning_rate": 1.778876272339158e-05, + "loss": 0.902, "step": 5085 }, { - "epoch": 1.0664709582721745, - "grad_norm": 8.32617495145479, - "learning_rate": 1.4940411600324613e-05, - "loss": 0.9727, + "epoch": 0.7181587122281842, + "grad_norm": 3.071537562898388, + "learning_rate": 1.7787806530132022e-05, + "loss": 0.8784, "step": 5086 }, { - "epoch": 1.0666806458377018, - "grad_norm": 7.968060626336125, - "learning_rate": 1.4938443165276914e-05, - "loss": 0.7656, + "epoch": 0.71829991527817, + "grad_norm": 3.661685776591576, + "learning_rate": 1.7786850155887206e-05, + "loss": 1.0425, "step": 5087 }, { - "epoch": 1.0668903334032291, - "grad_norm": 6.859134346845168, - "learning_rate": 1.4936474477125438e-05, - "loss": 0.8678, + "epoch": 0.7184411183281559, + "grad_norm": 3.686946783694264, + "learning_rate": 1.7785893600679353e-05, + "loss": 0.9308, "step": 5088 }, { - "epoch": 1.0671000209687564, - "grad_norm": 7.6585384264847445, - "learning_rate": 1.493450553597108e-05, - "loss": 0.9392, + "epoch": 0.7185823213781418, + "grad_norm": 2.9861377903476454, + "learning_rate": 1.7784936864530698e-05, + "loss": 0.9348, "step": 5089 }, { - "epoch": 1.067309708534284, - "grad_norm": 7.289850863910213, - "learning_rate": 1.4932536341914761e-05, - "loss": 0.7779, + "epoch": 0.7187235244281277, + "grad_norm": 3.5021656173929014, + "learning_rate": 1.778397994746347e-05, + "loss": 1.019, "step": 5090 }, { - "epoch": 1.0675193960998113, - "grad_norm": 7.924613416317872, - "learning_rate": 1.4930566895057394e-05, - "loss": 0.9352, + "epoch": 0.7188647274781135, + "grad_norm": 3.9235734904843844, + "learning_rate": 1.778302284949991e-05, + "loss": 1.2335, "step": 5091 }, { - "epoch": 1.0677290836653386, - "grad_norm": 5.74181356672226, - "learning_rate": 1.4928597195499924e-05, - "loss": 0.644, + "epoch": 0.7190059305280994, + "grad_norm": 3.709096823737312, + "learning_rate": 1.7782065570662263e-05, + "loss": 1.0457, "step": 5092 }, { - "epoch": 1.0679387712308661, - "grad_norm": 7.874459601254827, - "learning_rate": 1.4926627243343297e-05, - "loss": 0.8588, + "epoch": 0.7191471335780852, + "grad_norm": 4.607491061376305, + "learning_rate": 1.7781108110972768e-05, + "loss": 1.1687, "step": 5093 }, { - "epoch": 1.0681484587963934, - "grad_norm": 7.289917142427254, - "learning_rate": 1.4924657038688481e-05, - "loss": 0.8362, + "epoch": 0.7192883366280711, + "grad_norm": 3.501245519790058, + "learning_rate": 1.7780150470453682e-05, + "loss": 0.8517, "step": 5094 }, { - "epoch": 1.0683581463619207, - "grad_norm": 8.110344871274794, - "learning_rate": 1.492268658163645e-05, - "loss": 0.9563, + "epoch": 0.719429539678057, + "grad_norm": 3.9407611820845374, + "learning_rate": 1.7779192649127262e-05, + "loss": 1.2203, "step": 5095 }, { - "epoch": 1.068567833927448, - "grad_norm": 7.476859412680474, - "learning_rate": 1.4920715872288195e-05, - "loss": 0.8758, + "epoch": 0.7195707427280429, + "grad_norm": 3.8159200515798406, + "learning_rate": 1.777823464701576e-05, + "loss": 1.0593, "step": 5096 }, { - "epoch": 1.0687775214929756, - "grad_norm": 7.441152927512292, - "learning_rate": 1.4918744910744716e-05, - "loss": 0.9042, + "epoch": 0.7197119457780288, + "grad_norm": 4.041044550350458, + "learning_rate": 1.777727646414145e-05, + "loss": 1.1471, "step": 5097 }, { - "epoch": 1.0689872090585029, - "grad_norm": 8.915858789424579, - "learning_rate": 1.491677369710703e-05, - "loss": 0.9766, + "epoch": 0.7198531488280147, + "grad_norm": 3.9478324406004552, + "learning_rate": 1.777631810052659e-05, + "loss": 1.0179, "step": 5098 }, { - "epoch": 1.0691968966240302, - "grad_norm": 6.306695900654438, - "learning_rate": 1.4914802231476161e-05, - "loss": 0.6519, + "epoch": 0.7199943518780005, + "grad_norm": 3.3538356267017724, + "learning_rate": 1.7775359556193455e-05, + "loss": 0.8945, "step": 5099 }, { - "epoch": 1.0694065841895575, - "grad_norm": 6.920464718882883, - "learning_rate": 1.4912830513953158e-05, - "loss": 0.7845, + "epoch": 0.7201355549279864, + "grad_norm": 3.338922118068307, + "learning_rate": 1.777440083116432e-05, + "loss": 0.8278, "step": 5100 }, { - "epoch": 1.069616271755085, - "grad_norm": 6.738444734608047, - "learning_rate": 1.4910858544639068e-05, - "loss": 0.6977, + "epoch": 0.7202767579779723, + "grad_norm": 3.8363885656517036, + "learning_rate": 1.7773441925461473e-05, + "loss": 1.0449, "step": 5101 }, { - "epoch": 1.0698259593206123, - "grad_norm": 6.580371801305737, - "learning_rate": 1.490888632363496e-05, - "loss": 0.8193, + "epoch": 0.7204179610279582, + "grad_norm": 3.1603997632519425, + "learning_rate": 1.777248283910719e-05, + "loss": 0.8904, "step": 5102 }, { - "epoch": 1.0700356468861396, - "grad_norm": 7.450143709152783, - "learning_rate": 1.4906913851041914e-05, - "loss": 1.0196, + "epoch": 0.7205591640779441, + "grad_norm": 3.396208901937564, + "learning_rate": 1.7771523572123763e-05, + "loss": 1.0726, "step": 5103 }, { - "epoch": 1.070245334451667, - "grad_norm": 6.420303412015791, - "learning_rate": 1.4904941126961024e-05, - "loss": 0.7946, + "epoch": 0.72070036712793, + "grad_norm": 2.9810037967954126, + "learning_rate": 1.777056412453348e-05, + "loss": 0.8956, "step": 5104 }, { - "epoch": 1.0704550220171944, - "grad_norm": 7.265128863222921, - "learning_rate": 1.4902968151493391e-05, - "loss": 0.929, + "epoch": 0.7208415701779158, + "grad_norm": 3.503331931999316, + "learning_rate": 1.776960449635865e-05, + "loss": 0.9719, "step": 5105 }, { - "epoch": 1.0706647095827218, - "grad_norm": 6.271818568206735, - "learning_rate": 1.490099492474014e-05, - "loss": 0.7703, + "epoch": 0.7209827732279017, + "grad_norm": 3.4494466982548366, + "learning_rate": 1.776864468762156e-05, + "loss": 1.1753, "step": 5106 }, { - "epoch": 1.070874397148249, - "grad_norm": 7.565624944595501, - "learning_rate": 1.4899021446802395e-05, - "loss": 0.8648, + "epoch": 0.7211239762778876, + "grad_norm": 3.726840993206747, + "learning_rate": 1.776768469834453e-05, + "loss": 1.0176, "step": 5107 }, { - "epoch": 1.0710840847137764, - "grad_norm": 7.460818308482355, - "learning_rate": 1.4897047717781305e-05, - "loss": 0.8472, + "epoch": 0.7212651793278735, + "grad_norm": 3.118234141425858, + "learning_rate": 1.7766724528549856e-05, + "loss": 0.9714, "step": 5108 }, { - "epoch": 1.071293772279304, - "grad_norm": 6.338633125848731, - "learning_rate": 1.4895073737778027e-05, - "loss": 0.8262, + "epoch": 0.7214063823778594, + "grad_norm": 3.124577026653579, + "learning_rate": 1.7765764178259863e-05, + "loss": 0.9835, "step": 5109 }, { - "epoch": 1.0715034598448312, - "grad_norm": 6.326284774133242, - "learning_rate": 1.4893099506893732e-05, - "loss": 0.5766, + "epoch": 0.7215475854278453, + "grad_norm": 3.463917223470069, + "learning_rate": 1.776480364749686e-05, + "loss": 0.9261, "step": 5110 }, { - "epoch": 1.0717131474103585, - "grad_norm": 7.07204757708267, - "learning_rate": 1.4891125025229599e-05, - "loss": 0.8925, + "epoch": 0.7216887884778311, + "grad_norm": 4.960275524805102, + "learning_rate": 1.7763842936283175e-05, + "loss": 1.3761, "step": 5111 }, { - "epoch": 1.071922834975886, - "grad_norm": 6.449350049572894, - "learning_rate": 1.4889150292886825e-05, - "loss": 0.8095, + "epoch": 0.721829991527817, + "grad_norm": 3.7958156803643153, + "learning_rate": 1.7762882044641133e-05, + "loss": 1.0542, "step": 5112 }, { - "epoch": 1.0721325225414133, - "grad_norm": 6.610053039777937, - "learning_rate": 1.4887175309966617e-05, - "loss": 0.9039, + "epoch": 0.7219711945778029, + "grad_norm": 3.812607035413126, + "learning_rate": 1.7761920972593064e-05, + "loss": 1.2381, "step": 5113 }, { - "epoch": 1.0723422101069406, - "grad_norm": 6.804611416538084, - "learning_rate": 1.4885200076570199e-05, - "loss": 0.6904, + "epoch": 0.7221123976277888, + "grad_norm": 3.923127410259805, + "learning_rate": 1.7760959720161306e-05, + "loss": 1.1529, "step": 5114 }, { - "epoch": 1.072551897672468, - "grad_norm": 7.006213050829078, - "learning_rate": 1.4883224592798804e-05, - "loss": 0.8052, + "epoch": 0.7222536006777747, + "grad_norm": 3.294678110341738, + "learning_rate": 1.7759998287368193e-05, + "loss": 0.9417, "step": 5115 }, { - "epoch": 1.0727615852379955, - "grad_norm": 8.352361355838735, - "learning_rate": 1.4881248858753678e-05, - "loss": 1.0357, + "epoch": 0.7223948037277605, + "grad_norm": 3.654121588182083, + "learning_rate": 1.7759036674236074e-05, + "loss": 1.1682, "step": 5116 }, { - "epoch": 1.0729712728035228, - "grad_norm": 8.017781821206086, - "learning_rate": 1.4879272874536085e-05, - "loss": 0.8296, + "epoch": 0.7225360067777464, + "grad_norm": 3.464707382759429, + "learning_rate": 1.775807488078729e-05, + "loss": 1.0189, "step": 5117 }, { - "epoch": 1.07318096036905, - "grad_norm": 7.85931514841387, - "learning_rate": 1.4877296640247292e-05, - "loss": 0.9281, + "epoch": 0.7226772098277323, + "grad_norm": 3.75361908722945, + "learning_rate": 1.77571129070442e-05, + "loss": 1.0253, "step": 5118 }, { - "epoch": 1.0733906479345774, - "grad_norm": 6.351376863936367, - "learning_rate": 1.4875320155988591e-05, - "loss": 0.6532, + "epoch": 0.7228184128777182, + "grad_norm": 4.309776150982811, + "learning_rate": 1.7756150753029154e-05, + "loss": 1.4386, "step": 5119 }, { - "epoch": 1.073600335500105, - "grad_norm": 6.010153268245333, - "learning_rate": 1.4873343421861273e-05, - "loss": 0.7705, + "epoch": 0.7229596159277041, + "grad_norm": 3.7745096111707466, + "learning_rate": 1.7755188418764517e-05, + "loss": 1.0582, "step": 5120 }, { - "epoch": 1.0738100230656322, - "grad_norm": 7.110124894825287, - "learning_rate": 1.487136643796665e-05, - "loss": 0.7884, + "epoch": 0.72310081897769, + "grad_norm": 3.262589332901122, + "learning_rate": 1.7754225904272647e-05, + "loss": 0.973, "step": 5121 }, { - "epoch": 1.0740197106311595, - "grad_norm": 6.640811958766606, - "learning_rate": 1.486938920440605e-05, - "loss": 0.7357, + "epoch": 0.7232420220276758, + "grad_norm": 3.94200604833333, + "learning_rate": 1.7753263209575914e-05, + "loss": 1.0729, "step": 5122 }, { - "epoch": 1.0742293981966868, - "grad_norm": 6.808465627601769, - "learning_rate": 1.4867411721280809e-05, - "loss": 0.7428, + "epoch": 0.7233832250776617, + "grad_norm": 3.5112783456773453, + "learning_rate": 1.7752300334696696e-05, + "loss": 1.0494, "step": 5123 }, { - "epoch": 1.0744390857622144, - "grad_norm": 8.396247758169004, - "learning_rate": 1.4865433988692272e-05, - "loss": 0.9646, + "epoch": 0.7235244281276476, + "grad_norm": 3.5473434477661607, + "learning_rate": 1.7751337279657365e-05, + "loss": 0.9607, "step": 5124 }, { - "epoch": 1.0746487733277417, - "grad_norm": 6.42436686286732, - "learning_rate": 1.486345600674181e-05, - "loss": 0.6844, + "epoch": 0.7236656311776334, + "grad_norm": 3.702047363538064, + "learning_rate": 1.7750374044480306e-05, + "loss": 1.0263, "step": 5125 }, { - "epoch": 1.074858460893269, - "grad_norm": 8.832085868459043, - "learning_rate": 1.4861477775530788e-05, - "loss": 1.271, + "epoch": 0.7238068342276193, + "grad_norm": 3.190481682500292, + "learning_rate": 1.77494106291879e-05, + "loss": 0.798, "step": 5126 }, { - "epoch": 1.0750681484587963, - "grad_norm": 7.417368101446979, - "learning_rate": 1.48594992951606e-05, - "loss": 0.8365, + "epoch": 0.7239480372776051, + "grad_norm": 3.4267621930970407, + "learning_rate": 1.7748447033802533e-05, + "loss": 1.0998, "step": 5127 }, { - "epoch": 1.0752778360243238, - "grad_norm": 6.507268527853079, - "learning_rate": 1.4857520565732644e-05, - "loss": 0.8939, + "epoch": 0.724089240327591, + "grad_norm": 3.5568968669454732, + "learning_rate": 1.7747483258346607e-05, + "loss": 1.1395, "step": 5128 }, { - "epoch": 1.0754875235898511, - "grad_norm": 9.277868070944484, - "learning_rate": 1.4855541587348335e-05, - "loss": 1.1368, + "epoch": 0.7242304433775769, + "grad_norm": 3.13554059679409, + "learning_rate": 1.7746519302842514e-05, + "loss": 0.8204, "step": 5129 }, { - "epoch": 1.0756972111553784, - "grad_norm": 8.11034903735834, - "learning_rate": 1.4853562360109096e-05, - "loss": 0.7677, + "epoch": 0.7243716464275628, + "grad_norm": 3.6085516122329127, + "learning_rate": 1.774555516731266e-05, + "loss": 0.9007, "step": 5130 }, { - "epoch": 1.075906898720906, - "grad_norm": 8.258372820364784, - "learning_rate": 1.4851582884116371e-05, - "loss": 0.9032, + "epoch": 0.7245128494775487, + "grad_norm": 3.1023955675044195, + "learning_rate": 1.774459085177945e-05, + "loss": 0.9034, "step": 5131 }, { - "epoch": 1.0761165862864333, - "grad_norm": 7.014858438406556, - "learning_rate": 1.4849603159471604e-05, - "loss": 0.9535, + "epoch": 0.7246540525275346, + "grad_norm": 3.5405036681756137, + "learning_rate": 1.7743626356265292e-05, + "loss": 1.1103, "step": 5132 }, { - "epoch": 1.0763262738519606, - "grad_norm": 7.145536286340474, - "learning_rate": 1.484762318627627e-05, - "loss": 0.999, + "epoch": 0.7247952555775204, + "grad_norm": 3.5052599388501537, + "learning_rate": 1.77426616807926e-05, + "loss": 1.2092, "step": 5133 }, { - "epoch": 1.0765359614174879, - "grad_norm": 8.180629374728934, - "learning_rate": 1.4845642964631834e-05, - "loss": 1.1019, + "epoch": 0.7249364586275063, + "grad_norm": 3.952595911177927, + "learning_rate": 1.7741696825383797e-05, + "loss": 1.1712, "step": 5134 }, { - "epoch": 1.0767456489830154, - "grad_norm": 6.21694311802288, - "learning_rate": 1.4843662494639795e-05, - "loss": 0.542, + "epoch": 0.7250776616774922, + "grad_norm": 3.9611151880889244, + "learning_rate": 1.77407317900613e-05, + "loss": 1.1648, "step": 5135 }, { - "epoch": 1.0769553365485427, - "grad_norm": 8.549348868479944, - "learning_rate": 1.484168177640165e-05, - "loss": 1.0476, + "epoch": 0.7252188647274781, + "grad_norm": 3.2303050088664125, + "learning_rate": 1.7739766574847542e-05, + "loss": 0.9367, "step": 5136 }, { - "epoch": 1.07716502411407, - "grad_norm": 7.102699427719841, - "learning_rate": 1.483970081001892e-05, - "loss": 0.9204, + "epoch": 0.725360067777464, + "grad_norm": 3.366954061334709, + "learning_rate": 1.773880117976495e-05, + "loss": 1.0085, "step": 5137 }, { - "epoch": 1.0773747116795973, - "grad_norm": 7.855004380346779, - "learning_rate": 1.4837719595593125e-05, - "loss": 0.9728, + "epoch": 0.7255012708274499, + "grad_norm": 4.335838459237132, + "learning_rate": 1.7737835604835962e-05, + "loss": 1.0369, "step": 5138 }, { - "epoch": 1.0775843992451248, - "grad_norm": 6.527366581356953, - "learning_rate": 1.4835738133225814e-05, - "loss": 0.6538, + "epoch": 0.7256424738774357, + "grad_norm": 3.926134625480553, + "learning_rate": 1.7736869850083013e-05, + "loss": 1.2109, "step": 5139 }, { - "epoch": 1.0777940868106521, - "grad_norm": 7.2937101877069015, - "learning_rate": 1.4833756423018537e-05, - "loss": 0.9038, + "epoch": 0.7257836769274216, + "grad_norm": 3.9139457218285965, + "learning_rate": 1.7735903915528553e-05, + "loss": 1.0184, "step": 5140 }, { - "epoch": 1.0780037743761794, - "grad_norm": 6.446587503930849, - "learning_rate": 1.4831774465072855e-05, - "loss": 0.6871, + "epoch": 0.7259248799774075, + "grad_norm": 3.7784900087469144, + "learning_rate": 1.7734937801195027e-05, + "loss": 1.0831, "step": 5141 }, { - "epoch": 1.078213461941707, - "grad_norm": 6.208220676098961, - "learning_rate": 1.4829792259490356e-05, - "loss": 0.8927, + "epoch": 0.7260660830273934, + "grad_norm": 2.6121135854296518, + "learning_rate": 1.7733971507104887e-05, + "loss": 0.7419, "step": 5142 }, { - "epoch": 1.0784231495072343, - "grad_norm": 6.886790982471816, - "learning_rate": 1.4827809806372623e-05, - "loss": 0.8756, + "epoch": 0.7262072860773793, + "grad_norm": 4.05245068669235, + "learning_rate": 1.7733005033280587e-05, + "loss": 1.1432, "step": 5143 }, { - "epoch": 1.0786328370727616, - "grad_norm": 5.6377185807506995, - "learning_rate": 1.4825827105821266e-05, - "loss": 0.6492, + "epoch": 0.7263484891273652, + "grad_norm": 3.7942933763605615, + "learning_rate": 1.7732038379744592e-05, + "loss": 1.0541, "step": 5144 }, { - "epoch": 1.078842524638289, - "grad_norm": 6.048395549560448, - "learning_rate": 1.4823844157937901e-05, - "loss": 0.7291, + "epoch": 0.726489692177351, + "grad_norm": 4.009094162179761, + "learning_rate": 1.7731071546519364e-05, + "loss": 1.2215, "step": 5145 }, { - "epoch": 1.0790522122038162, - "grad_norm": 7.061994221181714, - "learning_rate": 1.4821860962824157e-05, - "loss": 0.8241, + "epoch": 0.7266308952273369, + "grad_norm": 3.8059071421581474, + "learning_rate": 1.773010453362737e-05, + "loss": 1.0558, "step": 5146 }, { - "epoch": 1.0792618997693437, - "grad_norm": 6.403883603231528, - "learning_rate": 1.4819877520581671e-05, - "loss": 0.6336, + "epoch": 0.7267720982773228, + "grad_norm": 3.4347692515434023, + "learning_rate": 1.7729137341091088e-05, + "loss": 0.9663, "step": 5147 }, { - "epoch": 1.079471587334871, - "grad_norm": 8.089186320721302, - "learning_rate": 1.4817893831312107e-05, - "loss": 0.9253, + "epoch": 0.7269133013273087, + "grad_norm": 4.569768528993115, + "learning_rate": 1.772816996893299e-05, + "loss": 0.9832, "step": 5148 }, { - "epoch": 1.0796812749003983, - "grad_norm": 7.199145283844229, - "learning_rate": 1.4815909895117127e-05, - "loss": 0.7524, + "epoch": 0.7270545043772946, + "grad_norm": 3.222475836378053, + "learning_rate": 1.772720241717556e-05, + "loss": 0.8673, "step": 5149 }, { - "epoch": 1.0798909624659259, - "grad_norm": 6.664254077754818, - "learning_rate": 1.4813925712098409e-05, - "loss": 0.6835, + "epoch": 0.7271957074272805, + "grad_norm": 3.435753983737089, + "learning_rate": 1.7726234685841283e-05, + "loss": 0.7729, "step": 5150 }, { - "epoch": 1.0801006500314532, - "grad_norm": 7.2141565789241, - "learning_rate": 1.4811941282357652e-05, - "loss": 0.7738, + "epoch": 0.7273369104772663, + "grad_norm": 3.255167032402089, + "learning_rate": 1.772526677495265e-05, + "loss": 1.0498, "step": 5151 }, { - "epoch": 1.0803103375969805, - "grad_norm": 6.6925461600405365, - "learning_rate": 1.4809956605996561e-05, - "loss": 0.8691, + "epoch": 0.7274781135272522, + "grad_norm": 3.6806673764563254, + "learning_rate": 1.772429868453215e-05, + "loss": 1.1301, "step": 5152 }, { - "epoch": 1.0805200251625078, - "grad_norm": 7.709772242966372, - "learning_rate": 1.4807971683116844e-05, - "loss": 0.8742, + "epoch": 0.7276193165772381, + "grad_norm": 3.8892305824580853, + "learning_rate": 1.7723330414602288e-05, + "loss": 0.947, "step": 5153 }, { - "epoch": 1.0807297127280353, - "grad_norm": 7.215700190095787, - "learning_rate": 1.4805986513820245e-05, - "loss": 0.7748, + "epoch": 0.727760519627224, + "grad_norm": 3.0963346839530925, + "learning_rate": 1.772236196518556e-05, + "loss": 0.9215, "step": 5154 }, { - "epoch": 1.0809394002935626, - "grad_norm": 6.712058906762827, - "learning_rate": 1.4804001098208498e-05, - "loss": 0.6353, + "epoch": 0.7279017226772099, + "grad_norm": 2.933047855785967, + "learning_rate": 1.7721393336304474e-05, + "loss": 0.9151, "step": 5155 }, { - "epoch": 1.08114908785909, - "grad_norm": 7.729957184748364, - "learning_rate": 1.4802015436383366e-05, - "loss": 0.9433, + "epoch": 0.7280429257271958, + "grad_norm": 4.01678990634645, + "learning_rate": 1.7720424527981545e-05, + "loss": 0.9247, "step": 5156 }, { - "epoch": 1.0813587754246172, - "grad_norm": 6.706142351914358, - "learning_rate": 1.4800029528446614e-05, - "loss": 0.7706, + "epoch": 0.7281841287771816, + "grad_norm": 3.402040162913891, + "learning_rate": 1.7719455540239283e-05, + "loss": 1.0262, "step": 5157 }, { - "epoch": 1.0815684629901448, - "grad_norm": 6.56618578158904, - "learning_rate": 1.479804337450002e-05, - "loss": 0.6999, + "epoch": 0.7283253318271675, + "grad_norm": 3.8477869156667377, + "learning_rate": 1.7718486373100207e-05, + "loss": 1.1099, "step": 5158 }, { - "epoch": 1.081778150555672, - "grad_norm": 8.318220758100463, - "learning_rate": 1.4796056974645383e-05, - "loss": 0.9899, + "epoch": 0.7284665348771533, + "grad_norm": 3.498431461414149, + "learning_rate": 1.7717517026586844e-05, + "loss": 0.9643, "step": 5159 }, { - "epoch": 1.0819878381211994, - "grad_norm": 8.57235735455009, - "learning_rate": 1.479407032898451e-05, - "loss": 0.9584, + "epoch": 0.7286077379271392, + "grad_norm": 3.5677650908186234, + "learning_rate": 1.7716547500721715e-05, + "loss": 0.8665, "step": 5160 }, { - "epoch": 1.082197525686727, - "grad_norm": 7.918182685146835, - "learning_rate": 1.4792083437619213e-05, - "loss": 0.8264, + "epoch": 0.7287489409771251, + "grad_norm": 3.8930545598142245, + "learning_rate": 1.7715577795527355e-05, + "loss": 0.8956, "step": 5161 }, { - "epoch": 1.0824072132522542, - "grad_norm": 8.221571827235467, - "learning_rate": 1.4790096300651333e-05, - "loss": 0.9761, + "epoch": 0.7288901440271109, + "grad_norm": 3.2216476353570904, + "learning_rate": 1.77146079110263e-05, + "loss": 0.7613, "step": 5162 }, { - "epoch": 1.0826169008177815, - "grad_norm": 6.368757183741934, - "learning_rate": 1.4788108918182709e-05, - "loss": 0.6081, + "epoch": 0.7290313470770968, + "grad_norm": 4.020711676814464, + "learning_rate": 1.771363784724109e-05, + "loss": 1.0128, "step": 5163 }, { - "epoch": 1.0828265883833088, - "grad_norm": 7.362813489763786, - "learning_rate": 1.4786121290315196e-05, - "loss": 0.9601, + "epoch": 0.7291725501270827, + "grad_norm": 3.137472657516944, + "learning_rate": 1.7712667604194263e-05, + "loss": 0.9861, "step": 5164 }, { - "epoch": 1.0830362759488363, - "grad_norm": 7.5589034034327325, - "learning_rate": 1.4784133417150669e-05, - "loss": 0.9928, + "epoch": 0.7293137531770686, + "grad_norm": 4.31219302904973, + "learning_rate": 1.7711697181908376e-05, + "loss": 1.2672, "step": 5165 }, { - "epoch": 1.0832459635143636, - "grad_norm": 7.269786994971708, - "learning_rate": 1.4782145298791003e-05, - "loss": 0.8423, + "epoch": 0.7294549562270545, + "grad_norm": 4.282622160731298, + "learning_rate": 1.7710726580405977e-05, + "loss": 1.07, "step": 5166 }, { - "epoch": 1.083455651079891, - "grad_norm": 7.652543355106625, - "learning_rate": 1.4780156935338096e-05, - "loss": 1.0243, + "epoch": 0.7295961592770404, + "grad_norm": 3.468123119075137, + "learning_rate": 1.770975579970962e-05, + "loss": 0.9296, "step": 5167 }, { - "epoch": 1.0836653386454183, - "grad_norm": 7.418779390896711, - "learning_rate": 1.4778168326893858e-05, - "loss": 0.7405, + "epoch": 0.7297373623270262, + "grad_norm": 3.849691387555377, + "learning_rate": 1.770878483984187e-05, + "loss": 1.2036, "step": 5168 }, { - "epoch": 1.0838750262109458, - "grad_norm": 9.027682976846481, - "learning_rate": 1.4776179473560205e-05, - "loss": 1.1147, + "epoch": 0.7298785653770121, + "grad_norm": 3.5910642348711117, + "learning_rate": 1.7707813700825288e-05, + "loss": 1.2382, "step": 5169 }, { - "epoch": 1.084084713776473, - "grad_norm": 7.144663366302767, - "learning_rate": 1.477419037543907e-05, - "loss": 0.8702, + "epoch": 0.730019768426998, + "grad_norm": 4.390141005309064, + "learning_rate": 1.7706842382682445e-05, + "loss": 1.1034, "step": 5170 }, { - "epoch": 1.0842944013420004, - "grad_norm": 6.30187481438015, - "learning_rate": 1.4772201032632398e-05, - "loss": 0.91, + "epoch": 0.7301609714769839, + "grad_norm": 4.002095614860221, + "learning_rate": 1.770587088543591e-05, + "loss": 1.2416, "step": 5171 }, { - "epoch": 1.0845040889075277, - "grad_norm": 8.886982425089645, - "learning_rate": 1.4770211445242147e-05, - "loss": 1.0972, + "epoch": 0.7303021745269698, + "grad_norm": 3.203054407182655, + "learning_rate": 1.770489920910827e-05, + "loss": 0.9528, "step": 5172 }, { - "epoch": 1.0847137764730552, - "grad_norm": 6.527057871445205, - "learning_rate": 1.4768221613370285e-05, - "loss": 0.691, + "epoch": 0.7304433775769557, + "grad_norm": 3.7856891365510936, + "learning_rate": 1.7703927353722096e-05, + "loss": 1.1024, "step": 5173 }, { - "epoch": 1.0849234640385825, - "grad_norm": 7.045321711506494, - "learning_rate": 1.4766231537118795e-05, - "loss": 0.7956, + "epoch": 0.7305845806269415, + "grad_norm": 3.966621153668162, + "learning_rate": 1.770295531929998e-05, + "loss": 1.4033, "step": 5174 }, { - "epoch": 1.0851331516041098, - "grad_norm": 7.2863804417603575, - "learning_rate": 1.4764241216589674e-05, - "loss": 0.7647, + "epoch": 0.7307257836769274, + "grad_norm": 3.6039883294406136, + "learning_rate": 1.7701983105864506e-05, + "loss": 1.0743, "step": 5175 }, { - "epoch": 1.0853428391696371, - "grad_norm": 6.838303966036686, - "learning_rate": 1.4762250651884924e-05, - "loss": 0.7897, + "epoch": 0.7308669867269133, + "grad_norm": 3.3328610690320737, + "learning_rate": 1.770101071343827e-05, + "loss": 0.9686, "step": 5176 }, { - "epoch": 1.0855525267351647, - "grad_norm": 7.547915679396741, - "learning_rate": 1.4760259843106573e-05, - "loss": 0.9521, + "epoch": 0.7310081897768992, + "grad_norm": 3.8073395357213276, + "learning_rate": 1.7700038142043875e-05, + "loss": 1.0549, "step": 5177 }, { - "epoch": 1.085762214300692, - "grad_norm": 7.7508941531621645, - "learning_rate": 1.4758268790356644e-05, - "loss": 0.8405, + "epoch": 0.7311493928268851, + "grad_norm": 3.325732526560263, + "learning_rate": 1.7699065391703914e-05, + "loss": 0.9593, "step": 5178 }, { - "epoch": 1.0859719018662193, - "grad_norm": 6.39595317702409, - "learning_rate": 1.475627749373719e-05, - "loss": 0.8321, + "epoch": 0.731290595876871, + "grad_norm": 3.839317805384115, + "learning_rate": 1.7698092462441003e-05, + "loss": 1.0577, "step": 5179 }, { - "epoch": 1.0861815894317468, - "grad_norm": 7.735831365356152, - "learning_rate": 1.4754285953350262e-05, - "loss": 0.878, + "epoch": 0.7314317989268568, + "grad_norm": 3.788039397428166, + "learning_rate": 1.7697119354277746e-05, + "loss": 1.1351, "step": 5180 }, { - "epoch": 1.0863912769972741, - "grad_norm": 6.805298123078015, - "learning_rate": 1.4752294169297938e-05, - "loss": 0.9383, + "epoch": 0.7315730019768427, + "grad_norm": 3.2791278864865805, + "learning_rate": 1.7696146067236758e-05, + "loss": 0.7441, "step": 5181 }, { - "epoch": 1.0866009645628014, - "grad_norm": 6.266710651964672, - "learning_rate": 1.4750302141682292e-05, - "loss": 0.8925, + "epoch": 0.7317142050268286, + "grad_norm": 3.35427769434779, + "learning_rate": 1.769517260134066e-05, + "loss": 1.1672, "step": 5182 }, { - "epoch": 1.0868106521283287, - "grad_norm": 6.362613932046409, - "learning_rate": 1.4748309870605422e-05, - "loss": 0.905, + "epoch": 0.7318554080768145, + "grad_norm": 3.7705413624901833, + "learning_rate": 1.7694198956612074e-05, + "loss": 0.9819, "step": 5183 }, { - "epoch": 1.0870203396938563, - "grad_norm": 8.217743508051532, - "learning_rate": 1.4746317356169436e-05, - "loss": 1.0756, + "epoch": 0.7319966111268004, + "grad_norm": 3.7667321721586955, + "learning_rate": 1.769322513307363e-05, + "loss": 1.0534, "step": 5184 }, { - "epoch": 1.0872300272593836, - "grad_norm": 7.1576847255600615, - "learning_rate": 1.4744324598476454e-05, - "loss": 0.8169, + "epoch": 0.7321378141767863, + "grad_norm": 3.5721051618504567, + "learning_rate": 1.769225113074795e-05, + "loss": 1.2151, "step": 5185 }, { - "epoch": 1.0874397148249109, - "grad_norm": 6.3637126532829775, - "learning_rate": 1.4742331597628608e-05, - "loss": 0.854, + "epoch": 0.7322790172267721, + "grad_norm": 3.647920302535526, + "learning_rate": 1.769127694965768e-05, + "loss": 0.9356, "step": 5186 }, { - "epoch": 1.0876494023904382, - "grad_norm": 6.6180057624909505, - "learning_rate": 1.474033835372804e-05, - "loss": 0.7927, + "epoch": 0.732420220276758, + "grad_norm": 3.763200520435438, + "learning_rate": 1.7690302589825455e-05, + "loss": 1.0483, "step": 5187 }, { - "epoch": 1.0878590899559657, - "grad_norm": 7.571565965563071, - "learning_rate": 1.4738344866876913e-05, - "loss": 0.8628, + "epoch": 0.7325614233267439, + "grad_norm": 3.7199292020346646, + "learning_rate": 1.768932805127392e-05, + "loss": 1.1347, "step": 5188 }, { - "epoch": 1.088068777521493, - "grad_norm": 7.450952980155977, - "learning_rate": 1.4736351137177392e-05, - "loss": 0.8944, + "epoch": 0.7327026263767298, + "grad_norm": 3.658978160772491, + "learning_rate": 1.768835333402572e-05, + "loss": 0.9018, "step": 5189 }, { - "epoch": 1.0882784650870203, - "grad_norm": 7.1631545659673606, - "learning_rate": 1.4734357164731659e-05, - "loss": 0.8991, + "epoch": 0.7328438294267157, + "grad_norm": 3.841981055971075, + "learning_rate": 1.768737843810351e-05, + "loss": 1.0282, "step": 5190 }, { - "epoch": 1.0884881526525476, - "grad_norm": 5.818817333056958, - "learning_rate": 1.4732362949641911e-05, - "loss": 0.6424, + "epoch": 0.7329850324767015, + "grad_norm": 3.7216067642755886, + "learning_rate": 1.768640336352994e-05, + "loss": 1.0669, "step": 5191 }, { - "epoch": 1.0886978402180751, - "grad_norm": 7.154352660564363, - "learning_rate": 1.4730368492010354e-05, - "loss": 0.8967, + "epoch": 0.7331262355266874, + "grad_norm": 3.6052685057509866, + "learning_rate": 1.7685428110327683e-05, + "loss": 1.0075, "step": 5192 }, { - "epoch": 1.0889075277836024, - "grad_norm": 7.427578493378902, - "learning_rate": 1.472837379193921e-05, - "loss": 0.8014, + "epoch": 0.7332674385766732, + "grad_norm": 3.2833593338194267, + "learning_rate": 1.7684452678519393e-05, + "loss": 0.7798, "step": 5193 }, { - "epoch": 1.0891172153491298, - "grad_norm": 5.973005129926664, - "learning_rate": 1.4726378849530705e-05, - "loss": 0.7712, + "epoch": 0.7334086416266591, + "grad_norm": 3.206096260320886, + "learning_rate": 1.7683477068127742e-05, + "loss": 0.8312, "step": 5194 }, { - "epoch": 1.089326902914657, - "grad_norm": 6.170650200998096, - "learning_rate": 1.4724383664887088e-05, - "loss": 0.7955, + "epoch": 0.733549844676645, + "grad_norm": 3.7313288739670796, + "learning_rate": 1.76825012791754e-05, + "loss": 1.2218, "step": 5195 }, { - "epoch": 1.0895365904801846, - "grad_norm": 8.53144146762034, - "learning_rate": 1.4722388238110611e-05, - "loss": 1.2179, + "epoch": 0.7336910477266309, + "grad_norm": 3.986072999137443, + "learning_rate": 1.7681525311685046e-05, + "loss": 1.0977, "step": 5196 }, { - "epoch": 1.089746278045712, - "grad_norm": 7.709237393908878, - "learning_rate": 1.472039256930355e-05, - "loss": 0.9327, + "epoch": 0.7338322507766167, + "grad_norm": 4.128323338782933, + "learning_rate": 1.7680549165679362e-05, + "loss": 1.0499, "step": 5197 }, { - "epoch": 1.0899559656112392, - "grad_norm": 6.428577573680235, - "learning_rate": 1.4718396658568181e-05, - "loss": 0.8474, + "epoch": 0.7339734538266026, + "grad_norm": 4.175860787342354, + "learning_rate": 1.7679572841181033e-05, + "loss": 1.1421, "step": 5198 }, { - "epoch": 1.0901656531767667, - "grad_norm": 8.160447321823169, - "learning_rate": 1.4716400506006798e-05, - "loss": 0.9434, + "epoch": 0.7341146568765885, + "grad_norm": 3.7739005875436993, + "learning_rate": 1.7678596338212747e-05, + "loss": 0.9572, "step": 5199 }, { - "epoch": 1.090375340742294, - "grad_norm": 8.223173609969216, - "learning_rate": 1.4714404111721715e-05, - "loss": 0.9066, + "epoch": 0.7342558599265744, + "grad_norm": 3.9305353984342584, + "learning_rate": 1.76776196567972e-05, + "loss": 1.2202, "step": 5200 }, { - "epoch": 1.0905850283078213, - "grad_norm": 6.716984493450535, - "learning_rate": 1.4712407475815238e-05, - "loss": 0.8658, + "epoch": 0.7343970629765603, + "grad_norm": 4.225679492044602, + "learning_rate": 1.7676642796957086e-05, + "loss": 1.0551, "step": 5201 }, { - "epoch": 1.0907947158733486, - "grad_norm": 6.13662592434018, - "learning_rate": 1.4710410598389708e-05, - "loss": 0.7111, + "epoch": 0.7345382660265461, + "grad_norm": 3.386513496963224, + "learning_rate": 1.767566575871511e-05, + "loss": 1.1148, "step": 5202 }, { - "epoch": 1.0910044034388762, - "grad_norm": 7.939494952599585, - "learning_rate": 1.4708413479547463e-05, - "loss": 0.9595, + "epoch": 0.734679469076532, + "grad_norm": 3.345487639984392, + "learning_rate": 1.7674688542093977e-05, + "loss": 1.1752, "step": 5203 }, { - "epoch": 1.0912140910044035, - "grad_norm": 8.844054160009875, - "learning_rate": 1.4706416119390863e-05, - "loss": 1.0835, + "epoch": 0.7348206721265179, + "grad_norm": 3.2110146012065974, + "learning_rate": 1.7673711147116392e-05, + "loss": 0.8906, "step": 5204 }, { - "epoch": 1.0914237785699308, - "grad_norm": 7.118598590697339, - "learning_rate": 1.4704418518022273e-05, - "loss": 0.8725, + "epoch": 0.7349618751765038, + "grad_norm": 3.451130523935677, + "learning_rate": 1.767273357380508e-05, + "loss": 0.9435, "step": 5205 }, { - "epoch": 1.091633466135458, - "grad_norm": 6.270520499430026, - "learning_rate": 1.4702420675544073e-05, - "loss": 0.8771, + "epoch": 0.7351030782264897, + "grad_norm": 3.9884920365468877, + "learning_rate": 1.767175582218275e-05, + "loss": 1.1512, "step": 5206 }, { - "epoch": 1.0918431537009856, - "grad_norm": 5.8874066641634135, - "learning_rate": 1.470042259205866e-05, - "loss": 0.9082, + "epoch": 0.7352442812764756, + "grad_norm": 4.049866100642281, + "learning_rate": 1.7670777892272127e-05, + "loss": 1.28, "step": 5207 }, { - "epoch": 1.092052841266513, - "grad_norm": 8.380986299400435, - "learning_rate": 1.4698424267668436e-05, - "loss": 1.1283, + "epoch": 0.7353854843264614, + "grad_norm": 3.6113124581335287, + "learning_rate": 1.766979978409594e-05, + "loss": 0.9274, "step": 5208 }, { - "epoch": 1.0922625288320402, - "grad_norm": 6.494949379374786, - "learning_rate": 1.4696425702475818e-05, - "loss": 0.7135, + "epoch": 0.7355266873764473, + "grad_norm": 3.3263554113426115, + "learning_rate": 1.766882149767692e-05, + "loss": 1.1465, "step": 5209 }, { - "epoch": 1.0924722163975675, - "grad_norm": 8.693236211299794, - "learning_rate": 1.4694426896583239e-05, - "loss": 1.0204, + "epoch": 0.7356678904264332, + "grad_norm": 3.1929389283408773, + "learning_rate": 1.76678430330378e-05, + "loss": 0.901, "step": 5210 }, { - "epoch": 1.092681903963095, - "grad_norm": 7.31116612227341, - "learning_rate": 1.4692427850093136e-05, - "loss": 0.8029, + "epoch": 0.7358090934764191, + "grad_norm": 3.3494554321808763, + "learning_rate": 1.7666864390201316e-05, + "loss": 0.9073, "step": 5211 }, { - "epoch": 1.0928915915286224, - "grad_norm": 7.053077825536181, - "learning_rate": 1.4690428563107972e-05, - "loss": 0.7885, + "epoch": 0.735950296526405, + "grad_norm": 3.4371131614234134, + "learning_rate": 1.766588556919022e-05, + "loss": 1.0814, "step": 5212 }, { - "epoch": 1.0931012790941497, - "grad_norm": 7.103941672845228, - "learning_rate": 1.4688429035730207e-05, - "loss": 0.784, + "epoch": 0.7360914995763909, + "grad_norm": 3.3378803297672826, + "learning_rate": 1.7664906570027248e-05, + "loss": 1.0004, "step": 5213 }, { - "epoch": 1.093310966659677, - "grad_norm": 7.184952343470437, - "learning_rate": 1.4686429268062321e-05, - "loss": 1.0048, + "epoch": 0.7362327026263767, + "grad_norm": 3.4893553281891725, + "learning_rate": 1.766392739273516e-05, + "loss": 1.0507, "step": 5214 }, { - "epoch": 1.0935206542252045, - "grad_norm": 8.715002074528302, - "learning_rate": 1.4684429260206808e-05, - "loss": 1.1955, + "epoch": 0.7363739056763626, + "grad_norm": 3.512892620575974, + "learning_rate": 1.7662948037336712e-05, + "loss": 0.946, "step": 5215 }, { - "epoch": 1.0937303417907318, - "grad_norm": 7.76117673401112, - "learning_rate": 1.468242901226617e-05, - "loss": 1.0695, + "epoch": 0.7365151087263485, + "grad_norm": 3.903656291842149, + "learning_rate": 1.766196850385466e-05, + "loss": 1.0506, "step": 5216 }, { - "epoch": 1.0939400293562591, - "grad_norm": 7.31097933631295, - "learning_rate": 1.4680428524342924e-05, - "loss": 0.9259, + "epoch": 0.7366563117763344, + "grad_norm": 4.1261989567664, + "learning_rate": 1.7660988792311766e-05, + "loss": 1.1422, "step": 5217 }, { - "epoch": 1.0941497169217866, - "grad_norm": 7.761901331848131, - "learning_rate": 1.4678427796539598e-05, - "loss": 1.0372, + "epoch": 0.7367975148263203, + "grad_norm": 3.5265977026392106, + "learning_rate": 1.7660008902730804e-05, + "loss": 1.1161, "step": 5218 }, { - "epoch": 1.094359404487314, - "grad_norm": 6.255989377055225, - "learning_rate": 1.4676426828958735e-05, - "loss": 0.8217, + "epoch": 0.7369387178763062, + "grad_norm": 3.9651582668346275, + "learning_rate": 1.765902883513454e-05, + "loss": 1.1428, "step": 5219 }, { - "epoch": 1.0945690920528413, - "grad_norm": 6.757370022526783, - "learning_rate": 1.4674425621702884e-05, - "loss": 0.7647, + "epoch": 0.737079920926292, + "grad_norm": 3.471093127144974, + "learning_rate": 1.7658048589545757e-05, + "loss": 1.132, "step": 5220 }, { - "epoch": 1.0947787796183686, - "grad_norm": 7.736046708190815, - "learning_rate": 1.4672424174874613e-05, - "loss": 1.0537, + "epoch": 0.7372211239762779, + "grad_norm": 3.4770560955620504, + "learning_rate": 1.765706816598723e-05, + "loss": 1.002, "step": 5221 }, { - "epoch": 1.094988467183896, - "grad_norm": 6.099275543820175, - "learning_rate": 1.4670422488576501e-05, - "loss": 0.8327, + "epoch": 0.7373623270262638, + "grad_norm": 3.593611062719353, + "learning_rate": 1.7656087564481746e-05, + "loss": 0.9708, "step": 5222 }, { - "epoch": 1.0951981547494234, - "grad_norm": 7.505195641683392, - "learning_rate": 1.4668420562911134e-05, - "loss": 0.9174, + "epoch": 0.7375035300762497, + "grad_norm": 4.222664087697859, + "learning_rate": 1.7655106785052093e-05, + "loss": 1.3272, "step": 5223 }, { - "epoch": 1.0954078423149507, - "grad_norm": 5.96454124723484, - "learning_rate": 1.4666418397981117e-05, - "loss": 0.7314, + "epoch": 0.7376447331262356, + "grad_norm": 4.10434730451653, + "learning_rate": 1.7654125827721066e-05, + "loss": 1.0104, "step": 5224 }, { - "epoch": 1.095617529880478, - "grad_norm": 6.144113544639926, - "learning_rate": 1.4664415993889062e-05, - "loss": 0.8219, + "epoch": 0.7377859361762215, + "grad_norm": 4.473908021972094, + "learning_rate": 1.7653144692511457e-05, + "loss": 1.3244, "step": 5225 }, { - "epoch": 1.0958272174460055, - "grad_norm": 8.483985177326398, - "learning_rate": 1.4662413350737598e-05, - "loss": 1.0613, + "epoch": 0.7379271392262073, + "grad_norm": 3.7077664029708375, + "learning_rate": 1.7652163379446073e-05, + "loss": 1.1665, "step": 5226 }, { - "epoch": 1.0960369050115328, - "grad_norm": 6.104065812146913, - "learning_rate": 1.4660410468629367e-05, - "loss": 0.7293, + "epoch": 0.7380683422761931, + "grad_norm": 2.883075461939255, + "learning_rate": 1.765118188854772e-05, + "loss": 0.8076, "step": 5227 }, { - "epoch": 1.0962465925770601, - "grad_norm": 9.169389924208907, - "learning_rate": 1.465840734766701e-05, - "loss": 1.052, + "epoch": 0.738209545326179, + "grad_norm": 3.3771721415184595, + "learning_rate": 1.7650200219839198e-05, + "loss": 1.0456, "step": 5228 }, { - "epoch": 1.0964562801425874, - "grad_norm": 7.41990455922644, - "learning_rate": 1.4656403987953203e-05, - "loss": 0.8779, + "epoch": 0.7383507483761649, + "grad_norm": 4.589219589508171, + "learning_rate": 1.7649218373343327e-05, + "loss": 1.1142, "step": 5229 }, { - "epoch": 1.096665967708115, - "grad_norm": 6.716426703282258, - "learning_rate": 1.465440038959061e-05, - "loss": 0.9721, + "epoch": 0.7384919514261508, + "grad_norm": 3.7225635184465626, + "learning_rate": 1.7648236349082928e-05, + "loss": 1.1035, "step": 5230 }, { - "epoch": 1.0968756552736423, - "grad_norm": 8.3008178030219, - "learning_rate": 1.4652396552681927e-05, - "loss": 0.9549, + "epoch": 0.7386331544761366, + "grad_norm": 3.2727647205613106, + "learning_rate": 1.7647254147080817e-05, + "loss": 0.9359, "step": 5231 }, { - "epoch": 1.0970853428391696, - "grad_norm": 7.444636334720324, - "learning_rate": 1.4650392477329853e-05, - "loss": 0.9387, + "epoch": 0.7387743575261225, + "grad_norm": 3.6846080407813253, + "learning_rate": 1.7646271767359824e-05, + "loss": 1.1587, "step": 5232 }, { - "epoch": 1.097295030404697, - "grad_norm": 6.770186121830977, - "learning_rate": 1.46483881636371e-05, - "loss": 0.8118, + "epoch": 0.7389155605761084, + "grad_norm": 3.6534726703244154, + "learning_rate": 1.7645289209942776e-05, + "loss": 1.1327, "step": 5233 }, { - "epoch": 1.0975047179702244, - "grad_norm": 6.425003740410699, - "learning_rate": 1.4646383611706386e-05, - "loss": 0.8559, + "epoch": 0.7390567636260943, + "grad_norm": 3.5328163068772307, + "learning_rate": 1.764430647485251e-05, + "loss": 0.9535, "step": 5234 }, { - "epoch": 1.0977144055357517, - "grad_norm": 6.9036210920477385, - "learning_rate": 1.4644378821640458e-05, - "loss": 0.833, + "epoch": 0.7391979666760802, + "grad_norm": 3.7887242368557668, + "learning_rate": 1.7643323562111864e-05, + "loss": 1.049, "step": 5235 }, { - "epoch": 1.097924093101279, - "grad_norm": 6.975999700655976, - "learning_rate": 1.4642373793542058e-05, - "loss": 0.9836, + "epoch": 0.7393391697260661, + "grad_norm": 3.288233810620734, + "learning_rate": 1.7642340471743675e-05, + "loss": 0.7307, "step": 5236 }, { - "epoch": 1.0981337806668066, - "grad_norm": 6.999536986183994, - "learning_rate": 1.4640368527513946e-05, - "loss": 0.728, + "epoch": 0.7394803727760519, + "grad_norm": 3.8576367387850223, + "learning_rate": 1.7641357203770793e-05, + "loss": 1.0743, "step": 5237 }, { - "epoch": 1.0983434682323339, - "grad_norm": 7.368083529477797, - "learning_rate": 1.4638363023658903e-05, - "loss": 0.9781, + "epoch": 0.7396215758260378, + "grad_norm": 4.412853091996168, + "learning_rate": 1.7640373758216075e-05, + "loss": 1.1327, "step": 5238 }, { - "epoch": 1.0985531557978612, - "grad_norm": 6.90949812596724, - "learning_rate": 1.463635728207971e-05, - "loss": 0.6899, + "epoch": 0.7397627788760237, + "grad_norm": 3.7197698532486623, + "learning_rate": 1.7639390135102367e-05, + "loss": 1.4073, "step": 5239 }, { - "epoch": 1.0987628433633885, - "grad_norm": 6.3207946313184955, - "learning_rate": 1.4634351302879162e-05, - "loss": 0.9654, + "epoch": 0.7399039819260096, + "grad_norm": 3.104639617932623, + "learning_rate": 1.7638406334452535e-05, + "loss": 0.9323, "step": 5240 }, { - "epoch": 1.098972530928916, - "grad_norm": 6.780499414301449, - "learning_rate": 1.4632345086160076e-05, - "loss": 0.8072, + "epoch": 0.7400451849759955, + "grad_norm": 3.396733964642443, + "learning_rate": 1.763742235628944e-05, + "loss": 0.8689, "step": 5241 }, { - "epoch": 1.0991822184944433, - "grad_norm": 8.34826574657584, - "learning_rate": 1.4630338632025267e-05, - "loss": 0.9392, + "epoch": 0.7401863880259814, + "grad_norm": 3.4498789420370413, + "learning_rate": 1.7636438200635942e-05, + "loss": 0.7899, "step": 5242 }, { - "epoch": 1.0993919060599706, - "grad_norm": 7.0572576226695425, - "learning_rate": 1.4628331940577574e-05, - "loss": 0.794, + "epoch": 0.7403275910759672, + "grad_norm": 3.5573532413711293, + "learning_rate": 1.763545386751492e-05, + "loss": 1.0146, "step": 5243 }, { - "epoch": 1.099601593625498, - "grad_norm": 8.290967643697805, - "learning_rate": 1.462632501191984e-05, - "loss": 0.8568, + "epoch": 0.7404687941259531, + "grad_norm": 2.749949074573413, + "learning_rate": 1.7634469356949246e-05, + "loss": 0.8445, "step": 5244 }, { - "epoch": 1.0998112811910254, - "grad_norm": 8.058207723623731, - "learning_rate": 1.4624317846154924e-05, - "loss": 0.8509, + "epoch": 0.740609997175939, + "grad_norm": 3.7812262892611135, + "learning_rate": 1.7633484668961803e-05, + "loss": 1.159, "step": 5245 }, { - "epoch": 1.1000209687565528, - "grad_norm": 5.431495949033383, - "learning_rate": 1.4622310443385701e-05, - "loss": 0.6222, + "epoch": 0.7407512002259249, + "grad_norm": 3.4289242786044163, + "learning_rate": 1.7632499803575473e-05, + "loss": 0.8263, "step": 5246 }, { - "epoch": 1.10023065632208, - "grad_norm": 9.151745998922761, - "learning_rate": 1.4620302803715049e-05, - "loss": 0.9728, + "epoch": 0.7408924032759108, + "grad_norm": 3.7642393483032857, + "learning_rate": 1.7631514760813146e-05, + "loss": 0.8606, "step": 5247 }, { - "epoch": 1.1004403438876074, - "grad_norm": 8.405932375249979, - "learning_rate": 1.4618294927245863e-05, - "loss": 1.1251, + "epoch": 0.7410336063258967, + "grad_norm": 3.4989436153831783, + "learning_rate": 1.7630529540697708e-05, + "loss": 1.1192, "step": 5248 }, { - "epoch": 1.100650031453135, - "grad_norm": 6.491227088105822, - "learning_rate": 1.4616286814081058e-05, - "loss": 0.7245, + "epoch": 0.7411748093758825, + "grad_norm": 3.7285764613430485, + "learning_rate": 1.762954414325206e-05, + "loss": 1.1583, "step": 5249 }, { - "epoch": 1.1008597190186622, - "grad_norm": 6.776466609688915, - "learning_rate": 1.4614278464323544e-05, - "loss": 0.9516, + "epoch": 0.7413160124258684, + "grad_norm": 3.305235068239984, + "learning_rate": 1.7628558568499103e-05, + "loss": 1.0447, "step": 5250 }, { - "epoch": 1.1010694065841895, - "grad_norm": 7.034400417074577, - "learning_rate": 1.4612269878076253e-05, - "loss": 0.9885, + "epoch": 0.7414572154758543, + "grad_norm": 3.526833282379295, + "learning_rate": 1.7627572816461736e-05, + "loss": 1.0123, "step": 5251 }, { - "epoch": 1.1012790941497168, - "grad_norm": 6.616594478849876, - "learning_rate": 1.4610261055442135e-05, - "loss": 0.751, + "epoch": 0.7415984185258402, + "grad_norm": 3.5635320757670383, + "learning_rate": 1.7626586887162875e-05, + "loss": 1.1391, "step": 5252 }, { - "epoch": 1.1014887817152443, - "grad_norm": 8.52526546355809, - "learning_rate": 1.4608251996524138e-05, - "loss": 0.746, + "epoch": 0.7417396215758261, + "grad_norm": 3.5667605992656144, + "learning_rate": 1.7625600780625425e-05, + "loss": 1.1746, "step": 5253 }, { - "epoch": 1.1016984692807716, - "grad_norm": 6.92929062894921, - "learning_rate": 1.4606242701425237e-05, - "loss": 1.0006, + "epoch": 0.741880824625812, + "grad_norm": 4.026572701741333, + "learning_rate": 1.7624614496872304e-05, + "loss": 1.0284, "step": 5254 }, { - "epoch": 1.101908156846299, - "grad_norm": 7.675093962691405, - "learning_rate": 1.4604233170248408e-05, - "loss": 0.8741, + "epoch": 0.7420220276757978, + "grad_norm": 3.5205526561313825, + "learning_rate": 1.762362803592644e-05, + "loss": 1.282, "step": 5255 }, { - "epoch": 1.1021178444118265, - "grad_norm": 6.685026314885749, - "learning_rate": 1.4602223403096642e-05, - "loss": 0.8539, + "epoch": 0.7421632307257837, + "grad_norm": 3.1102377815637468, + "learning_rate": 1.762264139781075e-05, + "loss": 0.956, "step": 5256 }, { - "epoch": 1.1023275319773538, - "grad_norm": 7.414838832903433, - "learning_rate": 1.4600213400072945e-05, - "loss": 0.8243, + "epoch": 0.7423044337757696, + "grad_norm": 3.168249888149406, + "learning_rate": 1.762165458254817e-05, + "loss": 0.975, "step": 5257 }, { - "epoch": 1.102537219542881, - "grad_norm": 7.188339848015743, - "learning_rate": 1.4598203161280333e-05, - "loss": 0.5663, + "epoch": 0.7424456368257555, + "grad_norm": 3.8671052566676645, + "learning_rate": 1.7620667590161626e-05, + "loss": 1.1254, "step": 5258 }, { - "epoch": 1.1027469071084084, - "grad_norm": 5.8988011985400695, - "learning_rate": 1.4596192686821833e-05, - "loss": 0.7032, + "epoch": 0.7425868398757414, + "grad_norm": 3.9261245165406278, + "learning_rate": 1.7619680420674057e-05, + "loss": 1.0099, "step": 5259 }, { - "epoch": 1.102956594673936, - "grad_norm": 7.410839787791522, - "learning_rate": 1.4594181976800488e-05, - "loss": 0.8251, + "epoch": 0.7427280429257272, + "grad_norm": 3.203499670483618, + "learning_rate": 1.7618693074108405e-05, + "loss": 0.7872, "step": 5260 }, { - "epoch": 1.1031662822394632, - "grad_norm": 7.003744012721617, - "learning_rate": 1.4592171031319344e-05, - "loss": 0.8218, + "epoch": 0.742869245975713, + "grad_norm": 3.812647330840194, + "learning_rate": 1.761770555048762e-05, + "loss": 1.0235, "step": 5261 }, { - "epoch": 1.1033759698049905, - "grad_norm": 7.960675257705126, - "learning_rate": 1.4590159850481475e-05, - "loss": 0.965, + "epoch": 0.7430104490256989, + "grad_norm": 3.514385294845092, + "learning_rate": 1.7616717849834644e-05, + "loss": 1.1711, "step": 5262 }, { - "epoch": 1.1035856573705178, - "grad_norm": 6.403035492792724, - "learning_rate": 1.4588148434389949e-05, - "loss": 0.6962, + "epoch": 0.7431516520756848, + "grad_norm": 4.437277939150923, + "learning_rate": 1.7615729972172437e-05, + "loss": 1.1246, "step": 5263 }, { - "epoch": 1.1037953449360454, - "grad_norm": 5.879248042642327, - "learning_rate": 1.4586136783147862e-05, - "loss": 0.7019, + "epoch": 0.7432928551256707, + "grad_norm": 4.015851152418311, + "learning_rate": 1.7614741917523956e-05, + "loss": 1.0916, "step": 5264 }, { - "epoch": 1.1040050325015727, - "grad_norm": 5.625515725249563, - "learning_rate": 1.4584124896858307e-05, - "loss": 0.5377, + "epoch": 0.7434340581756566, + "grad_norm": 3.124947656555935, + "learning_rate": 1.7613753685912155e-05, + "loss": 0.9798, "step": 5265 }, { - "epoch": 1.1042147200671, - "grad_norm": 6.881322513524374, - "learning_rate": 1.4582112775624402e-05, - "loss": 0.8795, + "epoch": 0.7435752612256424, + "grad_norm": 4.160254950067781, + "learning_rate": 1.7612765277360013e-05, + "loss": 1.2165, "step": 5266 }, { - "epoch": 1.1044244076326275, - "grad_norm": 6.620952160764525, - "learning_rate": 1.4580100419549267e-05, - "loss": 0.7854, + "epoch": 0.7437164642756283, + "grad_norm": 3.6338217951078082, + "learning_rate": 1.761177669189049e-05, + "loss": 0.9146, "step": 5267 }, { - "epoch": 1.1046340951981548, - "grad_norm": 6.3558135097406225, - "learning_rate": 1.4578087828736044e-05, - "loss": 0.7925, + "epoch": 0.7438576673256142, + "grad_norm": 3.1805582969404225, + "learning_rate": 1.761078792952657e-05, + "loss": 0.8451, "step": 5268 }, { - "epoch": 1.1048437827636821, - "grad_norm": 7.194166795475136, - "learning_rate": 1.457607500328788e-05, - "loss": 0.9547, + "epoch": 0.7439988703756001, + "grad_norm": 3.4395830253350717, + "learning_rate": 1.760979899029122e-05, + "loss": 0.9423, "step": 5269 }, { - "epoch": 1.1050534703292094, - "grad_norm": 7.974793152439824, - "learning_rate": 1.4574061943307935e-05, - "loss": 1.1181, + "epoch": 0.744140073425586, + "grad_norm": 3.783780339717336, + "learning_rate": 1.7608809874207426e-05, + "loss": 1.0166, "step": 5270 }, { - "epoch": 1.1052631578947367, - "grad_norm": 7.298391176730906, - "learning_rate": 1.4572048648899381e-05, - "loss": 0.7858, + "epoch": 0.7442812764755719, + "grad_norm": 3.6207270801734275, + "learning_rate": 1.760782058129818e-05, + "loss": 1.115, "step": 5271 }, { - "epoch": 1.1054728454602643, - "grad_norm": 7.120882100733394, - "learning_rate": 1.4570035120165407e-05, - "loss": 1.0643, + "epoch": 0.7444224795255577, + "grad_norm": 3.2276139764169156, + "learning_rate": 1.7606831111586467e-05, + "loss": 0.9483, "step": 5272 }, { - "epoch": 1.1056825330257916, - "grad_norm": 6.205183015980851, - "learning_rate": 1.4568021357209204e-05, - "loss": 0.8022, + "epoch": 0.7445636825755436, + "grad_norm": 3.795702391371701, + "learning_rate": 1.7605841465095287e-05, + "loss": 1.1897, "step": 5273 }, { - "epoch": 1.1058922205913189, - "grad_norm": 6.7955672747613, - "learning_rate": 1.4566007360133982e-05, - "loss": 0.8241, + "epoch": 0.7447048856255295, + "grad_norm": 3.5461080889903847, + "learning_rate": 1.7604851641847633e-05, + "loss": 0.9511, "step": 5274 }, { - "epoch": 1.1061019081568464, - "grad_norm": 6.68587754134207, - "learning_rate": 1.4563993129042966e-05, - "loss": 0.8863, + "epoch": 0.7448460886755154, + "grad_norm": 3.4178716346569615, + "learning_rate": 1.7603861641866517e-05, + "loss": 1.0617, "step": 5275 }, { - "epoch": 1.1063115957223737, - "grad_norm": 8.125446052411736, - "learning_rate": 1.4561978664039384e-05, - "loss": 0.8918, + "epoch": 0.7449872917255013, + "grad_norm": 3.7477211860272233, + "learning_rate": 1.7602871465174934e-05, + "loss": 1.1985, "step": 5276 }, { - "epoch": 1.106521283287901, - "grad_norm": 7.01196137170565, - "learning_rate": 1.4559963965226486e-05, - "loss": 0.6808, + "epoch": 0.7451284947754871, + "grad_norm": 2.994505576322276, + "learning_rate": 1.76018811117959e-05, + "loss": 0.7673, "step": 5277 }, { - "epoch": 1.1067309708534283, - "grad_norm": 6.267591726508855, - "learning_rate": 1.4557949032707526e-05, - "loss": 0.855, + "epoch": 0.745269697825473, + "grad_norm": 3.4740876623412857, + "learning_rate": 1.7600890581752435e-05, + "loss": 0.9432, "step": 5278 }, { - "epoch": 1.1069406584189558, - "grad_norm": 7.2587708398946145, - "learning_rate": 1.4555933866585772e-05, - "loss": 0.7654, + "epoch": 0.7454109008754589, + "grad_norm": 3.600328736812069, + "learning_rate": 1.7599899875067557e-05, + "loss": 0.8368, "step": 5279 }, { - "epoch": 1.1071503459844831, - "grad_norm": 6.799538260492512, - "learning_rate": 1.4553918466964502e-05, - "loss": 0.7683, + "epoch": 0.7455521039254448, + "grad_norm": 3.342449146957272, + "learning_rate": 1.7598908991764288e-05, + "loss": 1.0104, "step": 5280 }, { - "epoch": 1.1073600335500104, - "grad_norm": 7.159715032224461, - "learning_rate": 1.4551902833947018e-05, - "loss": 0.9596, + "epoch": 0.7456933069754307, + "grad_norm": 3.7531478965551393, + "learning_rate": 1.7597917931865655e-05, + "loss": 1.0059, "step": 5281 }, { - "epoch": 1.1075697211155378, - "grad_norm": 7.685335171896852, - "learning_rate": 1.4549886967636616e-05, - "loss": 0.8096, + "epoch": 0.7458345100254166, + "grad_norm": 3.102679602615667, + "learning_rate": 1.7596926695394692e-05, + "loss": 0.9659, "step": 5282 }, { - "epoch": 1.1077794086810653, - "grad_norm": 6.95621283818804, - "learning_rate": 1.4547870868136615e-05, - "loss": 0.8672, + "epoch": 0.7459757130754024, + "grad_norm": 3.458213762328175, + "learning_rate": 1.759593528237443e-05, + "loss": 0.979, "step": 5283 }, { - "epoch": 1.1079890962465926, - "grad_norm": 6.845828659223529, - "learning_rate": 1.4545854535550345e-05, - "loss": 0.8717, + "epoch": 0.7461169161253883, + "grad_norm": 4.005589914557624, + "learning_rate": 1.7594943692827913e-05, + "loss": 1.2169, "step": 5284 }, { - "epoch": 1.10819878381212, - "grad_norm": 7.405655499532415, - "learning_rate": 1.4543837969981147e-05, - "loss": 0.8813, + "epoch": 0.7462581191753742, + "grad_norm": 3.1859866924520754, + "learning_rate": 1.759395192677819e-05, + "loss": 0.7548, "step": 5285 }, { - "epoch": 1.1084084713776474, - "grad_norm": 6.038162646175789, - "learning_rate": 1.4541821171532369e-05, - "loss": 0.6046, + "epoch": 0.7463993222253601, + "grad_norm": 3.8687154766677065, + "learning_rate": 1.75929599842483e-05, + "loss": 1.1065, "step": 5286 }, { - "epoch": 1.1086181589431747, - "grad_norm": 7.125991913803899, - "learning_rate": 1.4539804140307381e-05, - "loss": 0.8772, + "epoch": 0.746540525275346, + "grad_norm": 3.610186990714733, + "learning_rate": 1.7591967865261296e-05, + "loss": 1.0946, "step": 5287 }, { - "epoch": 1.108827846508702, - "grad_norm": 7.057075598207975, - "learning_rate": 1.4537786876409556e-05, - "loss": 0.8591, + "epoch": 0.7466817283253319, + "grad_norm": 3.5461150690814436, + "learning_rate": 1.759097556984024e-05, + "loss": 1.0372, "step": 5288 }, { - "epoch": 1.1090375340742293, - "grad_norm": 6.4348006207981445, - "learning_rate": 1.4535769379942284e-05, - "loss": 0.6179, + "epoch": 0.7468229313753177, + "grad_norm": 3.431802686057019, + "learning_rate": 1.7589983098008193e-05, + "loss": 0.8818, "step": 5289 }, { - "epoch": 1.1092472216397569, - "grad_norm": 7.987256252031424, - "learning_rate": 1.4533751651008963e-05, - "loss": 0.9093, + "epoch": 0.7469641344253036, + "grad_norm": 3.596560044346133, + "learning_rate": 1.7588990449788213e-05, + "loss": 1.0804, "step": 5290 }, { - "epoch": 1.1094569092052842, - "grad_norm": 6.985077084092817, - "learning_rate": 1.4531733689713008e-05, - "loss": 0.8381, + "epoch": 0.7471053374752895, + "grad_norm": 3.538349967940706, + "learning_rate": 1.7587997625203374e-05, + "loss": 0.9982, "step": 5291 }, { - "epoch": 1.1096665967708115, - "grad_norm": 7.102520691559278, - "learning_rate": 1.452971549615784e-05, - "loss": 0.829, + "epoch": 0.7472465405252754, + "grad_norm": 3.5242698351862383, + "learning_rate": 1.7587004624276747e-05, + "loss": 1.0503, "step": 5292 }, { - "epoch": 1.1098762843363388, - "grad_norm": 8.727475404800678, - "learning_rate": 1.4527697070446898e-05, - "loss": 1.057, + "epoch": 0.7473877435752613, + "grad_norm": 3.559411905785136, + "learning_rate": 1.7586011447031407e-05, + "loss": 1.0064, "step": 5293 }, { - "epoch": 1.1100859719018663, - "grad_norm": 7.131393791736429, - "learning_rate": 1.4525678412683627e-05, - "loss": 0.7068, + "epoch": 0.7475289466252472, + "grad_norm": 4.2104462209446805, + "learning_rate": 1.758501809349044e-05, + "loss": 1.3989, "step": 5294 }, { - "epoch": 1.1102956594673936, - "grad_norm": 6.492703477551265, - "learning_rate": 1.4523659522971487e-05, - "loss": 0.6083, + "epoch": 0.7476701496752329, + "grad_norm": 3.97111030692772, + "learning_rate": 1.7584024563676925e-05, + "loss": 1.3128, "step": 5295 }, { - "epoch": 1.110505347032921, - "grad_norm": 6.904946199420258, - "learning_rate": 1.4521640401413953e-05, - "loss": 0.6057, + "epoch": 0.7478113527252188, + "grad_norm": 3.6564760797107128, + "learning_rate": 1.7583030857613957e-05, + "loss": 1.0056, "step": 5296 }, { - "epoch": 1.1107150345984482, - "grad_norm": 7.197071066598601, - "learning_rate": 1.4519621048114505e-05, - "loss": 0.8344, + "epoch": 0.7479525557752047, + "grad_norm": 3.900718121153533, + "learning_rate": 1.7582036975324626e-05, + "loss": 1.05, "step": 5297 }, { - "epoch": 1.1109247221639758, - "grad_norm": 7.766300606169771, - "learning_rate": 1.4517601463176639e-05, - "loss": 0.9885, + "epoch": 0.7480937588251906, + "grad_norm": 3.4538854344746537, + "learning_rate": 1.7581042916832028e-05, + "loss": 1.0057, "step": 5298 }, { - "epoch": 1.111134409729503, - "grad_norm": 6.980842010656182, - "learning_rate": 1.4515581646703862e-05, - "loss": 0.928, + "epoch": 0.7482349618751765, + "grad_norm": 3.7996806827768443, + "learning_rate": 1.758004868215927e-05, + "loss": 1.2415, "step": 5299 }, { - "epoch": 1.1113440972950304, - "grad_norm": 10.098313373677547, - "learning_rate": 1.4513561598799697e-05, - "loss": 0.9939, + "epoch": 0.7483761649251623, + "grad_norm": 3.8438051131908546, + "learning_rate": 1.7579054271329457e-05, + "loss": 1.0376, "step": 5300 }, { - "epoch": 1.1115537848605577, - "grad_norm": 7.671926527131913, - "learning_rate": 1.4511541319567667e-05, - "loss": 0.8654, + "epoch": 0.7485173679751482, + "grad_norm": 3.1111986790033948, + "learning_rate": 1.757805968436569e-05, + "loss": 0.8757, "step": 5301 }, { - "epoch": 1.1117634724260852, - "grad_norm": 7.036528098010894, - "learning_rate": 1.4509520809111323e-05, - "loss": 0.7219, + "epoch": 0.7486585710251341, + "grad_norm": 3.5937512691396383, + "learning_rate": 1.7577064921291092e-05, + "loss": 1.1634, "step": 5302 }, { - "epoch": 1.1119731599916125, - "grad_norm": 7.373465532454491, - "learning_rate": 1.4507500067534214e-05, - "loss": 0.8878, + "epoch": 0.74879977407512, + "grad_norm": 3.6351106258747037, + "learning_rate": 1.7576069982128774e-05, + "loss": 0.9912, "step": 5303 }, { - "epoch": 1.1121828475571398, - "grad_norm": 7.247153458904141, - "learning_rate": 1.4505479094939911e-05, - "loss": 0.8321, + "epoch": 0.7489409771251059, + "grad_norm": 3.142698079000596, + "learning_rate": 1.7575074866901863e-05, + "loss": 0.985, "step": 5304 }, { - "epoch": 1.1123925351226673, - "grad_norm": 7.82869427456522, - "learning_rate": 1.4503457891431988e-05, - "loss": 0.865, + "epoch": 0.7490821801750918, + "grad_norm": 4.407185830881482, + "learning_rate": 1.7574079575633485e-05, + "loss": 1.0153, "step": 5305 }, { - "epoch": 1.1126022226881946, - "grad_norm": 7.936958513929337, - "learning_rate": 1.4501436457114037e-05, - "loss": 0.8269, + "epoch": 0.7492233832250776, + "grad_norm": 3.7289964165424707, + "learning_rate": 1.757308410834677e-05, + "loss": 1.2923, "step": 5306 }, { - "epoch": 1.112811910253722, - "grad_norm": 6.087566469292746, - "learning_rate": 1.4499414792089661e-05, - "loss": 0.7164, + "epoch": 0.7493645862750635, + "grad_norm": 3.4073322049966652, + "learning_rate": 1.7572088465064847e-05, + "loss": 0.9226, "step": 5307 }, { - "epoch": 1.1130215978192493, - "grad_norm": 5.646810120677337, - "learning_rate": 1.4497392896462477e-05, - "loss": 0.6004, + "epoch": 0.7495057893250494, + "grad_norm": 4.049079965490176, + "learning_rate": 1.757109264581086e-05, + "loss": 1.2077, "step": 5308 }, { - "epoch": 1.1132312853847768, - "grad_norm": 6.788673545298981, - "learning_rate": 1.4495370770336099e-05, - "loss": 0.8651, + "epoch": 0.7496469923750353, + "grad_norm": 4.466656591077825, + "learning_rate": 1.757009665060795e-05, + "loss": 1.5774, "step": 5309 }, { - "epoch": 1.113440972950304, - "grad_norm": 7.420968616252654, - "learning_rate": 1.4493348413814179e-05, - "loss": 0.9926, + "epoch": 0.7497881954250212, + "grad_norm": 2.827904860133254, + "learning_rate": 1.756910047947926e-05, + "loss": 0.878, "step": 5310 }, { - "epoch": 1.1136506605158314, - "grad_norm": 6.840141496735218, - "learning_rate": 1.4491325827000355e-05, - "loss": 0.7206, + "epoch": 0.7499293984750071, + "grad_norm": 3.636550670263667, + "learning_rate": 1.7568104132447946e-05, + "loss": 0.962, "step": 5311 }, { - "epoch": 1.1138603480813587, - "grad_norm": 6.225287715575675, - "learning_rate": 1.4489303009998294e-05, - "loss": 0.6719, + "epoch": 0.7500706015249929, + "grad_norm": 3.094501391307671, + "learning_rate": 1.7567107609537163e-05, + "loss": 0.9145, "step": 5312 }, { - "epoch": 1.1140700356468862, - "grad_norm": 6.826950050988913, - "learning_rate": 1.4487279962911666e-05, - "loss": 0.8167, + "epoch": 0.7502118045749788, + "grad_norm": 3.5700771215770635, + "learning_rate": 1.7566110910770064e-05, + "loss": 1.0685, "step": 5313 }, { - "epoch": 1.1142797232124135, - "grad_norm": 7.252652795903682, - "learning_rate": 1.4485256685844163e-05, - "loss": 0.9985, + "epoch": 0.7503530076249647, + "grad_norm": 3.5244796937086877, + "learning_rate": 1.756511403616982e-05, + "loss": 0.9721, "step": 5314 }, { - "epoch": 1.1144894107779408, - "grad_norm": 7.536198834600522, - "learning_rate": 1.4483233178899468e-05, - "loss": 1.0543, + "epoch": 0.7504942106749506, + "grad_norm": 4.046433876412351, + "learning_rate": 1.7564116985759584e-05, + "loss": 0.9965, "step": 5315 }, { - "epoch": 1.1146990983434681, - "grad_norm": 7.116797576538685, - "learning_rate": 1.4481209442181301e-05, - "loss": 0.7723, + "epoch": 0.7506354137249365, + "grad_norm": 3.788355814384214, + "learning_rate": 1.7563119759562545e-05, + "loss": 1.1643, "step": 5316 }, { - "epoch": 1.1149087859089957, - "grad_norm": 6.140798938753212, - "learning_rate": 1.4479185475793378e-05, - "loss": 0.6864, + "epoch": 0.7507766167749224, + "grad_norm": 3.8064634587656223, + "learning_rate": 1.7562122357601863e-05, + "loss": 1.046, "step": 5317 }, { - "epoch": 1.115118473474523, - "grad_norm": 6.785531510740913, - "learning_rate": 1.4477161279839429e-05, - "loss": 0.7987, + "epoch": 0.7509178198249082, + "grad_norm": 3.950865928092699, + "learning_rate": 1.7561124779900723e-05, + "loss": 1.2353, "step": 5318 }, { - "epoch": 1.1153281610400503, - "grad_norm": 6.927082267142096, - "learning_rate": 1.44751368544232e-05, - "loss": 0.836, + "epoch": 0.7510590228748941, + "grad_norm": 3.732554163554491, + "learning_rate": 1.756012702648231e-05, + "loss": 1.2607, "step": 5319 }, { - "epoch": 1.1155378486055776, - "grad_norm": 5.327226625807522, - "learning_rate": 1.4473112199648447e-05, - "loss": 0.6378, + "epoch": 0.75120022592488, + "grad_norm": 3.8534297469548, + "learning_rate": 1.755912909736981e-05, + "loss": 1.0448, "step": 5320 }, { - "epoch": 1.1157475361711051, - "grad_norm": 7.829926846860998, - "learning_rate": 1.447108731561893e-05, - "loss": 1.0554, + "epoch": 0.7513414289748659, + "grad_norm": 3.976469870990217, + "learning_rate": 1.7558130992586417e-05, + "loss": 0.8862, "step": 5321 }, { - "epoch": 1.1159572237366324, - "grad_norm": 7.559542275088372, - "learning_rate": 1.446906220243844e-05, - "loss": 1.0161, + "epoch": 0.7514826320248518, + "grad_norm": 4.338312663839415, + "learning_rate": 1.755713271215532e-05, + "loss": 1.3358, "step": 5322 }, { - "epoch": 1.1161669113021597, - "grad_norm": 9.054593005111048, - "learning_rate": 1.4467036860210756e-05, - "loss": 0.91, + "epoch": 0.7516238350748377, + "grad_norm": 3.3519702965714604, + "learning_rate": 1.7556134256099726e-05, + "loss": 0.9989, "step": 5323 }, { - "epoch": 1.1163765988676873, - "grad_norm": 7.6273218046261615, - "learning_rate": 1.4465011289039688e-05, - "loss": 0.885, + "epoch": 0.7517650381248235, + "grad_norm": 3.3925007376362726, + "learning_rate": 1.7555135624442833e-05, + "loss": 0.9702, "step": 5324 }, { - "epoch": 1.1165862864332146, - "grad_norm": 6.8352987778493235, - "learning_rate": 1.4462985489029047e-05, - "loss": 0.8547, + "epoch": 0.7519062411748094, + "grad_norm": 3.4231730859100593, + "learning_rate": 1.755413681720785e-05, + "loss": 0.8433, "step": 5325 }, { - "epoch": 1.1167959739987419, - "grad_norm": 8.296011463134029, - "learning_rate": 1.4460959460282657e-05, - "loss": 0.9299, + "epoch": 0.7520474442247953, + "grad_norm": 3.780981959948629, + "learning_rate": 1.755313783441799e-05, + "loss": 1.1882, "step": 5326 }, { - "epoch": 1.1170056615642692, - "grad_norm": 8.584075025442193, - "learning_rate": 1.4458933202904359e-05, - "loss": 1.1658, + "epoch": 0.7521886472747812, + "grad_norm": 3.512217577546415, + "learning_rate": 1.755213867609647e-05, + "loss": 0.9939, "step": 5327 }, { - "epoch": 1.1172153491297967, - "grad_norm": 6.960218398308803, - "learning_rate": 1.4456906716998001e-05, - "loss": 0.9651, + "epoch": 0.7523298503247671, + "grad_norm": 3.2403075274646946, + "learning_rate": 1.755113934226651e-05, + "loss": 0.9148, "step": 5328 }, { - "epoch": 1.117425036695324, - "grad_norm": 7.031369510005853, - "learning_rate": 1.4454880002667447e-05, - "loss": 0.7441, + "epoch": 0.7524710533747528, + "grad_norm": 3.7949361627032907, + "learning_rate": 1.755013983295133e-05, + "loss": 1.2714, "step": 5329 }, { - "epoch": 1.1176347242608513, - "grad_norm": 8.01794853655793, - "learning_rate": 1.4452853060016558e-05, - "loss": 0.7905, + "epoch": 0.7526122564247387, + "grad_norm": 3.90548510334465, + "learning_rate": 1.754914014817416e-05, + "loss": 1.0152, "step": 5330 }, { - "epoch": 1.1178444118263786, - "grad_norm": 7.262109132423744, - "learning_rate": 1.4450825889149234e-05, - "loss": 0.7935, + "epoch": 0.7527534594747246, + "grad_norm": 3.86075182380805, + "learning_rate": 1.7548140287958237e-05, + "loss": 1.0784, "step": 5331 }, { - "epoch": 1.1180540993919061, - "grad_norm": 6.354503037423062, - "learning_rate": 1.444879849016936e-05, - "loss": 0.7458, + "epoch": 0.7528946625247105, + "grad_norm": 3.9216945701964336, + "learning_rate": 1.754714025232679e-05, + "loss": 1.0934, "step": 5332 }, { - "epoch": 1.1182637869574334, - "grad_norm": 7.212829643299545, - "learning_rate": 1.4446770863180848e-05, - "loss": 0.7553, + "epoch": 0.7530358655746964, + "grad_norm": 3.393529562853154, + "learning_rate": 1.7546140041303065e-05, + "loss": 0.9881, "step": 5333 }, { - "epoch": 1.1184734745229608, - "grad_norm": 7.188151678701964, - "learning_rate": 1.4444743008287616e-05, - "loss": 0.7569, + "epoch": 0.7531770686246823, + "grad_norm": 3.770490748168645, + "learning_rate": 1.7545139654910302e-05, + "loss": 1.0408, "step": 5334 }, { - "epoch": 1.118683162088488, - "grad_norm": 8.4531894189342, - "learning_rate": 1.44427149255936e-05, - "loss": 1.016, + "epoch": 0.7533182716746681, + "grad_norm": 4.168491592260939, + "learning_rate": 1.7544139093171754e-05, + "loss": 1.1088, "step": 5335 }, { - "epoch": 1.1188928496540156, - "grad_norm": 7.2243409701215064, - "learning_rate": 1.4440686615202732e-05, - "loss": 1.0894, + "epoch": 0.753459474724654, + "grad_norm": 4.582936892355896, + "learning_rate": 1.7543138356110675e-05, + "loss": 1.2007, "step": 5336 }, { - "epoch": 1.119102537219543, - "grad_norm": 7.06649757413754, - "learning_rate": 1.4438658077218979e-05, - "loss": 0.7815, + "epoch": 0.7536006777746399, + "grad_norm": 4.370261802808565, + "learning_rate": 1.7542137443750313e-05, + "loss": 1.1121, "step": 5337 }, { - "epoch": 1.1193122247850702, - "grad_norm": 6.367088561664283, - "learning_rate": 1.4436629311746295e-05, - "loss": 0.6352, + "epoch": 0.7537418808246258, + "grad_norm": 4.10197373058056, + "learning_rate": 1.7541136356113934e-05, + "loss": 1.0637, "step": 5338 }, { - "epoch": 1.1195219123505975, - "grad_norm": 5.582258342998007, - "learning_rate": 1.443460031888867e-05, - "loss": 0.6694, + "epoch": 0.7538830838746117, + "grad_norm": 3.750455648501595, + "learning_rate": 1.7540135093224803e-05, + "loss": 1.1916, "step": 5339 }, { - "epoch": 1.119731599916125, - "grad_norm": 9.142133339711851, - "learning_rate": 1.4432571098750083e-05, - "loss": 1.0844, + "epoch": 0.7540242869245976, + "grad_norm": 3.049261729741092, + "learning_rate": 1.753913365510619e-05, + "loss": 0.84, "step": 5340 }, { - "epoch": 1.1199412874816523, - "grad_norm": 7.036624129810012, - "learning_rate": 1.4430541651434541e-05, - "loss": 0.7608, + "epoch": 0.7541654899745834, + "grad_norm": 2.9758741506028565, + "learning_rate": 1.753813204178137e-05, + "loss": 0.8051, "step": 5341 }, { - "epoch": 1.1201509750471796, - "grad_norm": 6.268651703016797, - "learning_rate": 1.4428511977046056e-05, - "loss": 0.786, + "epoch": 0.7543066930245693, + "grad_norm": 3.1747014626869174, + "learning_rate": 1.7537130253273613e-05, + "loss": 1.0031, "step": 5342 }, { - "epoch": 1.1203606626127072, - "grad_norm": 7.508689757916, - "learning_rate": 1.442648207568865e-05, - "loss": 0.917, + "epoch": 0.7544478960745552, + "grad_norm": 3.8712398508181134, + "learning_rate": 1.7536128289606206e-05, + "loss": 1.0987, "step": 5343 }, { - "epoch": 1.1205703501782345, - "grad_norm": 7.450447352270454, - "learning_rate": 1.442445194746636e-05, - "loss": 0.7783, + "epoch": 0.7545890991245411, + "grad_norm": 4.679282789397256, + "learning_rate": 1.7535126150802428e-05, + "loss": 1.4102, "step": 5344 }, { - "epoch": 1.1207800377437618, - "grad_norm": 8.498621452351479, - "learning_rate": 1.4422421592483236e-05, - "loss": 1.022, + "epoch": 0.754730302174527, + "grad_norm": 4.030503397269633, + "learning_rate": 1.7534123836885576e-05, + "loss": 0.958, "step": 5345 }, { - "epoch": 1.120989725309289, - "grad_norm": 7.928145217775752, - "learning_rate": 1.4420391010843333e-05, - "loss": 1.0667, + "epoch": 0.7548715052245129, + "grad_norm": 3.5641902534767813, + "learning_rate": 1.753312134787894e-05, + "loss": 0.945, "step": 5346 }, { - "epoch": 1.1211994128748166, - "grad_norm": 6.888034379791948, - "learning_rate": 1.4418360202650723e-05, - "loss": 0.9739, + "epoch": 0.7550127082744987, + "grad_norm": 3.9170753611460443, + "learning_rate": 1.7532118683805816e-05, + "loss": 1.1039, "step": 5347 }, { - "epoch": 1.121409100440344, - "grad_norm": 7.147667928868137, - "learning_rate": 1.441632916800949e-05, - "loss": 0.7517, + "epoch": 0.7551539113244846, + "grad_norm": 4.175391321711326, + "learning_rate": 1.7531115844689505e-05, + "loss": 0.9655, "step": 5348 }, { - "epoch": 1.1216187880058712, - "grad_norm": 6.788044712192266, - "learning_rate": 1.4414297907023729e-05, - "loss": 0.8655, + "epoch": 0.7552951143744705, + "grad_norm": 4.56019463921606, + "learning_rate": 1.7530112830553318e-05, + "loss": 1.0388, "step": 5349 }, { - "epoch": 1.1218284755713985, - "grad_norm": 7.312882044839968, - "learning_rate": 1.4412266419797543e-05, - "loss": 0.8724, + "epoch": 0.7554363174244564, + "grad_norm": 3.7962916163520783, + "learning_rate": 1.7529109641420557e-05, + "loss": 1.0387, "step": 5350 }, { - "epoch": 1.122038163136926, - "grad_norm": 6.9142472127982595, - "learning_rate": 1.441023470643505e-05, - "loss": 0.7792, + "epoch": 0.7555775204744423, + "grad_norm": 4.760329118159397, + "learning_rate": 1.7528106277314544e-05, + "loss": 1.1884, "step": 5351 }, { - "epoch": 1.1222478507024534, - "grad_norm": 7.841383018594161, - "learning_rate": 1.440820276704038e-05, - "loss": 0.7208, + "epoch": 0.7557187235244281, + "grad_norm": 3.6578711328040323, + "learning_rate": 1.7527102738258588e-05, + "loss": 1.1826, "step": 5352 }, { - "epoch": 1.1224575382679807, - "grad_norm": 6.9939743963396035, - "learning_rate": 1.4406170601717673e-05, - "loss": 0.7733, + "epoch": 0.755859926574414, + "grad_norm": 3.860137271061851, + "learning_rate": 1.7526099024276017e-05, + "loss": 0.831, "step": 5353 }, { - "epoch": 1.122667225833508, - "grad_norm": 6.554386105274742, - "learning_rate": 1.4404138210571077e-05, - "loss": 0.6831, + "epoch": 0.7560011296243999, + "grad_norm": 3.5821823596119864, + "learning_rate": 1.7525095135390152e-05, + "loss": 0.8613, "step": 5354 }, { - "epoch": 1.1228769133990355, - "grad_norm": 7.098724082310701, - "learning_rate": 1.4402105593704762e-05, - "loss": 0.7546, + "epoch": 0.7561423326743858, + "grad_norm": 3.5438383645336233, + "learning_rate": 1.7524091071624333e-05, + "loss": 1.0042, "step": 5355 }, { - "epoch": 1.1230866009645628, - "grad_norm": 7.53571305583368, - "learning_rate": 1.44000727512229e-05, - "loss": 0.8781, + "epoch": 0.7562835357243717, + "grad_norm": 4.007601416335219, + "learning_rate": 1.752308683300188e-05, + "loss": 1.1738, "step": 5356 }, { - "epoch": 1.1232962885300901, - "grad_norm": 7.882009548978248, - "learning_rate": 1.4398039683229677e-05, - "loss": 0.8757, + "epoch": 0.7564247387743576, + "grad_norm": 3.74948697446228, + "learning_rate": 1.752208241954614e-05, + "loss": 0.9997, "step": 5357 }, { - "epoch": 1.1235059760956174, - "grad_norm": 6.815514278255862, - "learning_rate": 1.4396006389829296e-05, - "loss": 0.8982, + "epoch": 0.7565659418243434, + "grad_norm": 3.4818225770743085, + "learning_rate": 1.7521077831280453e-05, + "loss": 0.9952, "step": 5358 }, { - "epoch": 1.123715663661145, - "grad_norm": 7.538810848325119, - "learning_rate": 1.4393972871125956e-05, - "loss": 0.8587, + "epoch": 0.7567071448743293, + "grad_norm": 3.1569259826482283, + "learning_rate": 1.7520073068228166e-05, + "loss": 0.853, "step": 5359 }, { - "epoch": 1.1239253512266723, - "grad_norm": 8.58280638530999, - "learning_rate": 1.4391939127223893e-05, - "loss": 0.884, + "epoch": 0.7568483479243152, + "grad_norm": 3.0880340703628497, + "learning_rate": 1.751906813041263e-05, + "loss": 0.8761, "step": 5360 }, { - "epoch": 1.1241350387921996, - "grad_norm": 7.060709874657798, - "learning_rate": 1.4389905158227328e-05, - "loss": 0.7607, + "epoch": 0.7569895509743011, + "grad_norm": 3.330844088529482, + "learning_rate": 1.7518063017857196e-05, + "loss": 1.0737, "step": 5361 }, { - "epoch": 1.124344726357727, - "grad_norm": 10.108765781270456, - "learning_rate": 1.438787096424051e-05, - "loss": 1.2638, + "epoch": 0.757130754024287, + "grad_norm": 3.2035100808879595, + "learning_rate": 1.7517057730585224e-05, + "loss": 0.7782, "step": 5362 }, { - "epoch": 1.1245544139232544, - "grad_norm": 6.281611799856333, - "learning_rate": 1.4385836545367695e-05, - "loss": 0.8481, + "epoch": 0.7572719570742728, + "grad_norm": 3.618181341571531, + "learning_rate": 1.7516052268620076e-05, + "loss": 1.2048, "step": 5363 }, { - "epoch": 1.1247641014887817, - "grad_norm": 7.298175324749741, - "learning_rate": 1.4383801901713153e-05, - "loss": 0.9415, + "epoch": 0.7574131601242586, + "grad_norm": 3.3004561402653505, + "learning_rate": 1.751504663198512e-05, + "loss": 0.8033, "step": 5364 }, { - "epoch": 1.124973789054309, - "grad_norm": 8.496575626212818, - "learning_rate": 1.4381767033381155e-05, - "loss": 1.0822, + "epoch": 0.7575543631742445, + "grad_norm": 3.638346055797656, + "learning_rate": 1.751404082070373e-05, + "loss": 1.0316, "step": 5365 }, { - "epoch": 1.1251834766198365, - "grad_norm": 9.117897448564515, - "learning_rate": 1.4379731940476001e-05, - "loss": 0.7974, + "epoch": 0.7576955662242304, + "grad_norm": 3.9447209242817425, + "learning_rate": 1.751303483479927e-05, + "loss": 1.2032, "step": 5366 }, { - "epoch": 1.1253931641853638, - "grad_norm": 5.7202721694242475, - "learning_rate": 1.4377696623101985e-05, - "loss": 0.6565, + "epoch": 0.7578367692742163, + "grad_norm": 3.8317708009186355, + "learning_rate": 1.7512028674295127e-05, + "loss": 0.9018, "step": 5367 }, { - "epoch": 1.1256028517508911, - "grad_norm": 7.9782853792509, - "learning_rate": 1.4375661081363427e-05, - "loss": 0.9567, + "epoch": 0.7579779723242022, + "grad_norm": 3.2837921856125076, + "learning_rate": 1.7511022339214682e-05, + "loss": 0.87, "step": 5368 }, { - "epoch": 1.1258125393164184, - "grad_norm": 6.643879972056351, - "learning_rate": 1.437362531536465e-05, - "loss": 0.7296, + "epoch": 0.758119175374188, + "grad_norm": 3.2129933919856666, + "learning_rate": 1.7510015829581325e-05, + "loss": 0.9365, "step": 5369 }, { - "epoch": 1.126022226881946, - "grad_norm": 8.583335207889562, - "learning_rate": 1.4371589325209989e-05, - "loss": 0.9101, + "epoch": 0.7582603784241739, + "grad_norm": 4.026588059677428, + "learning_rate": 1.750900914541844e-05, + "loss": 1.3919, "step": 5370 }, { - "epoch": 1.1262319144474733, - "grad_norm": 6.836243950650538, - "learning_rate": 1.4369553111003792e-05, - "loss": 0.9839, + "epoch": 0.7584015814741598, + "grad_norm": 3.6047314512897675, + "learning_rate": 1.7508002286749426e-05, + "loss": 0.9642, "step": 5371 }, { - "epoch": 1.1264416020130006, - "grad_norm": 7.320035137857756, - "learning_rate": 1.4367516672850422e-05, - "loss": 0.9172, + "epoch": 0.7585427845241457, + "grad_norm": 3.4349750066376856, + "learning_rate": 1.750699525359768e-05, + "loss": 1.052, "step": 5372 }, { - "epoch": 1.1266512895785281, - "grad_norm": 8.798982808132612, - "learning_rate": 1.4365480010854245e-05, - "loss": 0.8326, + "epoch": 0.7586839875741316, + "grad_norm": 2.8483640155252354, + "learning_rate": 1.750598804598661e-05, + "loss": 0.8874, "step": 5373 }, { - "epoch": 1.1268609771440554, - "grad_norm": 6.526478788559566, - "learning_rate": 1.4363443125119648e-05, - "loss": 0.6422, + "epoch": 0.7588251906241175, + "grad_norm": 3.1331039715751166, + "learning_rate": 1.7504980663939614e-05, + "loss": 0.8907, "step": 5374 }, { - "epoch": 1.1270706647095827, - "grad_norm": 6.707836526018642, - "learning_rate": 1.436140601575102e-05, - "loss": 0.8116, + "epoch": 0.7589663936741033, + "grad_norm": 3.8786223690064556, + "learning_rate": 1.7503973107480112e-05, + "loss": 0.9395, "step": 5375 }, { - "epoch": 1.12728035227511, - "grad_norm": 7.48048492036471, - "learning_rate": 1.435936868285277e-05, - "loss": 1.0693, + "epoch": 0.7591075967240892, + "grad_norm": 3.5313911154494693, + "learning_rate": 1.7502965376631515e-05, + "loss": 0.789, "step": 5376 }, { - "epoch": 1.1274900398406373, - "grad_norm": 6.426645297364127, - "learning_rate": 1.4357331126529314e-05, - "loss": 0.7899, + "epoch": 0.7592487997740751, + "grad_norm": 3.440949995923055, + "learning_rate": 1.7501957471417242e-05, + "loss": 0.9767, "step": 5377 }, { - "epoch": 1.1276997274061649, - "grad_norm": 7.930337098484812, - "learning_rate": 1.4355293346885081e-05, - "loss": 1.1519, + "epoch": 0.759390002824061, + "grad_norm": 3.5194118101119347, + "learning_rate": 1.750094939186072e-05, + "loss": 1.0327, "step": 5378 }, { - "epoch": 1.1279094149716922, - "grad_norm": 8.089741755707838, - "learning_rate": 1.435325534402451e-05, - "loss": 0.8626, + "epoch": 0.7595312058740469, + "grad_norm": 3.238977218838803, + "learning_rate": 1.749994113798537e-05, + "loss": 1.0892, "step": 5379 }, { - "epoch": 1.1281191025372195, - "grad_norm": 9.137835228507097, - "learning_rate": 1.4351217118052055e-05, - "loss": 1.113, + "epoch": 0.7596724089240328, + "grad_norm": 3.928450417173385, + "learning_rate": 1.749893270981463e-05, + "loss": 0.9276, "step": 5380 }, { - "epoch": 1.128328790102747, - "grad_norm": 7.1065101296877735, - "learning_rate": 1.4349178669072172e-05, - "loss": 0.9178, + "epoch": 0.7598136119740186, + "grad_norm": 3.0767471377721747, + "learning_rate": 1.7497924107371932e-05, + "loss": 0.8921, "step": 5381 }, { - "epoch": 1.1285384776682743, - "grad_norm": 7.4689392343693255, - "learning_rate": 1.4347139997189341e-05, - "loss": 1.0464, + "epoch": 0.7599548150240045, + "grad_norm": 3.418801860647783, + "learning_rate": 1.7496915330680713e-05, + "loss": 1.041, "step": 5382 }, { - "epoch": 1.1287481652338016, - "grad_norm": 6.187891644006432, - "learning_rate": 1.4345101102508046e-05, - "loss": 0.6993, + "epoch": 0.7600960180739904, + "grad_norm": 3.0620524424025812, + "learning_rate": 1.7495906379764423e-05, + "loss": 0.7479, "step": 5383 }, { - "epoch": 1.128957852799329, - "grad_norm": 6.240077443306164, - "learning_rate": 1.434306198513278e-05, - "loss": 0.6045, + "epoch": 0.7602372211239763, + "grad_norm": 4.275355539634467, + "learning_rate": 1.7494897254646503e-05, + "loss": 1.114, "step": 5384 }, { - "epoch": 1.1291675403648564, - "grad_norm": 5.174570112136311, - "learning_rate": 1.4341022645168058e-05, - "loss": 0.7004, + "epoch": 0.7603784241739622, + "grad_norm": 4.048008794177169, + "learning_rate": 1.749388795535041e-05, + "loss": 1.1489, "step": 5385 }, { - "epoch": 1.1293772279303838, - "grad_norm": 8.060676888391821, - "learning_rate": 1.4338983082718395e-05, - "loss": 0.9166, + "epoch": 0.7605196272239481, + "grad_norm": 3.818522516299592, + "learning_rate": 1.7492878481899595e-05, + "loss": 1.126, "step": 5386 }, { - "epoch": 1.129586915495911, - "grad_norm": 6.777680341403954, - "learning_rate": 1.4336943297888324e-05, - "loss": 0.8038, + "epoch": 0.7606608302739339, + "grad_norm": 3.27184299819054, + "learning_rate": 1.7491868834317523e-05, + "loss": 1.049, "step": 5387 }, { - "epoch": 1.1297966030614384, - "grad_norm": 7.80021177986887, - "learning_rate": 1.4334903290782384e-05, - "loss": 1.0197, + "epoch": 0.7608020333239198, + "grad_norm": 2.8113222162309444, + "learning_rate": 1.7490859012627652e-05, + "loss": 0.7611, "step": 5388 }, { - "epoch": 1.130006290626966, - "grad_norm": 8.0601547893169, - "learning_rate": 1.4332863061505136e-05, - "loss": 1.1241, + "epoch": 0.7609432363739057, + "grad_norm": 3.9440174035547435, + "learning_rate": 1.7489849016853452e-05, + "loss": 1.0258, "step": 5389 }, { - "epoch": 1.1302159781924932, - "grad_norm": 5.897357256287947, - "learning_rate": 1.433082261016114e-05, - "loss": 0.7745, + "epoch": 0.7610844394238916, + "grad_norm": 4.037069280774431, + "learning_rate": 1.7488838847018397e-05, + "loss": 1.1217, "step": 5390 }, { - "epoch": 1.1304256657580205, - "grad_norm": 7.275952764466097, - "learning_rate": 1.4328781936854973e-05, - "loss": 0.9076, + "epoch": 0.7612256424738775, + "grad_norm": 3.4308820595533525, + "learning_rate": 1.7487828503145962e-05, + "loss": 1.021, "step": 5391 }, { - "epoch": 1.130635353323548, - "grad_norm": 7.298029037435003, - "learning_rate": 1.4326741041691221e-05, - "loss": 0.8781, + "epoch": 0.7613668455238634, + "grad_norm": 3.317210516151756, + "learning_rate": 1.7486817985259627e-05, + "loss": 0.9406, "step": 5392 }, { - "epoch": 1.1308450408890753, - "grad_norm": 6.690345458777128, - "learning_rate": 1.4324699924774493e-05, - "loss": 0.9666, + "epoch": 0.7615080485738492, + "grad_norm": 3.339148874346066, + "learning_rate": 1.7485807293382872e-05, + "loss": 0.9242, "step": 5393 }, { - "epoch": 1.1310547284546026, - "grad_norm": 8.691288924121832, - "learning_rate": 1.4322658586209386e-05, - "loss": 0.8209, + "epoch": 0.7616492516238351, + "grad_norm": 3.104794939958571, + "learning_rate": 1.7484796427539194e-05, + "loss": 0.962, "step": 5394 }, { - "epoch": 1.13126441602013, - "grad_norm": 8.053075990640624, - "learning_rate": 1.4320617026100534e-05, - "loss": 0.8565, + "epoch": 0.761790454673821, + "grad_norm": 3.1822786717850677, + "learning_rate": 1.7483785387752075e-05, + "loss": 0.9293, "step": 5395 }, { - "epoch": 1.1314741035856573, - "grad_norm": 7.633574091009824, - "learning_rate": 1.4318575244552564e-05, - "loss": 0.7277, + "epoch": 0.7619316577238069, + "grad_norm": 3.7753569725066867, + "learning_rate": 1.7482774174045017e-05, + "loss": 1.0524, "step": 5396 }, { - "epoch": 1.1316837911511848, - "grad_norm": 8.938959265792771, - "learning_rate": 1.4316533241670122e-05, - "loss": 1.2485, + "epoch": 0.7620728607737927, + "grad_norm": 3.656345856196439, + "learning_rate": 1.7481762786441515e-05, + "loss": 1.0434, "step": 5397 }, { - "epoch": 1.131893478716712, - "grad_norm": 7.868265820560508, - "learning_rate": 1.4314491017557865e-05, - "loss": 0.8427, + "epoch": 0.7622140638237785, + "grad_norm": 3.1266808820681122, + "learning_rate": 1.7480751224965083e-05, + "loss": 1.0026, "step": 5398 }, { - "epoch": 1.1321031662822394, - "grad_norm": 7.3085721225612215, - "learning_rate": 1.431244857232046e-05, - "loss": 0.8427, + "epoch": 0.7623552668737644, + "grad_norm": 3.8877828809385897, + "learning_rate": 1.7479739489639218e-05, + "loss": 1.1445, "step": 5399 }, { - "epoch": 1.132312853847767, - "grad_norm": 7.119899870962551, - "learning_rate": 1.4310405906062585e-05, - "loss": 0.7924, + "epoch": 0.7624964699237503, + "grad_norm": 3.426374720496311, + "learning_rate": 1.747872758048744e-05, + "loss": 1.0968, "step": 5400 }, { - "epoch": 1.1325225414132942, - "grad_norm": 8.093826268501388, - "learning_rate": 1.4308363018888937e-05, - "loss": 0.8938, + "epoch": 0.7626376729737362, + "grad_norm": 3.7151113864288843, + "learning_rate": 1.7477715497533263e-05, + "loss": 1.2411, "step": 5401 }, { - "epoch": 1.1327322289788215, - "grad_norm": 7.320233829334078, - "learning_rate": 1.4306319910904204e-05, - "loss": 0.9461, + "epoch": 0.7627788760237221, + "grad_norm": 4.3837234890510155, + "learning_rate": 1.747670324080021e-05, + "loss": 1.1182, "step": 5402 }, { - "epoch": 1.1329419165443488, - "grad_norm": 7.880379328471395, - "learning_rate": 1.4304276582213113e-05, - "loss": 0.8792, + "epoch": 0.762920079073708, + "grad_norm": 3.704644420485445, + "learning_rate": 1.7475690810311798e-05, + "loss": 1.0697, "step": 5403 }, { - "epoch": 1.1331516041098764, - "grad_norm": 7.882877956852063, - "learning_rate": 1.4302233032920378e-05, - "loss": 0.8881, + "epoch": 0.7630612821236938, + "grad_norm": 3.1797511884255023, + "learning_rate": 1.7474678206091563e-05, + "loss": 0.8888, "step": 5404 }, { - "epoch": 1.1333612916754037, - "grad_norm": 6.806324842836676, - "learning_rate": 1.430018926313074e-05, - "loss": 0.8469, + "epoch": 0.7632024851736797, + "grad_norm": 3.6668347240063075, + "learning_rate": 1.747366542816303e-05, + "loss": 1.1185, "step": 5405 }, { - "epoch": 1.133570979240931, - "grad_norm": 7.450142492081317, - "learning_rate": 1.4298145272948945e-05, - "loss": 0.7573, + "epoch": 0.7633436882236656, + "grad_norm": 3.877043989708006, + "learning_rate": 1.7472652476549747e-05, + "loss": 1.0538, "step": 5406 }, { - "epoch": 1.1337806668064583, - "grad_norm": 7.566267394090057, - "learning_rate": 1.4296101062479745e-05, - "loss": 0.7798, + "epoch": 0.7634848912736515, + "grad_norm": 2.975033408962581, + "learning_rate": 1.7471639351275243e-05, + "loss": 0.9002, "step": 5407 }, { - "epoch": 1.1339903543719858, - "grad_norm": 7.125806302742905, - "learning_rate": 1.4294056631827919e-05, - "loss": 0.7141, + "epoch": 0.7636260943236374, + "grad_norm": 3.7696452402881255, + "learning_rate": 1.7470626052363068e-05, + "loss": 1.0987, "step": 5408 }, { - "epoch": 1.1342000419375131, - "grad_norm": 8.495944212331963, - "learning_rate": 1.4292011981098242e-05, - "loss": 1.0081, + "epoch": 0.7637672973736233, + "grad_norm": 3.616400703390129, + "learning_rate": 1.746961257983677e-05, + "loss": 0.9001, "step": 5409 }, { - "epoch": 1.1344097295030404, - "grad_norm": 7.441589839803221, - "learning_rate": 1.4289967110395507e-05, - "loss": 0.8588, + "epoch": 0.7639085004236091, + "grad_norm": 5.079507275996488, + "learning_rate": 1.74685989337199e-05, + "loss": 1.0879, "step": 5410 }, { - "epoch": 1.134619417068568, - "grad_norm": 6.575428153209504, - "learning_rate": 1.4287922019824516e-05, - "loss": 0.7162, + "epoch": 0.764049703473595, + "grad_norm": 3.758807796492364, + "learning_rate": 1.746758511403602e-05, + "loss": 0.9876, "step": 5411 }, { - "epoch": 1.1348291046340953, - "grad_norm": 7.176187410836672, - "learning_rate": 1.4285876709490085e-05, - "loss": 1.0302, + "epoch": 0.7641909065235809, + "grad_norm": 4.315461009830911, + "learning_rate": 1.7466571120808684e-05, + "loss": 1.2768, "step": 5412 }, { - "epoch": 1.1350387921996226, - "grad_norm": 8.880645963740191, - "learning_rate": 1.4283831179497039e-05, - "loss": 1.0544, + "epoch": 0.7643321095735668, + "grad_norm": 3.904467858299182, + "learning_rate": 1.746555695406146e-05, + "loss": 0.962, "step": 5413 }, { - "epoch": 1.1352484797651499, - "grad_norm": 6.43805237999645, - "learning_rate": 1.4281785429950214e-05, - "loss": 0.7447, + "epoch": 0.7644733126235527, + "grad_norm": 4.048332341990393, + "learning_rate": 1.746454261381792e-05, + "loss": 1.061, "step": 5414 }, { - "epoch": 1.1354581673306772, - "grad_norm": 7.11316350625244, - "learning_rate": 1.4279739460954462e-05, - "loss": 0.8329, + "epoch": 0.7646145156735386, + "grad_norm": 2.89725901657611, + "learning_rate": 1.746352810010163e-05, + "loss": 0.7986, "step": 5415 }, { - "epoch": 1.1356678548962047, - "grad_norm": 7.289649089850713, - "learning_rate": 1.4277693272614638e-05, - "loss": 0.892, + "epoch": 0.7647557187235244, + "grad_norm": 3.675640570372638, + "learning_rate": 1.7462513412936168e-05, + "loss": 1.0599, "step": 5416 }, { - "epoch": 1.135877542461732, - "grad_norm": 7.6424405921300576, - "learning_rate": 1.4275646865035615e-05, - "loss": 0.8431, + "epoch": 0.7648969217735103, + "grad_norm": 4.130022945934367, + "learning_rate": 1.746149855234512e-05, + "loss": 1.2213, "step": 5417 }, { - "epoch": 1.1360872300272593, - "grad_norm": 6.440815832464622, - "learning_rate": 1.4273600238322276e-05, - "loss": 0.641, + "epoch": 0.7650381248234962, + "grad_norm": 4.340604375697738, + "learning_rate": 1.7460483518352068e-05, + "loss": 1.0833, "step": 5418 }, { - "epoch": 1.1362969175927868, - "grad_norm": 7.600968965507533, - "learning_rate": 1.427155339257951e-05, - "loss": 0.8979, + "epoch": 0.7651793278734821, + "grad_norm": 4.234077591550618, + "learning_rate": 1.74594683109806e-05, + "loss": 1.1927, "step": 5419 }, { - "epoch": 1.1365066051583141, - "grad_norm": 7.737941670705575, - "learning_rate": 1.4269506327912223e-05, - "loss": 1.13, + "epoch": 0.765320530923468, + "grad_norm": 3.6938625175461, + "learning_rate": 1.745845293025431e-05, + "loss": 1.0849, "step": 5420 }, { - "epoch": 1.1367162927238414, - "grad_norm": 7.535933877638378, - "learning_rate": 1.4267459044425336e-05, - "loss": 0.9126, + "epoch": 0.7654617339734539, + "grad_norm": 3.174687021745017, + "learning_rate": 1.7457437376196796e-05, + "loss": 0.887, "step": 5421 }, { - "epoch": 1.1369259802893688, - "grad_norm": 7.42295276144768, - "learning_rate": 1.4265411542223771e-05, - "loss": 0.7701, + "epoch": 0.7656029370234397, + "grad_norm": 3.3389908803388235, + "learning_rate": 1.7456421648831658e-05, + "loss": 1.0011, "step": 5422 }, { - "epoch": 1.1371356678548963, - "grad_norm": 8.979993618462947, - "learning_rate": 1.426336382141246e-05, - "loss": 0.9879, + "epoch": 0.7657441400734256, + "grad_norm": 3.253803332742542, + "learning_rate": 1.74554057481825e-05, + "loss": 0.9493, "step": 5423 }, { - "epoch": 1.1373453554204236, - "grad_norm": 7.87568916179552, - "learning_rate": 1.4261315882096365e-05, - "loss": 0.8786, + "epoch": 0.7658853431234115, + "grad_norm": 3.704802917420437, + "learning_rate": 1.745438967427293e-05, + "loss": 1.0061, "step": 5424 }, { - "epoch": 1.137555042985951, - "grad_norm": 6.525528916586531, - "learning_rate": 1.425926772438044e-05, - "loss": 0.7614, + "epoch": 0.7660265461733974, + "grad_norm": 4.484934165129177, + "learning_rate": 1.745337342712657e-05, + "loss": 1.2865, "step": 5425 }, { - "epoch": 1.1377647305514782, - "grad_norm": 6.422331421067672, - "learning_rate": 1.4257219348369655e-05, - "loss": 0.6883, + "epoch": 0.7661677492233833, + "grad_norm": 3.509162679541262, + "learning_rate": 1.7452357006767026e-05, + "loss": 1.2294, "step": 5426 }, { - "epoch": 1.1379744181170057, - "grad_norm": 7.327133505357456, - "learning_rate": 1.4255170754168991e-05, - "loss": 0.8042, + "epoch": 0.7663089522733691, + "grad_norm": 3.7455921833957726, + "learning_rate": 1.7451340413217925e-05, + "loss": 1.0694, "step": 5427 }, { - "epoch": 1.138184105682533, - "grad_norm": 6.436046421043193, - "learning_rate": 1.4253121941883453e-05, - "loss": 0.7845, + "epoch": 0.766450155323355, + "grad_norm": 2.9745703764914815, + "learning_rate": 1.745032364650289e-05, + "loss": 0.8589, "step": 5428 }, { - "epoch": 1.1383937932480603, - "grad_norm": 7.847622278661194, - "learning_rate": 1.4251072911618034e-05, - "loss": 0.6945, + "epoch": 0.7665913583733409, + "grad_norm": 3.41854172843559, + "learning_rate": 1.744930670664555e-05, + "loss": 1.0238, "step": 5429 }, { - "epoch": 1.1386034808135879, - "grad_norm": 8.087800338874967, - "learning_rate": 1.4249023663477756e-05, - "loss": 0.9851, + "epoch": 0.7667325614233268, + "grad_norm": 4.339301578153021, + "learning_rate": 1.744828959366954e-05, + "loss": 1.3126, "step": 5430 }, { - "epoch": 1.1388131683791152, - "grad_norm": 6.861010799839549, - "learning_rate": 1.4246974197567646e-05, - "loss": 0.5995, + "epoch": 0.7668737644733126, + "grad_norm": 3.764292027619486, + "learning_rate": 1.74472723075985e-05, + "loss": 1.1256, "step": 5431 }, { - "epoch": 1.1390228559446425, - "grad_norm": 6.665686617124723, - "learning_rate": 1.4244924513992744e-05, - "loss": 0.9058, + "epoch": 0.7670149675232985, + "grad_norm": 3.696481527413238, + "learning_rate": 1.744625484845606e-05, + "loss": 1.2617, "step": 5432 }, { - "epoch": 1.1392325435101698, - "grad_norm": 7.153367845813502, - "learning_rate": 1.4242874612858095e-05, - "loss": 0.7087, + "epoch": 0.7671561705732843, + "grad_norm": 4.644200044629729, + "learning_rate": 1.7445237216265877e-05, + "loss": 1.3092, "step": 5433 }, { - "epoch": 1.139442231075697, - "grad_norm": 7.148266461937071, - "learning_rate": 1.4240824494268766e-05, - "loss": 0.7308, + "epoch": 0.7672973736232702, + "grad_norm": 3.2806309193983894, + "learning_rate": 1.7444219411051598e-05, + "loss": 0.7684, "step": 5434 }, { - "epoch": 1.1396519186412246, - "grad_norm": 6.460811251948656, - "learning_rate": 1.4238774158329825e-05, - "loss": 0.7916, + "epoch": 0.7674385766732561, + "grad_norm": 4.518475474337838, + "learning_rate": 1.7443201432836874e-05, + "loss": 1.0505, "step": 5435 }, { - "epoch": 1.139861606206752, - "grad_norm": 7.631437728851122, - "learning_rate": 1.4236723605146357e-05, - "loss": 0.9981, + "epoch": 0.767579779723242, + "grad_norm": 3.7840102976092695, + "learning_rate": 1.744218328164536e-05, + "loss": 1.0776, "step": 5436 }, { - "epoch": 1.1400712937722792, - "grad_norm": 8.402694519254885, - "learning_rate": 1.4234672834823455e-05, - "loss": 1.0714, + "epoch": 0.7677209827732279, + "grad_norm": 2.9856873075148673, + "learning_rate": 1.7441164957500728e-05, + "loss": 0.9342, "step": 5437 }, { - "epoch": 1.1402809813378068, - "grad_norm": 7.574711241112634, - "learning_rate": 1.4232621847466226e-05, - "loss": 0.8949, + "epoch": 0.7678621858232137, + "grad_norm": 3.1755142624079937, + "learning_rate": 1.744014646042663e-05, + "loss": 0.9841, "step": 5438 }, { - "epoch": 1.140490668903334, - "grad_norm": 6.289394414033147, - "learning_rate": 1.4230570643179788e-05, - "loss": 0.6595, + "epoch": 0.7680033888731996, + "grad_norm": 3.345074315074015, + "learning_rate": 1.7439127790446743e-05, + "loss": 0.876, "step": 5439 }, { - "epoch": 1.1407003564688614, - "grad_norm": 8.17520562098901, - "learning_rate": 1.4228519222069263e-05, - "loss": 0.7566, + "epoch": 0.7681445919231855, + "grad_norm": 3.751632042020778, + "learning_rate": 1.7438108947584737e-05, + "loss": 1.0794, "step": 5440 }, { - "epoch": 1.1409100440343887, - "grad_norm": 6.496316717579892, - "learning_rate": 1.4226467584239798e-05, - "loss": 0.802, + "epoch": 0.7682857949731714, + "grad_norm": 3.000512645335439, + "learning_rate": 1.7437089931864292e-05, + "loss": 0.8496, "step": 5441 }, { - "epoch": 1.1411197315999162, - "grad_norm": 7.5708734055535265, - "learning_rate": 1.4224415729796537e-05, - "loss": 0.7837, + "epoch": 0.7684269980231573, + "grad_norm": 4.340099870446398, + "learning_rate": 1.7436070743309093e-05, + "loss": 1.2383, "step": 5442 }, { - "epoch": 1.1413294191654435, - "grad_norm": 7.642492943105506, - "learning_rate": 1.4222363658844644e-05, - "loss": 0.9662, + "epoch": 0.7685682010731432, + "grad_norm": 4.187557147082218, + "learning_rate": 1.7435051381942817e-05, + "loss": 1.243, "step": 5443 }, { - "epoch": 1.1415391067309708, - "grad_norm": 8.61439368358466, - "learning_rate": 1.4220311371489288e-05, - "loss": 1.0253, + "epoch": 0.768709404123129, + "grad_norm": 3.7940743329894837, + "learning_rate": 1.743403184778916e-05, + "loss": 0.9338, "step": 5444 }, { - "epoch": 1.1417487942964981, - "grad_norm": 7.218490475421361, - "learning_rate": 1.4218258867835659e-05, - "loss": 0.9208, + "epoch": 0.7688506071731149, + "grad_norm": 3.7226482026615093, + "learning_rate": 1.743301214087181e-05, + "loss": 1.0496, "step": 5445 }, { - "epoch": 1.1419584818620256, - "grad_norm": 7.367251894731128, - "learning_rate": 1.421620614798894e-05, - "loss": 0.8471, + "epoch": 0.7689918102231008, + "grad_norm": 4.007191339929926, + "learning_rate": 1.743199226121447e-05, + "loss": 1.0488, "step": 5446 }, { - "epoch": 1.142168169427553, - "grad_norm": 7.797885488867483, - "learning_rate": 1.421415321205435e-05, - "loss": 0.9664, + "epoch": 0.7691330132730867, + "grad_norm": 3.024329206614843, + "learning_rate": 1.743097220884084e-05, + "loss": 0.8295, "step": 5447 }, { - "epoch": 1.1423778569930803, - "grad_norm": 6.8906410375503615, - "learning_rate": 1.4212100060137095e-05, - "loss": 0.8608, + "epoch": 0.7692742163230726, + "grad_norm": 3.6449986142668096, + "learning_rate": 1.7429951983774626e-05, + "loss": 0.8466, "step": 5448 }, { - "epoch": 1.1425875445586078, - "grad_norm": 7.2203249763958, - "learning_rate": 1.4210046692342407e-05, - "loss": 0.9089, + "epoch": 0.7694154193730585, + "grad_norm": 3.2101255872679757, + "learning_rate": 1.7428931586039538e-05, + "loss": 1.111, "step": 5449 }, { - "epoch": 1.142797232124135, - "grad_norm": 7.503569062454911, - "learning_rate": 1.4207993108775525e-05, - "loss": 0.9927, + "epoch": 0.7695566224230443, + "grad_norm": 3.313580892439186, + "learning_rate": 1.742791101565928e-05, + "loss": 0.9577, "step": 5450 }, { - "epoch": 1.1430069196896624, - "grad_norm": 6.232991054800343, - "learning_rate": 1.4205939309541696e-05, - "loss": 0.6247, + "epoch": 0.7696978254730302, + "grad_norm": 3.505627211405446, + "learning_rate": 1.7426890272657585e-05, + "loss": 1.0076, "step": 5451 }, { - "epoch": 1.1432166072551897, - "grad_norm": 6.677568354945869, - "learning_rate": 1.4203885294746182e-05, - "loss": 0.7053, + "epoch": 0.7698390285230161, + "grad_norm": 3.0838992554599916, + "learning_rate": 1.7425869357058167e-05, + "loss": 1.0761, "step": 5452 }, { - "epoch": 1.1434262948207172, - "grad_norm": 8.240647765606626, - "learning_rate": 1.420183106449426e-05, - "loss": 1.0007, + "epoch": 0.769980231573002, + "grad_norm": 3.595019344386333, + "learning_rate": 1.7424848268884752e-05, + "loss": 1.1028, "step": 5453 }, { - "epoch": 1.1436359823862445, - "grad_norm": 6.646773335539556, - "learning_rate": 1.4199776618891203e-05, - "loss": 0.9256, + "epoch": 0.7701214346229879, + "grad_norm": 3.6288427831493277, + "learning_rate": 1.742382700816107e-05, + "loss": 1.1409, "step": 5454 }, { - "epoch": 1.1438456699517718, - "grad_norm": 6.482277637266567, - "learning_rate": 1.4197721958042314e-05, - "loss": 0.8001, + "epoch": 0.7702626376729738, + "grad_norm": 3.3321765766156926, + "learning_rate": 1.7422805574910856e-05, + "loss": 0.93, "step": 5455 }, { - "epoch": 1.1440553575172991, - "grad_norm": 7.002879712712853, - "learning_rate": 1.4195667082052891e-05, - "loss": 0.9192, + "epoch": 0.7704038407229596, + "grad_norm": 3.464115670158806, + "learning_rate": 1.742178396915784e-05, + "loss": 1.0121, "step": 5456 }, { - "epoch": 1.1442650450828267, - "grad_norm": 7.143624612409936, - "learning_rate": 1.4193611991028256e-05, - "loss": 0.8597, + "epoch": 0.7705450437729455, + "grad_norm": 4.224355370678105, + "learning_rate": 1.7420762190925774e-05, + "loss": 1.1307, "step": 5457 }, { - "epoch": 1.144474732648354, - "grad_norm": 7.338617162956875, - "learning_rate": 1.419155668507373e-05, - "loss": 0.7998, + "epoch": 0.7706862468229314, + "grad_norm": 3.5041705622031567, + "learning_rate": 1.74197402402384e-05, + "loss": 1.0237, "step": 5458 }, { - "epoch": 1.1446844202138813, - "grad_norm": 7.6422898013689435, - "learning_rate": 1.4189501164294657e-05, - "loss": 1.0, + "epoch": 0.7708274498729173, + "grad_norm": 2.6948047971716114, + "learning_rate": 1.7418718117119465e-05, + "loss": 0.7643, "step": 5459 }, { - "epoch": 1.1448941077794086, - "grad_norm": 9.784501662221857, - "learning_rate": 1.4187445428796381e-05, - "loss": 1.2785, + "epoch": 0.7709686529229032, + "grad_norm": 2.9406067347511216, + "learning_rate": 1.7417695821592727e-05, + "loss": 0.86, "step": 5460 }, { - "epoch": 1.1451037953449361, - "grad_norm": 6.850346504508089, - "learning_rate": 1.4185389478684264e-05, - "loss": 0.7997, + "epoch": 0.7711098559728891, + "grad_norm": 3.351453901775548, + "learning_rate": 1.741667335368194e-05, + "loss": 1.0129, "step": 5461 }, { - "epoch": 1.1453134829104634, - "grad_norm": 7.633463296409012, - "learning_rate": 1.4183333314063678e-05, - "loss": 0.9655, + "epoch": 0.7712510590228749, + "grad_norm": 4.208230882773419, + "learning_rate": 1.7415650713410867e-05, + "loss": 1.1629, "step": 5462 }, { - "epoch": 1.1455231704759907, - "grad_norm": 6.754837321516644, - "learning_rate": 1.4181276935040003e-05, - "loss": 0.9435, + "epoch": 0.7713922620728608, + "grad_norm": 4.019132949998647, + "learning_rate": 1.7414627900803274e-05, + "loss": 1.1561, "step": 5463 }, { - "epoch": 1.145732858041518, - "grad_norm": 6.987679325512715, - "learning_rate": 1.4179220341718633e-05, - "loss": 0.8839, + "epoch": 0.7715334651228467, + "grad_norm": 4.247148358457009, + "learning_rate": 1.7413604915882932e-05, + "loss": 0.9532, "step": 5464 }, { - "epoch": 1.1459425456070456, - "grad_norm": 6.070002473673756, - "learning_rate": 1.4177163534204971e-05, - "loss": 0.6228, + "epoch": 0.7716746681728325, + "grad_norm": 3.569117274691089, + "learning_rate": 1.7412581758673612e-05, + "loss": 0.9867, "step": 5465 }, { - "epoch": 1.1461522331725729, - "grad_norm": 7.71743445030266, - "learning_rate": 1.4175106512604435e-05, - "loss": 0.9694, + "epoch": 0.7718158712228184, + "grad_norm": 3.1162766539873985, + "learning_rate": 1.7411558429199095e-05, + "loss": 0.9167, "step": 5466 }, { - "epoch": 1.1463619207381002, - "grad_norm": 6.2618518902646025, - "learning_rate": 1.4173049277022448e-05, - "loss": 0.8574, + "epoch": 0.7719570742728042, + "grad_norm": 5.46008178870321, + "learning_rate": 1.741053492748316e-05, + "loss": 1.2995, "step": 5467 }, { - "epoch": 1.1465716083036277, - "grad_norm": 7.380993351776506, - "learning_rate": 1.4170991827564451e-05, - "loss": 1.0179, + "epoch": 0.7720982773227901, + "grad_norm": 3.2082701757948655, + "learning_rate": 1.7409511253549592e-05, + "loss": 0.8864, "step": 5468 }, { - "epoch": 1.146781295869155, - "grad_norm": 7.817066112586511, - "learning_rate": 1.4168934164335883e-05, - "loss": 0.9811, + "epoch": 0.772239480372776, + "grad_norm": 2.8525684895234895, + "learning_rate": 1.7408487407422186e-05, + "loss": 0.7714, "step": 5469 }, { - "epoch": 1.1469909834346823, - "grad_norm": 7.299111753765806, - "learning_rate": 1.4166876287442212e-05, - "loss": 0.993, + "epoch": 0.7723806834227619, + "grad_norm": 3.6576185486850905, + "learning_rate": 1.7407463389124728e-05, + "loss": 1.0568, "step": 5470 }, { - "epoch": 1.1472006710002096, - "grad_norm": 8.375354497127166, - "learning_rate": 1.4164818196988902e-05, - "loss": 1.2464, + "epoch": 0.7725218864727478, + "grad_norm": 4.286791900303226, + "learning_rate": 1.7406439198681024e-05, + "loss": 1.1446, "step": 5471 }, { - "epoch": 1.1474103585657371, - "grad_norm": 6.314364313934077, - "learning_rate": 1.416275989308144e-05, - "loss": 0.8252, + "epoch": 0.7726630895227337, + "grad_norm": 3.1759813703188375, + "learning_rate": 1.7405414836114868e-05, + "loss": 0.8352, "step": 5472 }, { - "epoch": 1.1476200461312644, - "grad_norm": 8.534205740589762, - "learning_rate": 1.416070137582531e-05, - "loss": 1.0242, + "epoch": 0.7728042925727195, + "grad_norm": 2.6397627933379724, + "learning_rate": 1.740439030145007e-05, + "loss": 0.7177, "step": 5473 }, { - "epoch": 1.1478297336967918, - "grad_norm": 6.490236419386976, - "learning_rate": 1.4158642645326022e-05, - "loss": 0.7404, + "epoch": 0.7729454956227054, + "grad_norm": 4.462811800151406, + "learning_rate": 1.740336559471044e-05, + "loss": 1.0938, "step": 5474 }, { - "epoch": 1.148039421262319, - "grad_norm": 9.556320277445884, - "learning_rate": 1.415658370168908e-05, - "loss": 1.0618, + "epoch": 0.7730866986726913, + "grad_norm": 3.95150236802696, + "learning_rate": 1.7402340715919793e-05, + "loss": 1.0352, "step": 5475 }, { - "epoch": 1.1482491088278466, - "grad_norm": 9.629835816676714, - "learning_rate": 1.4154524545020016e-05, - "loss": 1.0136, + "epoch": 0.7732279017226772, + "grad_norm": 3.273378117230015, + "learning_rate": 1.7401315665101942e-05, + "loss": 1.0301, "step": 5476 }, { - "epoch": 1.148458796393374, - "grad_norm": 7.8685707630138415, - "learning_rate": 1.4152465175424364e-05, - "loss": 0.9567, + "epoch": 0.7733691047726631, + "grad_norm": 5.649363911289255, + "learning_rate": 1.740029044228071e-05, + "loss": 1.0424, "step": 5477 }, { - "epoch": 1.1486684839589012, - "grad_norm": 6.944827705966579, - "learning_rate": 1.415040559300767e-05, - "loss": 0.7847, + "epoch": 0.773510307822649, + "grad_norm": 3.7934081582467614, + "learning_rate": 1.7399265047479926e-05, + "loss": 1.1031, "step": 5478 }, { - "epoch": 1.1488781715244287, - "grad_norm": 6.777523453800765, - "learning_rate": 1.4148345797875491e-05, - "loss": 0.7664, + "epoch": 0.7736515108726348, + "grad_norm": 3.1056314494622077, + "learning_rate": 1.739823948072342e-05, + "loss": 0.8952, "step": 5479 }, { - "epoch": 1.149087859089956, - "grad_norm": 8.704783105333753, - "learning_rate": 1.4146285790133397e-05, - "loss": 1.074, + "epoch": 0.7737927139226207, + "grad_norm": 3.026185934311214, + "learning_rate": 1.739721374203502e-05, + "loss": 0.9753, "step": 5480 }, { - "epoch": 1.1492975466554833, - "grad_norm": 7.09972048807626, - "learning_rate": 1.4144225569886958e-05, - "loss": 1.1704, + "epoch": 0.7739339169726066, + "grad_norm": 3.8915820198743676, + "learning_rate": 1.7396187831438568e-05, + "loss": 1.2617, "step": 5481 }, { - "epoch": 1.1495072342210106, - "grad_norm": 7.594352756869436, - "learning_rate": 1.4142165137241776e-05, - "loss": 0.9961, + "epoch": 0.7740751200225925, + "grad_norm": 3.4740854816840283, + "learning_rate": 1.7395161748957905e-05, + "loss": 0.9645, "step": 5482 }, { - "epoch": 1.149716921786538, - "grad_norm": 8.34161196552639, - "learning_rate": 1.4140104492303448e-05, - "loss": 0.9647, + "epoch": 0.7742163230725784, + "grad_norm": 3.629940180428351, + "learning_rate": 1.7394135494616876e-05, + "loss": 1.1941, "step": 5483 }, { - "epoch": 1.1499266093520655, - "grad_norm": 7.735172609181506, - "learning_rate": 1.4138043635177581e-05, - "loss": 0.791, + "epoch": 0.7743575261225643, + "grad_norm": 3.574778499354198, + "learning_rate": 1.7393109068439336e-05, + "loss": 1.1333, "step": 5484 }, { - "epoch": 1.1501362969175928, - "grad_norm": 6.786035346943688, - "learning_rate": 1.4135982565969801e-05, - "loss": 0.8098, + "epoch": 0.7744987291725501, + "grad_norm": 3.9754269387608394, + "learning_rate": 1.739208247044913e-05, + "loss": 1.0417, "step": 5485 }, { - "epoch": 1.15034598448312, - "grad_norm": 7.4437442520881705, - "learning_rate": 1.413392128478574e-05, - "loss": 0.7806, + "epoch": 0.774639932222536, + "grad_norm": 3.701183823612949, + "learning_rate": 1.739105570067012e-05, + "loss": 0.9895, "step": 5486 }, { - "epoch": 1.1505556720486476, - "grad_norm": 6.819171280766591, - "learning_rate": 1.4131859791731044e-05, - "loss": 0.8952, + "epoch": 0.7747811352725219, + "grad_norm": 3.3709946006546323, + "learning_rate": 1.7390028759126165e-05, + "loss": 0.8507, "step": 5487 }, { - "epoch": 1.150765359614175, - "grad_norm": 6.582856210208786, - "learning_rate": 1.4129798086911368e-05, - "loss": 0.7943, + "epoch": 0.7749223383225078, + "grad_norm": 3.8490580919742188, + "learning_rate": 1.7389001645841137e-05, + "loss": 1.2366, "step": 5488 }, { - "epoch": 1.1509750471797022, - "grad_norm": 7.271065259561847, - "learning_rate": 1.4127736170432377e-05, - "loss": 0.7505, + "epoch": 0.7750635413724937, + "grad_norm": 3.251808414210018, + "learning_rate": 1.73879743608389e-05, + "loss": 0.7669, "step": 5489 }, { - "epoch": 1.1511847347452295, - "grad_norm": 6.887401926933448, - "learning_rate": 1.412567404239975e-05, - "loss": 0.9178, + "epoch": 0.7752047444224796, + "grad_norm": 3.927139522898013, + "learning_rate": 1.738694690414333e-05, + "loss": 1.0259, "step": 5490 }, { - "epoch": 1.151394422310757, - "grad_norm": 6.193309009302699, - "learning_rate": 1.4123611702919171e-05, - "loss": 0.8851, + "epoch": 0.7753459474724654, + "grad_norm": 3.748144603127959, + "learning_rate": 1.7385919275778306e-05, + "loss": 1.0627, "step": 5491 }, { - "epoch": 1.1516041098762844, - "grad_norm": 6.81822685477887, - "learning_rate": 1.412154915209634e-05, - "loss": 0.8567, + "epoch": 0.7754871505224513, + "grad_norm": 3.3794624734631613, + "learning_rate": 1.7384891475767706e-05, + "loss": 0.9415, "step": 5492 }, { - "epoch": 1.1518137974418117, - "grad_norm": 8.329740290258938, - "learning_rate": 1.4119486390036965e-05, - "loss": 0.9356, + "epoch": 0.7756283535724372, + "grad_norm": 3.7116643244219176, + "learning_rate": 1.7383863504135416e-05, + "loss": 0.9309, "step": 5493 }, { - "epoch": 1.152023485007339, - "grad_norm": 7.950447617615511, - "learning_rate": 1.411742341684677e-05, - "loss": 0.9432, + "epoch": 0.7757695566224231, + "grad_norm": 3.3049839921968474, + "learning_rate": 1.738283536090533e-05, + "loss": 0.8912, "step": 5494 }, { - "epoch": 1.1522331725728665, - "grad_norm": 7.8955176151099185, - "learning_rate": 1.4115360232631483e-05, - "loss": 0.936, + "epoch": 0.775910759672409, + "grad_norm": 3.681256149724283, + "learning_rate": 1.7381807046101336e-05, + "loss": 0.9897, "step": 5495 }, { - "epoch": 1.1524428601383938, - "grad_norm": 7.181955443057608, - "learning_rate": 1.4113296837496845e-05, - "loss": 0.9743, + "epoch": 0.7760519627223949, + "grad_norm": 3.672725583650286, + "learning_rate": 1.7380778559747335e-05, + "loss": 0.9971, "step": 5496 }, { - "epoch": 1.1526525477039211, - "grad_norm": 7.175857646589454, - "learning_rate": 1.4111233231548613e-05, - "loss": 0.7054, + "epoch": 0.7761931657723807, + "grad_norm": 3.3309420464260393, + "learning_rate": 1.7379749901867227e-05, + "loss": 0.9425, "step": 5497 }, { - "epoch": 1.1528622352694486, - "grad_norm": 6.605911814145561, - "learning_rate": 1.4109169414892542e-05, - "loss": 0.7528, + "epoch": 0.7763343688223666, + "grad_norm": 3.311376863797063, + "learning_rate": 1.7378721072484923e-05, + "loss": 0.9884, "step": 5498 }, { - "epoch": 1.153071922834976, - "grad_norm": 6.698349629541683, - "learning_rate": 1.410710538763442e-05, - "loss": 0.8278, + "epoch": 0.7764755718723524, + "grad_norm": 3.717433860806013, + "learning_rate": 1.7377692071624323e-05, + "loss": 1.0926, "step": 5499 }, { - "epoch": 1.1532816104005033, - "grad_norm": 7.1362884474757, - "learning_rate": 1.4105041149880017e-05, - "loss": 0.9904, + "epoch": 0.7766167749223383, + "grad_norm": 3.4785140139755706, + "learning_rate": 1.7376662899309346e-05, + "loss": 1.0404, "step": 5500 }, { - "epoch": 1.1534912979660306, - "grad_norm": 7.4270664036861245, - "learning_rate": 1.4102976701735136e-05, - "loss": 0.8968, + "epoch": 0.7767579779723242, + "grad_norm": 4.332064995389705, + "learning_rate": 1.737563355556391e-05, + "loss": 1.1124, "step": 5501 }, { - "epoch": 1.1537009855315579, - "grad_norm": 8.133383293898993, - "learning_rate": 1.4100912043305583e-05, - "loss": 1.0909, + "epoch": 0.77689918102231, + "grad_norm": 4.2221848129915704, + "learning_rate": 1.7374604040411934e-05, + "loss": 1.3102, "step": 5502 }, { - "epoch": 1.1539106730970854, - "grad_norm": 8.679252677141207, - "learning_rate": 1.4098847174697177e-05, - "loss": 1.153, + "epoch": 0.7770403840722959, + "grad_norm": 3.8195297291987615, + "learning_rate": 1.7373574353877346e-05, + "loss": 1.0377, "step": 5503 }, { - "epoch": 1.1541203606626127, - "grad_norm": 6.3730700554106114, - "learning_rate": 1.409678209601574e-05, - "loss": 0.7648, + "epoch": 0.7771815871222818, + "grad_norm": 3.6650804155583843, + "learning_rate": 1.7372544495984076e-05, + "loss": 1.2207, "step": 5504 }, { - "epoch": 1.15433004822814, - "grad_norm": 6.911199352827545, - "learning_rate": 1.4094716807367119e-05, - "loss": 0.7541, + "epoch": 0.7773227901722677, + "grad_norm": 3.552013961931948, + "learning_rate": 1.7371514466756055e-05, + "loss": 1.029, "step": 5505 }, { - "epoch": 1.1545397357936675, - "grad_norm": 7.3096013355807195, - "learning_rate": 1.409265130885716e-05, - "loss": 0.9996, + "epoch": 0.7774639932222536, + "grad_norm": 4.007375262694977, + "learning_rate": 1.7370484266217223e-05, + "loss": 1.0972, "step": 5506 }, { - "epoch": 1.1547494233591948, - "grad_norm": 9.500041494545538, - "learning_rate": 1.4090585600591719e-05, - "loss": 1.2379, + "epoch": 0.7776051962722395, + "grad_norm": 3.9234146059004296, + "learning_rate": 1.7369453894391513e-05, + "loss": 1.0395, "step": 5507 }, { - "epoch": 1.1549591109247221, - "grad_norm": 8.337608550441358, - "learning_rate": 1.4088519682676676e-05, - "loss": 1.0607, + "epoch": 0.7777463993222253, + "grad_norm": 3.3864143224196317, + "learning_rate": 1.7368423351302884e-05, + "loss": 0.9433, "step": 5508 }, { - "epoch": 1.1551687984902494, - "grad_norm": 7.315950088466112, - "learning_rate": 1.4086453555217908e-05, - "loss": 0.9575, + "epoch": 0.7778876023722112, + "grad_norm": 3.4138152270609163, + "learning_rate": 1.7367392636975275e-05, + "loss": 0.9627, "step": 5509 }, { - "epoch": 1.155378486055777, - "grad_norm": 5.79010133280334, - "learning_rate": 1.4084387218321302e-05, - "loss": 0.5228, + "epoch": 0.7780288054221971, + "grad_norm": 5.298940634031672, + "learning_rate": 1.7366361751432645e-05, + "loss": 1.2785, "step": 5510 }, { - "epoch": 1.1555881736213043, - "grad_norm": 6.080233663676579, - "learning_rate": 1.4082320672092773e-05, - "loss": 0.8593, + "epoch": 0.778170008472183, + "grad_norm": 3.450417246640402, + "learning_rate": 1.7365330694698947e-05, + "loss": 0.907, "step": 5511 }, { - "epoch": 1.1557978611868316, - "grad_norm": 8.448473847480178, - "learning_rate": 1.4080253916638226e-05, - "loss": 0.8149, + "epoch": 0.7783112115221689, + "grad_norm": 3.612703100238923, + "learning_rate": 1.7364299466798146e-05, + "loss": 1.1738, "step": 5512 }, { - "epoch": 1.156007548752359, - "grad_norm": 7.138541019769388, - "learning_rate": 1.4078186952063588e-05, - "loss": 0.7999, + "epoch": 0.7784524145721547, + "grad_norm": 3.396398651059966, + "learning_rate": 1.7363268067754205e-05, + "loss": 0.9299, "step": 5513 }, { - "epoch": 1.1562172363178864, - "grad_norm": 8.235108656836639, - "learning_rate": 1.4076119778474798e-05, - "loss": 0.973, + "epoch": 0.7785936176221406, + "grad_norm": 4.312523998912026, + "learning_rate": 1.7362236497591097e-05, + "loss": 0.9635, "step": 5514 }, { - "epoch": 1.1564269238834137, - "grad_norm": 6.83693505093105, - "learning_rate": 1.4074052395977798e-05, - "loss": 0.777, + "epoch": 0.7787348206721265, + "grad_norm": 4.188675861491466, + "learning_rate": 1.7361204756332788e-05, + "loss": 1.0595, "step": 5515 }, { - "epoch": 1.156636611448941, - "grad_norm": 7.713760104790157, - "learning_rate": 1.4071984804678547e-05, - "loss": 1.0136, + "epoch": 0.7788760237221124, + "grad_norm": 3.6056031724780397, + "learning_rate": 1.7360172844003263e-05, + "loss": 0.972, "step": 5516 }, { - "epoch": 1.1568462990144686, - "grad_norm": 7.964724850386856, - "learning_rate": 1.4069917004683014e-05, - "loss": 1.0548, + "epoch": 0.7790172267720983, + "grad_norm": 3.6793624464795034, + "learning_rate": 1.7359140760626497e-05, + "loss": 1.188, "step": 5517 }, { - "epoch": 1.1570559865799959, - "grad_norm": 8.074774986415274, - "learning_rate": 1.4067848996097176e-05, - "loss": 1.0524, + "epoch": 0.7791584298220842, + "grad_norm": 4.366159804164966, + "learning_rate": 1.7358108506226477e-05, + "loss": 0.9186, "step": 5518 }, { - "epoch": 1.1572656741455232, - "grad_norm": 5.786259214412862, - "learning_rate": 1.4065780779027017e-05, - "loss": 0.674, + "epoch": 0.77929963287207, + "grad_norm": 3.0893745697555515, + "learning_rate": 1.7357076080827195e-05, + "loss": 0.8816, "step": 5519 }, { - "epoch": 1.1574753617110505, - "grad_norm": 6.6902506177516505, - "learning_rate": 1.4063712353578545e-05, - "loss": 0.7824, + "epoch": 0.7794408359220559, + "grad_norm": 3.030196049102594, + "learning_rate": 1.7356043484452643e-05, + "loss": 0.7042, "step": 5520 }, { - "epoch": 1.1576850492765778, - "grad_norm": 5.974558650783818, - "learning_rate": 1.4061643719857764e-05, - "loss": 0.785, + "epoch": 0.7795820389720418, + "grad_norm": 3.7040992261152614, + "learning_rate": 1.7355010717126817e-05, + "loss": 1.0734, "step": 5521 }, { - "epoch": 1.1578947368421053, - "grad_norm": 8.159967530096615, - "learning_rate": 1.4059574877970701e-05, - "loss": 1.0235, + "epoch": 0.7797232420220277, + "grad_norm": 4.264404868495973, + "learning_rate": 1.7353977778873718e-05, + "loss": 1.1105, "step": 5522 }, { - "epoch": 1.1581044244076326, - "grad_norm": 8.013114604405413, - "learning_rate": 1.4057505828023384e-05, - "loss": 1.118, + "epoch": 0.7798644450720136, + "grad_norm": 4.357580447683498, + "learning_rate": 1.7352944669717352e-05, + "loss": 1.2296, "step": 5523 }, { - "epoch": 1.15831411197316, - "grad_norm": 7.218966011225552, - "learning_rate": 1.4055436570121854e-05, - "loss": 0.8187, + "epoch": 0.7800056481219995, + "grad_norm": 3.063175072050123, + "learning_rate": 1.7351911389681725e-05, + "loss": 0.8829, "step": 5524 }, { - "epoch": 1.1585237995386874, - "grad_norm": 6.347011089257012, - "learning_rate": 1.4053367104372169e-05, - "loss": 0.675, + "epoch": 0.7801468511719853, + "grad_norm": 3.9241556927208645, + "learning_rate": 1.7350877938790855e-05, + "loss": 1.1596, "step": 5525 }, { - "epoch": 1.1587334871042148, - "grad_norm": 7.232725412116838, - "learning_rate": 1.4051297430880389e-05, - "loss": 0.97, + "epoch": 0.7802880542219712, + "grad_norm": 3.5182196371223218, + "learning_rate": 1.7349844317068754e-05, + "loss": 0.9067, "step": 5526 }, { - "epoch": 1.158943174669742, - "grad_norm": 7.367964054879141, - "learning_rate": 1.4049227549752584e-05, - "loss": 0.7577, + "epoch": 0.7804292572719571, + "grad_norm": 3.586726406258281, + "learning_rate": 1.7348810524539447e-05, + "loss": 0.9022, "step": 5527 }, { - "epoch": 1.1591528622352694, - "grad_norm": 6.3463847016507975, - "learning_rate": 1.4047157461094849e-05, - "loss": 0.83, + "epoch": 0.780570460321943, + "grad_norm": 3.8174445189897477, + "learning_rate": 1.7347776561226956e-05, + "loss": 1.0311, "step": 5528 }, { - "epoch": 1.159362549800797, - "grad_norm": 8.202100298322755, - "learning_rate": 1.4045087165013275e-05, - "loss": 0.8041, + "epoch": 0.7807116633719289, + "grad_norm": 3.4182166002878267, + "learning_rate": 1.734674242715531e-05, + "loss": 0.9857, "step": 5529 }, { - "epoch": 1.1595722373663242, - "grad_norm": 6.54470561602857, - "learning_rate": 1.4043016661613967e-05, - "loss": 0.6681, + "epoch": 0.7808528664219148, + "grad_norm": 2.9570319719723925, + "learning_rate": 1.7345708122348543e-05, + "loss": 0.803, "step": 5530 }, { - "epoch": 1.1597819249318515, - "grad_norm": 7.096689065706821, - "learning_rate": 1.404094595100304e-05, - "loss": 1.0322, + "epoch": 0.7809940694719006, + "grad_norm": 4.15145507734481, + "learning_rate": 1.7344673646830696e-05, + "loss": 1.3433, "step": 5531 }, { - "epoch": 1.1599916124973788, - "grad_norm": 7.382077728774741, - "learning_rate": 1.403887503328663e-05, - "loss": 0.7374, + "epoch": 0.7811352725218865, + "grad_norm": 3.7864881117714413, + "learning_rate": 1.73436390006258e-05, + "loss": 0.9908, "step": 5532 }, { - "epoch": 1.1602013000629063, - "grad_norm": 6.215511067107648, - "learning_rate": 1.4036803908570864e-05, - "loss": 0.8929, + "epoch": 0.7812764755718723, + "grad_norm": 3.5576658542729933, + "learning_rate": 1.734260418375791e-05, + "loss": 1.0185, "step": 5533 }, { - "epoch": 1.1604109876284336, - "grad_norm": 6.627689878403522, - "learning_rate": 1.40347325769619e-05, - "loss": 0.8195, + "epoch": 0.7814176786218582, + "grad_norm": 3.7861328871430175, + "learning_rate": 1.7341569196251065e-05, + "loss": 1.0762, "step": 5534 }, { - "epoch": 1.160620675193961, - "grad_norm": 8.518418074280659, - "learning_rate": 1.4032661038565892e-05, - "loss": 1.1725, + "epoch": 0.7815588816718441, + "grad_norm": 3.955852146748055, + "learning_rate": 1.7340534038129324e-05, + "loss": 1.1641, "step": 5535 }, { - "epoch": 1.1608303627594885, - "grad_norm": 7.432250519187269, - "learning_rate": 1.4030589293489012e-05, - "loss": 1.0548, + "epoch": 0.78170008472183, + "grad_norm": 3.514410541832739, + "learning_rate": 1.7339498709416744e-05, + "loss": 1.08, "step": 5536 }, { - "epoch": 1.1610400503250158, - "grad_norm": 7.426710246257958, - "learning_rate": 1.4028517341837438e-05, - "loss": 0.8218, + "epoch": 0.7818412877718158, + "grad_norm": 3.7785978993521465, + "learning_rate": 1.733846321013738e-05, + "loss": 0.9411, "step": 5537 }, { - "epoch": 1.161249737890543, - "grad_norm": 7.370784988061927, - "learning_rate": 1.4026445183717367e-05, - "loss": 0.9162, + "epoch": 0.7819824908218017, + "grad_norm": 3.680362821417703, + "learning_rate": 1.7337427540315305e-05, + "loss": 1.1397, "step": 5538 }, { - "epoch": 1.1614594254560704, - "grad_norm": 7.189066173863027, - "learning_rate": 1.4024372819234992e-05, - "loss": 0.7991, + "epoch": 0.7821236938717876, + "grad_norm": 3.172021116652061, + "learning_rate": 1.733639169997458e-05, + "loss": 0.9694, "step": 5539 }, { - "epoch": 1.1616691130215977, - "grad_norm": 6.5924398303649445, - "learning_rate": 1.4022300248496534e-05, - "loss": 0.8224, + "epoch": 0.7822648969217735, + "grad_norm": 3.2258629903312883, + "learning_rate": 1.733535568913928e-05, + "loss": 0.9221, "step": 5540 }, { - "epoch": 1.1618788005871252, - "grad_norm": 6.314382383975065, - "learning_rate": 1.402022747160821e-05, - "loss": 0.7187, + "epoch": 0.7824060999717594, + "grad_norm": 4.3784446453653185, + "learning_rate": 1.733431950783348e-05, + "loss": 1.0239, "step": 5541 }, { - "epoch": 1.1620884881526525, - "grad_norm": 8.031428610173755, - "learning_rate": 1.4018154488676255e-05, - "loss": 0.9767, + "epoch": 0.7825473030217452, + "grad_norm": 3.743959887502583, + "learning_rate": 1.7333283156081266e-05, + "loss": 0.9902, "step": 5542 }, { - "epoch": 1.1622981757181798, - "grad_norm": 7.652719003610233, - "learning_rate": 1.4016081299806914e-05, - "loss": 0.9668, + "epoch": 0.7826885060717311, + "grad_norm": 3.225849988433045, + "learning_rate": 1.7332246633906717e-05, + "loss": 0.8535, "step": 5543 }, { - "epoch": 1.1625078632837074, - "grad_norm": 8.804170511260384, - "learning_rate": 1.4014007905106442e-05, - "loss": 1.0849, + "epoch": 0.782829709121717, + "grad_norm": 3.423264695366045, + "learning_rate": 1.733120994133392e-05, + "loss": 0.8664, "step": 5544 }, { - "epoch": 1.1627175508492347, - "grad_norm": 6.260027568115352, - "learning_rate": 1.40119343046811e-05, - "loss": 0.708, + "epoch": 0.7829709121717029, + "grad_norm": 3.0580152853401135, + "learning_rate": 1.7330173078386975e-05, + "loss": 0.9779, "step": 5545 }, { - "epoch": 1.162927238414762, - "grad_norm": 6.406260400129809, - "learning_rate": 1.400986049863717e-05, - "loss": 0.5666, + "epoch": 0.7831121152216888, + "grad_norm": 4.930223593609097, + "learning_rate": 1.732913604508997e-05, + "loss": 1.2184, "step": 5546 }, { - "epoch": 1.1631369259802893, - "grad_norm": 7.281576327350118, - "learning_rate": 1.4007786487080935e-05, - "loss": 0.7486, + "epoch": 0.7832533182716747, + "grad_norm": 4.649803431847359, + "learning_rate": 1.7328098841467008e-05, + "loss": 1.0283, "step": 5547 }, { - "epoch": 1.1633466135458168, - "grad_norm": 7.2884372167514035, - "learning_rate": 1.4005712270118685e-05, - "loss": 0.8374, + "epoch": 0.7833945213216605, + "grad_norm": 3.193176297270458, + "learning_rate": 1.73270614675422e-05, + "loss": 0.8722, "step": 5548 }, { - "epoch": 1.1635563011113441, - "grad_norm": 8.07932108328747, - "learning_rate": 1.4003637847856741e-05, - "loss": 1.0004, + "epoch": 0.7835357243716464, + "grad_norm": 2.9981894439731076, + "learning_rate": 1.732602392333964e-05, + "loss": 0.8468, "step": 5549 }, { - "epoch": 1.1637659886768714, - "grad_norm": 7.502899606791628, - "learning_rate": 1.4001563220401409e-05, - "loss": 0.8037, + "epoch": 0.7836769274216323, + "grad_norm": 3.7753569711488444, + "learning_rate": 1.732498620888345e-05, + "loss": 1.1134, "step": 5550 }, { - "epoch": 1.1639756762423987, - "grad_norm": 7.1295667896213875, - "learning_rate": 1.3999488387859023e-05, - "loss": 0.7517, + "epoch": 0.7838181304716182, + "grad_norm": 4.385082287075697, + "learning_rate": 1.7323948324197747e-05, + "loss": 1.1967, "step": 5551 }, { - "epoch": 1.1641853638079263, - "grad_norm": 7.92445807553929, - "learning_rate": 1.3997413350335922e-05, - "loss": 0.8036, + "epoch": 0.7839593335216041, + "grad_norm": 3.7639666428052667, + "learning_rate": 1.7322910269306645e-05, + "loss": 1.1527, "step": 5552 }, { - "epoch": 1.1643950513734536, - "grad_norm": 6.495541696637586, - "learning_rate": 1.399533810793845e-05, - "loss": 0.7468, + "epoch": 0.78410053657159, + "grad_norm": 3.175049290044388, + "learning_rate": 1.732187204423427e-05, + "loss": 0.8871, "step": 5553 }, { - "epoch": 1.1646047389389809, - "grad_norm": 7.10402048255727, - "learning_rate": 1.3993262660772973e-05, - "loss": 1.0148, + "epoch": 0.7842417396215758, + "grad_norm": 3.383394788299492, + "learning_rate": 1.7320833649004754e-05, + "loss": 1.0735, "step": 5554 }, { - "epoch": 1.1648144265045084, - "grad_norm": 8.104166012369038, - "learning_rate": 1.3991187008945859e-05, - "loss": 0.9111, + "epoch": 0.7843829426715617, + "grad_norm": 3.0986046529606424, + "learning_rate": 1.7319795083642223e-05, + "loss": 0.8646, "step": 5555 }, { - "epoch": 1.1650241140700357, - "grad_norm": 8.564299309979349, - "learning_rate": 1.3989111152563486e-05, - "loss": 0.7724, + "epoch": 0.7845241457215476, + "grad_norm": 4.0506815446207565, + "learning_rate": 1.7318756348170817e-05, + "loss": 1.1784, "step": 5556 }, { - "epoch": 1.165233801635563, - "grad_norm": 8.384485187190366, - "learning_rate": 1.3987035091732251e-05, - "loss": 0.843, + "epoch": 0.7846653487715335, + "grad_norm": 3.5914037911705186, + "learning_rate": 1.7317717442614673e-05, + "loss": 1.1047, "step": 5557 }, { - "epoch": 1.1654434892010903, - "grad_norm": 7.8049558778581165, - "learning_rate": 1.398495882655855e-05, - "loss": 0.9738, + "epoch": 0.7848065518215194, + "grad_norm": 3.424726557642637, + "learning_rate": 1.7316678366997935e-05, + "loss": 0.9891, "step": 5558 }, { - "epoch": 1.1656531767666176, - "grad_norm": 7.208790850154003, - "learning_rate": 1.3982882357148796e-05, - "loss": 0.902, + "epoch": 0.7849477548715053, + "grad_norm": 3.0875944220221303, + "learning_rate": 1.7315639121344755e-05, + "loss": 0.857, "step": 5559 }, { - "epoch": 1.1658628643321451, - "grad_norm": 6.558483501709594, - "learning_rate": 1.3980805683609418e-05, - "loss": 0.7199, + "epoch": 0.7850889579214911, + "grad_norm": 3.238190377724806, + "learning_rate": 1.731459970567928e-05, + "loss": 0.9024, "step": 5560 }, { - "epoch": 1.1660725518976724, - "grad_norm": 6.858139462030764, - "learning_rate": 1.3978728806046845e-05, - "loss": 0.8666, + "epoch": 0.785230160971477, + "grad_norm": 3.1130475442057364, + "learning_rate": 1.7313560120025667e-05, + "loss": 0.9403, "step": 5561 }, { - "epoch": 1.1662822394631998, - "grad_norm": 8.262889832043188, - "learning_rate": 1.3976651724567511e-05, - "loss": 1.0382, + "epoch": 0.7853713640214629, + "grad_norm": 3.2170217222508013, + "learning_rate": 1.731252036440807e-05, + "loss": 0.9293, "step": 5562 }, { - "epoch": 1.1664919270287273, - "grad_norm": 8.028379588806763, - "learning_rate": 1.3974574439277889e-05, - "loss": 0.9271, + "epoch": 0.7855125670714488, + "grad_norm": 3.3374823425453752, + "learning_rate": 1.7311480438850664e-05, + "loss": 0.9483, "step": 5563 }, { - "epoch": 1.1667016145942546, - "grad_norm": 7.218026748875909, - "learning_rate": 1.3972496950284427e-05, - "loss": 0.8656, + "epoch": 0.7856537701214347, + "grad_norm": 3.266927980915045, + "learning_rate": 1.7310440343377608e-05, + "loss": 0.9346, "step": 5564 }, { - "epoch": 1.166911302159782, - "grad_norm": 8.720782145476438, - "learning_rate": 1.3970419257693609e-05, - "loss": 0.998, + "epoch": 0.7857949731714206, + "grad_norm": 3.7127208274997727, + "learning_rate": 1.7309400078013077e-05, + "loss": 1.3239, "step": 5565 }, { - "epoch": 1.1671209897253092, - "grad_norm": 6.810971121596743, - "learning_rate": 1.3968341361611916e-05, - "loss": 0.8755, + "epoch": 0.7859361762214064, + "grad_norm": 3.6147743841381796, + "learning_rate": 1.730835964278124e-05, + "loss": 1.0566, "step": 5566 }, { - "epoch": 1.1673306772908367, - "grad_norm": 8.250873105852879, - "learning_rate": 1.3966263262145848e-05, - "loss": 0.9699, + "epoch": 0.7860773792713922, + "grad_norm": 3.316404727212875, + "learning_rate": 1.7307319037706286e-05, + "loss": 0.9942, "step": 5567 }, { - "epoch": 1.167540364856364, - "grad_norm": 7.035244909248895, - "learning_rate": 1.3964184959401906e-05, - "loss": 0.7971, + "epoch": 0.7862185823213781, + "grad_norm": 2.9725723729494695, + "learning_rate": 1.7306278262812393e-05, + "loss": 0.8373, "step": 5568 }, { - "epoch": 1.1677500524218913, - "grad_norm": 7.776495239408806, - "learning_rate": 1.3962106453486613e-05, - "loss": 1.1185, + "epoch": 0.786359785371364, + "grad_norm": 3.2510958108970844, + "learning_rate": 1.7305237318123748e-05, + "loss": 1.1214, "step": 5569 }, { - "epoch": 1.1679597399874186, - "grad_norm": 6.995030796561895, - "learning_rate": 1.3960027744506488e-05, - "loss": 0.7593, + "epoch": 0.7865009884213499, + "grad_norm": 3.213887722487075, + "learning_rate": 1.7304196203664544e-05, + "loss": 1.0394, "step": 5570 }, { - "epoch": 1.1681694275529462, - "grad_norm": 9.033657207726348, - "learning_rate": 1.3957948832568076e-05, - "loss": 0.833, + "epoch": 0.7866421914713357, + "grad_norm": 2.7703904675542472, + "learning_rate": 1.7303154919458972e-05, + "loss": 0.8856, "step": 5571 }, { - "epoch": 1.1683791151184735, - "grad_norm": 6.641138610331567, - "learning_rate": 1.3955869717777918e-05, - "loss": 0.8655, + "epoch": 0.7867833945213216, + "grad_norm": 4.089659752634055, + "learning_rate": 1.7302113465531233e-05, + "loss": 1.0658, "step": 5572 }, { - "epoch": 1.1685888026840008, - "grad_norm": 7.708460540892471, - "learning_rate": 1.395379040024258e-05, - "loss": 0.8105, + "epoch": 0.7869245975713075, + "grad_norm": 3.5849388316931887, + "learning_rate": 1.7301071841905535e-05, + "loss": 0.9173, "step": 5573 }, { - "epoch": 1.1687984902495283, - "grad_norm": 7.373430849396879, - "learning_rate": 1.3951710880068623e-05, - "loss": 0.9267, + "epoch": 0.7870658006212934, + "grad_norm": 4.092817944480415, + "learning_rate": 1.7300030048606077e-05, + "loss": 0.8767, "step": 5574 }, { - "epoch": 1.1690081778150556, - "grad_norm": 7.124819158258448, - "learning_rate": 1.394963115736263e-05, - "loss": 0.8531, + "epoch": 0.7872070036712793, + "grad_norm": 4.039515390228647, + "learning_rate": 1.7298988085657073e-05, + "loss": 1.1206, "step": 5575 }, { - "epoch": 1.169217865380583, - "grad_norm": 7.59226208133222, - "learning_rate": 1.3947551232231194e-05, - "loss": 0.9472, + "epoch": 0.7873482067212652, + "grad_norm": 3.2167899777118953, + "learning_rate": 1.729794595308274e-05, + "loss": 0.9047, "step": 5576 }, { - "epoch": 1.1694275529461102, - "grad_norm": 5.888968694301112, - "learning_rate": 1.3945471104780906e-05, - "loss": 0.745, + "epoch": 0.787489409771251, + "grad_norm": 3.375398191699207, + "learning_rate": 1.729690365090729e-05, + "loss": 1.0103, "step": 5577 }, { - "epoch": 1.1696372405116378, - "grad_norm": 7.268625721218548, - "learning_rate": 1.3943390775118383e-05, - "loss": 0.8901, + "epoch": 0.7876306128212369, + "grad_norm": 3.979955602085848, + "learning_rate": 1.7295861179154954e-05, + "loss": 1.0797, "step": 5578 }, { - "epoch": 1.169846928077165, - "grad_norm": 7.380352274636955, - "learning_rate": 1.3941310243350244e-05, - "loss": 0.6978, + "epoch": 0.7877718158712228, + "grad_norm": 3.9673889872253842, + "learning_rate": 1.729481853784996e-05, + "loss": 1.1959, "step": 5579 }, { - "epoch": 1.1700566156426924, - "grad_norm": 7.473076522652288, - "learning_rate": 1.3939229509583117e-05, - "loss": 0.8239, + "epoch": 0.7879130189212087, + "grad_norm": 3.194145157451402, + "learning_rate": 1.729377572701653e-05, + "loss": 0.8583, "step": 5580 }, { - "epoch": 1.1702663032082197, - "grad_norm": 6.80577748516548, - "learning_rate": 1.3937148573923647e-05, - "loss": 0.7917, + "epoch": 0.7880542219711946, + "grad_norm": 4.618724150636717, + "learning_rate": 1.7292732746678898e-05, + "loss": 1.3627, "step": 5581 }, { - "epoch": 1.1704759907737472, - "grad_norm": 6.2537552565444, - "learning_rate": 1.3935067436478483e-05, - "loss": 0.7429, + "epoch": 0.7881954250211805, + "grad_norm": 3.5027224212787846, + "learning_rate": 1.729168959686131e-05, + "loss": 1.0713, "step": 5582 }, { - "epoch": 1.1706856783392745, - "grad_norm": 9.183670258619124, - "learning_rate": 1.3932986097354288e-05, - "loss": 1.0381, + "epoch": 0.7883366280711663, + "grad_norm": 3.617407610343722, + "learning_rate": 1.7290646277588004e-05, + "loss": 1.0417, "step": 5583 }, { - "epoch": 1.1708953659048018, - "grad_norm": 5.916981098487638, - "learning_rate": 1.3930904556657735e-05, - "loss": 0.5183, + "epoch": 0.7884778311211522, + "grad_norm": 3.414663293788455, + "learning_rate": 1.7289602788883227e-05, + "loss": 0.8317, "step": 5584 }, { - "epoch": 1.1711050534703291, - "grad_norm": 6.4062889992003695, - "learning_rate": 1.3928822814495504e-05, - "loss": 0.8093, + "epoch": 0.7886190341711381, + "grad_norm": 3.354513127406612, + "learning_rate": 1.728855913077123e-05, + "loss": 0.9047, "step": 5585 }, { - "epoch": 1.1713147410358566, - "grad_norm": 6.652514843020139, - "learning_rate": 1.392674087097429e-05, - "loss": 0.9062, + "epoch": 0.788760237221124, + "grad_norm": 3.614118585685528, + "learning_rate": 1.728751530327627e-05, + "loss": 1.3129, "step": 5586 }, { - "epoch": 1.171524428601384, - "grad_norm": 6.97181508790011, - "learning_rate": 1.3924658726200798e-05, - "loss": 0.851, + "epoch": 0.7889014402711099, + "grad_norm": 3.6826238586648383, + "learning_rate": 1.7286471306422594e-05, + "loss": 0.8989, "step": 5587 }, { - "epoch": 1.1717341161669113, - "grad_norm": 8.499517112840234, - "learning_rate": 1.3922576380281739e-05, - "loss": 1.073, + "epoch": 0.7890426433210957, + "grad_norm": 3.328047564233775, + "learning_rate": 1.7285427140234476e-05, + "loss": 0.8334, "step": 5588 }, { - "epoch": 1.1719438037324386, - "grad_norm": 6.077591549342404, - "learning_rate": 1.3920493833323833e-05, - "loss": 0.8214, + "epoch": 0.7891838463710816, + "grad_norm": 3.8771571349971143, + "learning_rate": 1.7284382804736178e-05, + "loss": 1.1559, "step": 5589 }, { - "epoch": 1.172153491297966, - "grad_norm": 5.993412622136807, - "learning_rate": 1.3918411085433822e-05, - "loss": 0.7873, + "epoch": 0.7893250494210675, + "grad_norm": 3.7381213192988447, + "learning_rate": 1.728333829995197e-05, + "loss": 1.1182, "step": 5590 }, { - "epoch": 1.1723631788634934, - "grad_norm": 6.697136904012644, - "learning_rate": 1.3916328136718444e-05, - "loss": 0.7363, + "epoch": 0.7894662524710534, + "grad_norm": 3.7583253552563036, + "learning_rate": 1.7282293625906123e-05, + "loss": 1.0599, "step": 5591 }, { - "epoch": 1.1725728664290207, - "grad_norm": 6.975223740668854, - "learning_rate": 1.391424498728446e-05, - "loss": 0.8073, + "epoch": 0.7896074555210393, + "grad_norm": 3.4245662368654277, + "learning_rate": 1.7281248782622916e-05, + "loss": 0.9355, "step": 5592 }, { - "epoch": 1.1727825539945482, - "grad_norm": 6.302969948249736, - "learning_rate": 1.391216163723863e-05, - "loss": 0.8286, + "epoch": 0.7897486585710252, + "grad_norm": 3.815138867480394, + "learning_rate": 1.7280203770126634e-05, + "loss": 1.114, "step": 5593 }, { - "epoch": 1.1729922415600755, - "grad_norm": 6.3999766222298184, - "learning_rate": 1.3910078086687731e-05, - "loss": 0.7968, + "epoch": 0.789889861621011, + "grad_norm": 3.2577437247835865, + "learning_rate": 1.7279158588441558e-05, + "loss": 1.0135, "step": 5594 }, { - "epoch": 1.1732019291256028, - "grad_norm": 7.694049833517075, - "learning_rate": 1.3907994335738546e-05, - "loss": 0.7568, + "epoch": 0.7900310646709969, + "grad_norm": 4.249401455340824, + "learning_rate": 1.7278113237591985e-05, + "loss": 0.9072, "step": 5595 }, { - "epoch": 1.1734116166911301, - "grad_norm": 9.462558731463119, - "learning_rate": 1.3905910384497876e-05, - "loss": 0.7763, + "epoch": 0.7901722677209828, + "grad_norm": 3.8572822756403586, + "learning_rate": 1.7277067717602197e-05, + "loss": 1.1375, "step": 5596 }, { - "epoch": 1.1736213042566577, - "grad_norm": 7.8162239671601075, - "learning_rate": 1.3903826233072523e-05, - "loss": 1.0712, + "epoch": 0.7903134707709687, + "grad_norm": 3.1692793142160265, + "learning_rate": 1.7276022028496505e-05, + "loss": 0.7544, "step": 5597 }, { - "epoch": 1.173830991822185, - "grad_norm": 7.528316198425317, - "learning_rate": 1.3901741881569306e-05, - "loss": 1.0436, + "epoch": 0.7904546738209546, + "grad_norm": 3.6550327120836466, + "learning_rate": 1.7274976170299197e-05, + "loss": 0.9915, "step": 5598 }, { - "epoch": 1.1740406793877123, - "grad_norm": 8.156716409452379, - "learning_rate": 1.389965733009505e-05, - "loss": 0.901, + "epoch": 0.7905958768709405, + "grad_norm": 4.551089517885977, + "learning_rate": 1.727393014303459e-05, + "loss": 1.0764, "step": 5599 }, { - "epoch": 1.1742503669532396, - "grad_norm": 7.7806969009443, - "learning_rate": 1.389757257875659e-05, - "loss": 0.8909, + "epoch": 0.7907370799209263, + "grad_norm": 2.8715677140076714, + "learning_rate": 1.7272883946726986e-05, + "loss": 0.8208, "step": 5600 }, { - "epoch": 1.1744600545187671, - "grad_norm": 6.747483240429086, - "learning_rate": 1.389548762766078e-05, - "loss": 0.8724, + "epoch": 0.7908782829709121, + "grad_norm": 3.6483004810001427, + "learning_rate": 1.72718375814007e-05, + "loss": 0.9627, "step": 5601 }, { - "epoch": 1.1746697420842944, - "grad_norm": 7.882417573478209, - "learning_rate": 1.3893402476914468e-05, - "loss": 0.8693, + "epoch": 0.791019486020898, + "grad_norm": 3.6195621959955124, + "learning_rate": 1.727079104708005e-05, + "loss": 1.3154, "step": 5602 }, { - "epoch": 1.1748794296498217, - "grad_norm": 6.231588148887154, - "learning_rate": 1.3891317126624528e-05, - "loss": 0.8189, + "epoch": 0.7911606890708839, + "grad_norm": 3.562321564611492, + "learning_rate": 1.7269744343789354e-05, + "loss": 0.6574, "step": 5603 }, { - "epoch": 1.175089117215349, - "grad_norm": 7.172811633752633, - "learning_rate": 1.3889231576897837e-05, - "loss": 0.9693, + "epoch": 0.7913018921208698, + "grad_norm": 4.176793378568638, + "learning_rate": 1.7268697471552937e-05, + "loss": 1.2392, "step": 5604 }, { - "epoch": 1.1752988047808766, - "grad_norm": 8.951661442173059, - "learning_rate": 1.3887145827841282e-05, - "loss": 0.7747, + "epoch": 0.7914430951708556, + "grad_norm": 3.0482972944825506, + "learning_rate": 1.7267650430395134e-05, + "loss": 0.8102, "step": 5605 }, { - "epoch": 1.1755084923464039, - "grad_norm": 7.818523917704988, - "learning_rate": 1.388505987956176e-05, - "loss": 0.7961, + "epoch": 0.7915842982208415, + "grad_norm": 3.5540292381404504, + "learning_rate": 1.7266603220340273e-05, + "loss": 0.9388, "step": 5606 }, { - "epoch": 1.1757181799119312, - "grad_norm": 6.471893346882293, - "learning_rate": 1.3882973732166183e-05, - "loss": 0.763, + "epoch": 0.7917255012708274, + "grad_norm": 2.959431400209971, + "learning_rate": 1.726555584141269e-05, + "loss": 0.8839, "step": 5607 }, { - "epoch": 1.1759278674774585, - "grad_norm": 8.243674209259343, - "learning_rate": 1.3880887385761466e-05, - "loss": 0.9605, + "epoch": 0.7918667043208133, + "grad_norm": 3.3973481605813642, + "learning_rate": 1.7264508293636726e-05, + "loss": 0.9087, "step": 5608 }, { - "epoch": 1.176137555042986, - "grad_norm": 7.6602389527154955, - "learning_rate": 1.3878800840454539e-05, - "loss": 0.8205, + "epoch": 0.7920079073707992, + "grad_norm": 3.52378967886429, + "learning_rate": 1.726346057703673e-05, + "loss": 1.0837, "step": 5609 }, { - "epoch": 1.1763472426085133, - "grad_norm": 6.995182514484757, - "learning_rate": 1.3876714096352341e-05, - "loss": 0.887, + "epoch": 0.7921491104207851, + "grad_norm": 3.559858126373494, + "learning_rate": 1.7262412691637044e-05, + "loss": 0.9548, "step": 5610 }, { - "epoch": 1.1765569301740406, - "grad_norm": 7.104625402296216, - "learning_rate": 1.3874627153561822e-05, - "loss": 0.8944, + "epoch": 0.792290313470771, + "grad_norm": 3.5478044614050295, + "learning_rate": 1.7261364637462026e-05, + "loss": 1.2252, "step": 5611 }, { - "epoch": 1.1767666177395681, - "grad_norm": 7.880185866381647, - "learning_rate": 1.3872540012189943e-05, - "loss": 0.7541, + "epoch": 0.7924315165207568, + "grad_norm": 3.1313948131891074, + "learning_rate": 1.7260316414536026e-05, + "loss": 0.8664, "step": 5612 }, { - "epoch": 1.1769763053050954, - "grad_norm": 5.416838954782173, - "learning_rate": 1.3870452672343673e-05, - "loss": 0.566, + "epoch": 0.7925727195707427, + "grad_norm": 3.345390008898074, + "learning_rate": 1.725926802288341e-05, + "loss": 0.9052, "step": 5613 }, { - "epoch": 1.1771859928706228, - "grad_norm": 7.566946464813779, - "learning_rate": 1.3868365134129987e-05, - "loss": 0.9677, + "epoch": 0.7927139226207286, + "grad_norm": 3.6354923570935886, + "learning_rate": 1.7258219462528543e-05, + "loss": 1.059, "step": 5614 }, { - "epoch": 1.17739568043615, - "grad_norm": 8.072919877677197, - "learning_rate": 1.3866277397655879e-05, - "loss": 1.0179, + "epoch": 0.7928551256707145, + "grad_norm": 4.028224192322754, + "learning_rate": 1.7257170733495786e-05, + "loss": 1.2128, "step": 5615 }, { - "epoch": 1.1776053680016776, - "grad_norm": 6.402403958045082, - "learning_rate": 1.386418946302835e-05, - "loss": 0.8271, + "epoch": 0.7929963287207004, + "grad_norm": 3.8016071756978715, + "learning_rate": 1.725612183580952e-05, + "loss": 0.9815, "step": 5616 }, { - "epoch": 1.177815055567205, - "grad_norm": 5.880345512606271, - "learning_rate": 1.3862101330354406e-05, - "loss": 0.6467, + "epoch": 0.7931375317706862, + "grad_norm": 3.7766241341128053, + "learning_rate": 1.725507276949411e-05, + "loss": 1.2226, "step": 5617 }, { - "epoch": 1.1780247431327322, - "grad_norm": 7.154139220718624, - "learning_rate": 1.3860012999741076e-05, - "loss": 0.8529, + "epoch": 0.7932787348206721, + "grad_norm": 3.293212639459275, + "learning_rate": 1.7254023534573946e-05, + "loss": 1.0946, "step": 5618 }, { - "epoch": 1.1782344306982595, - "grad_norm": 6.169033575279547, - "learning_rate": 1.3857924471295384e-05, - "loss": 0.7324, + "epoch": 0.793419937870658, + "grad_norm": 3.8491280066730287, + "learning_rate": 1.7252974131073407e-05, + "loss": 1.1051, "step": 5619 }, { - "epoch": 1.178444118263787, - "grad_norm": 7.798326766380699, - "learning_rate": 1.3855835745124365e-05, - "loss": 0.8376, + "epoch": 0.7935611409206439, + "grad_norm": 4.397739877632676, + "learning_rate": 1.7251924559016885e-05, + "loss": 1.1929, "step": 5620 }, { - "epoch": 1.1786538058293143, - "grad_norm": 8.129685071081152, - "learning_rate": 1.3853746821335084e-05, - "loss": 0.9424, + "epoch": 0.7937023439706298, + "grad_norm": 3.213885060430707, + "learning_rate": 1.7250874818428763e-05, + "loss": 0.9268, "step": 5621 }, { - "epoch": 1.1788634933948416, - "grad_norm": 7.068441554158626, - "learning_rate": 1.3851657700034591e-05, - "loss": 0.8675, + "epoch": 0.7938435470206157, + "grad_norm": 3.426073763377816, + "learning_rate": 1.7249824909333445e-05, + "loss": 0.9959, "step": 5622 }, { - "epoch": 1.1790731809603692, - "grad_norm": 7.788652169860666, - "learning_rate": 1.3849568381329958e-05, - "loss": 0.8562, + "epoch": 0.7939847500706015, + "grad_norm": 4.23477758167533, + "learning_rate": 1.7248774831755324e-05, + "loss": 1.1895, "step": 5623 }, { - "epoch": 1.1792828685258965, - "grad_norm": 6.334949374905051, - "learning_rate": 1.3847478865328272e-05, - "loss": 0.6565, + "epoch": 0.7941259531205874, + "grad_norm": 3.9337431366398428, + "learning_rate": 1.7247724585718807e-05, + "loss": 1.2488, "step": 5624 }, { - "epoch": 1.1794925560914238, - "grad_norm": 8.790791662719936, - "learning_rate": 1.384538915213662e-05, - "loss": 0.8423, + "epoch": 0.7942671561705733, + "grad_norm": 3.384446178218081, + "learning_rate": 1.7246674171248304e-05, + "loss": 1.2503, "step": 5625 }, { - "epoch": 1.179702243656951, - "grad_norm": 7.3620297013365406, - "learning_rate": 1.3843299241862105e-05, - "loss": 0.8906, + "epoch": 0.7944083592205592, + "grad_norm": 2.766893726732559, + "learning_rate": 1.7245623588368217e-05, + "loss": 0.8349, "step": 5626 }, { - "epoch": 1.1799119312224784, - "grad_norm": 8.371907005332536, - "learning_rate": 1.384120913461184e-05, - "loss": 1.0709, + "epoch": 0.7945495622705451, + "grad_norm": 4.462461431382719, + "learning_rate": 1.7244572837102974e-05, + "loss": 1.3379, "step": 5627 }, { - "epoch": 1.180121618788006, - "grad_norm": 8.081840045947917, - "learning_rate": 1.3839118830492942e-05, - "loss": 0.8053, + "epoch": 0.794690765320531, + "grad_norm": 2.958669721434582, + "learning_rate": 1.7243521917476984e-05, + "loss": 0.8713, "step": 5628 }, { - "epoch": 1.1803313063535332, - "grad_norm": 7.474865402390149, - "learning_rate": 1.3837028329612542e-05, - "loss": 0.9166, + "epoch": 0.7948319683705168, + "grad_norm": 3.2921073505695304, + "learning_rate": 1.7242470829514674e-05, + "loss": 0.9114, "step": 5629 }, { - "epoch": 1.1805409939190605, - "grad_norm": 7.663703695457021, - "learning_rate": 1.3834937632077794e-05, - "loss": 0.8424, + "epoch": 0.7949731714205027, + "grad_norm": 4.045780646736548, + "learning_rate": 1.7241419573240463e-05, + "loss": 1.2129, "step": 5630 }, { - "epoch": 1.180750681484588, - "grad_norm": 8.86509457205034, - "learning_rate": 1.3832846737995836e-05, - "loss": 0.8927, + "epoch": 0.7951143744704886, + "grad_norm": 3.6477694825745983, + "learning_rate": 1.7240368148678793e-05, + "loss": 1.1109, "step": 5631 }, { - "epoch": 1.1809603690501154, - "grad_norm": 5.723959602838004, - "learning_rate": 1.3830755647473835e-05, - "loss": 0.7921, + "epoch": 0.7952555775204745, + "grad_norm": 3.3786596971451597, + "learning_rate": 1.7239316555854096e-05, + "loss": 0.9392, "step": 5632 }, { - "epoch": 1.1811700566156427, - "grad_norm": 6.770830196951483, - "learning_rate": 1.382866436061896e-05, - "loss": 0.7365, + "epoch": 0.7953967805704604, + "grad_norm": 4.218556220562897, + "learning_rate": 1.723826479479081e-05, + "loss": 0.8799, "step": 5633 }, { - "epoch": 1.18137974418117, - "grad_norm": 6.528917255086503, - "learning_rate": 1.3826572877538403e-05, - "loss": 0.847, + "epoch": 0.7955379836204463, + "grad_norm": 4.3397071483583884, + "learning_rate": 1.723721286551337e-05, + "loss": 1.0307, "step": 5634 }, { - "epoch": 1.1815894317466975, - "grad_norm": 7.008963864291205, - "learning_rate": 1.3824481198339342e-05, - "loss": 0.6544, + "epoch": 0.795679186670432, + "grad_norm": 2.988290376687753, + "learning_rate": 1.7236160768046234e-05, + "loss": 0.8105, "step": 5635 }, { - "epoch": 1.1817991193122248, - "grad_norm": 9.751552006833972, - "learning_rate": 1.382238932312899e-05, - "loss": 1.0352, + "epoch": 0.7958203897204179, + "grad_norm": 4.108123455540468, + "learning_rate": 1.7235108502413844e-05, + "loss": 0.9541, "step": 5636 }, { - "epoch": 1.1820088068777521, - "grad_norm": 7.27222110116671, - "learning_rate": 1.3820297252014556e-05, - "loss": 0.9215, + "epoch": 0.7959615927704038, + "grad_norm": 3.2496570351757277, + "learning_rate": 1.7234056068640658e-05, + "loss": 1.0448, "step": 5637 }, { - "epoch": 1.1822184944432794, - "grad_norm": 6.613857179429183, - "learning_rate": 1.381820498510326e-05, - "loss": 0.8005, + "epoch": 0.7961027958203897, + "grad_norm": 3.1702481861816754, + "learning_rate": 1.7233003466751133e-05, + "loss": 0.7888, "step": 5638 }, { - "epoch": 1.182428182008807, - "grad_norm": 7.171706679961983, - "learning_rate": 1.3816112522502335e-05, - "loss": 0.8353, + "epoch": 0.7962439988703756, + "grad_norm": 3.696681412077238, + "learning_rate": 1.7231950696769733e-05, + "loss": 0.9871, "step": 5639 }, { - "epoch": 1.1826378695743343, - "grad_norm": 9.291744305245487, - "learning_rate": 1.381401986431903e-05, - "loss": 0.8169, + "epoch": 0.7963852019203614, + "grad_norm": 3.3361634641335605, + "learning_rate": 1.7230897758720916e-05, + "loss": 0.9872, "step": 5640 }, { - "epoch": 1.1828475571398616, - "grad_norm": 6.493545370857968, - "learning_rate": 1.3811927010660587e-05, - "loss": 0.7442, + "epoch": 0.7965264049703473, + "grad_norm": 3.2023248677367007, + "learning_rate": 1.722984465262916e-05, + "loss": 0.8924, "step": 5641 }, { - "epoch": 1.183057244705389, - "grad_norm": 6.996088691690428, - "learning_rate": 1.3809833961634275e-05, - "loss": 0.8455, + "epoch": 0.7966676080203332, + "grad_norm": 3.9216539566302124, + "learning_rate": 1.722879137851894e-05, + "loss": 0.9518, "step": 5642 }, { - "epoch": 1.1832669322709164, - "grad_norm": 9.47597907787096, - "learning_rate": 1.3807740717347362e-05, - "loss": 0.913, + "epoch": 0.7968088110703191, + "grad_norm": 2.8873382949743056, + "learning_rate": 1.7227737936414733e-05, + "loss": 0.9408, "step": 5643 }, { - "epoch": 1.1834766198364437, - "grad_norm": 7.3598284589264695, - "learning_rate": 1.3805647277907137e-05, - "loss": 0.8749, + "epoch": 0.796950014120305, + "grad_norm": 3.616981459946964, + "learning_rate": 1.7226684326341014e-05, + "loss": 0.8917, "step": 5644 }, { - "epoch": 1.183686307401971, - "grad_norm": 7.325876382295701, - "learning_rate": 1.3803553643420888e-05, - "loss": 0.9871, + "epoch": 0.7970912171702909, + "grad_norm": 3.2696074315812793, + "learning_rate": 1.722563054832227e-05, + "loss": 1.0187, "step": 5645 }, { - "epoch": 1.1838959949674983, - "grad_norm": 8.405622361817615, - "learning_rate": 1.3801459813995917e-05, - "loss": 1.0752, + "epoch": 0.7972324202202767, + "grad_norm": 3.2816344178091335, + "learning_rate": 1.7224576602382993e-05, + "loss": 0.9796, "step": 5646 }, { - "epoch": 1.1841056825330258, - "grad_norm": 8.296161667417985, - "learning_rate": 1.3799365789739537e-05, - "loss": 0.712, + "epoch": 0.7973736232702626, + "grad_norm": 3.4429876637984056, + "learning_rate": 1.7223522488547678e-05, + "loss": 1.0807, "step": 5647 }, { - "epoch": 1.1843153700985531, - "grad_norm": 6.877579577069931, - "learning_rate": 1.3797271570759072e-05, - "loss": 0.8776, + "epoch": 0.7975148263202485, + "grad_norm": 2.981552798263127, + "learning_rate": 1.722246820684082e-05, + "loss": 0.7922, "step": 5648 }, { - "epoch": 1.1845250576640804, - "grad_norm": 7.899024418042107, - "learning_rate": 1.3795177157161849e-05, - "loss": 0.8103, + "epoch": 0.7976560293702344, + "grad_norm": 3.116658450966832, + "learning_rate": 1.7221413757286916e-05, + "loss": 0.8143, "step": 5649 }, { - "epoch": 1.184734745229608, - "grad_norm": 7.597763211053912, - "learning_rate": 1.379308254905522e-05, - "loss": 0.9938, + "epoch": 0.7977972324202203, + "grad_norm": 3.2953140457742176, + "learning_rate": 1.722035913991048e-05, + "loss": 0.9233, "step": 5650 }, { - "epoch": 1.1849444327951353, - "grad_norm": 6.456469495670153, - "learning_rate": 1.3790987746546527e-05, - "loss": 0.7621, + "epoch": 0.7979384354702062, + "grad_norm": 3.739178375582755, + "learning_rate": 1.7219304354736013e-05, + "loss": 1.1204, "step": 5651 }, { - "epoch": 1.1851541203606626, - "grad_norm": 8.15718963114159, - "learning_rate": 1.3788892749743137e-05, - "loss": 0.8181, + "epoch": 0.798079638520192, + "grad_norm": 3.876233024730047, + "learning_rate": 1.7218249401788033e-05, + "loss": 1.1663, "step": 5652 }, { - "epoch": 1.18536380792619, - "grad_norm": 7.2348404563641076, - "learning_rate": 1.3786797558752424e-05, - "loss": 0.962, + "epoch": 0.7982208415701779, + "grad_norm": 3.577865630515231, + "learning_rate": 1.721719428109105e-05, + "loss": 0.8175, "step": 5653 }, { - "epoch": 1.1855734954917174, - "grad_norm": 7.1112588427510115, - "learning_rate": 1.3784702173681767e-05, - "loss": 0.8297, + "epoch": 0.7983620446201638, + "grad_norm": 3.486035862473454, + "learning_rate": 1.7216138992669593e-05, + "loss": 1.1233, "step": 5654 }, { - "epoch": 1.1857831830572447, - "grad_norm": 7.453227357966235, - "learning_rate": 1.378260659463856e-05, - "loss": 0.9245, + "epoch": 0.7985032476701497, + "grad_norm": 3.634782829218257, + "learning_rate": 1.721508353654818e-05, + "loss": 1.1401, "step": 5655 }, { - "epoch": 1.185992870622772, - "grad_norm": 6.894858060769164, - "learning_rate": 1.3780510821730205e-05, - "loss": 0.7762, + "epoch": 0.7986444507201356, + "grad_norm": 3.115458851223582, + "learning_rate": 1.7214027912751342e-05, + "loss": 0.8681, "step": 5656 }, { - "epoch": 1.1862025581882993, - "grad_norm": 7.9072020500757665, - "learning_rate": 1.3778414855064112e-05, - "loss": 0.8422, + "epoch": 0.7987856537701215, + "grad_norm": 3.3529372948495695, + "learning_rate": 1.721297212130361e-05, + "loss": 0.9279, "step": 5657 }, { - "epoch": 1.1864122457538269, - "grad_norm": 5.900324071668547, - "learning_rate": 1.3776318694747704e-05, - "loss": 0.6924, + "epoch": 0.7989268568201073, + "grad_norm": 3.259706039622505, + "learning_rate": 1.7211916162229524e-05, + "loss": 1.1726, "step": 5658 }, { - "epoch": 1.1866219333193542, - "grad_norm": 7.368427903878405, - "learning_rate": 1.3774222340888418e-05, - "loss": 0.7939, + "epoch": 0.7990680598700932, + "grad_norm": 3.5016640836178152, + "learning_rate": 1.7210860035553617e-05, + "loss": 0.9302, "step": 5659 }, { - "epoch": 1.1868316208848815, - "grad_norm": 7.423932904741232, - "learning_rate": 1.3772125793593684e-05, - "loss": 0.9915, + "epoch": 0.7992092629200791, + "grad_norm": 4.3452420322325915, + "learning_rate": 1.720980374130044e-05, + "loss": 0.8945, "step": 5660 }, { - "epoch": 1.187041308450409, - "grad_norm": 7.6574107222029975, - "learning_rate": 1.3770029052970965e-05, - "loss": 0.9254, + "epoch": 0.799350465970065, + "grad_norm": 3.42804758024367, + "learning_rate": 1.7208747279494535e-05, + "loss": 0.988, "step": 5661 }, { - "epoch": 1.1872509960159363, - "grad_norm": 6.7788323944186155, - "learning_rate": 1.3767932119127717e-05, - "loss": 0.8037, + "epoch": 0.7994916690200509, + "grad_norm": 3.794524794938975, + "learning_rate": 1.720769065016046e-05, + "loss": 1.0704, "step": 5662 }, { - "epoch": 1.1874606835814636, - "grad_norm": 7.6923952980322765, - "learning_rate": 1.3765834992171413e-05, - "loss": 0.8313, + "epoch": 0.7996328720700367, + "grad_norm": 3.6253327499766836, + "learning_rate": 1.7206633853322766e-05, + "loss": 1.0324, "step": 5663 }, { - "epoch": 1.187670371146991, - "grad_norm": 6.733331331135807, - "learning_rate": 1.3763737672209534e-05, - "loss": 0.6348, + "epoch": 0.7997740751200226, + "grad_norm": 3.624499999657056, + "learning_rate": 1.720557688900601e-05, + "loss": 0.889, "step": 5664 }, { - "epoch": 1.1878800587125182, - "grad_norm": 6.205615802015711, - "learning_rate": 1.3761640159349572e-05, - "loss": 0.7039, + "epoch": 0.7999152781700085, + "grad_norm": 2.9038279622004164, + "learning_rate": 1.7204519757234763e-05, + "loss": 0.8118, "step": 5665 }, { - "epoch": 1.1880897462780458, - "grad_norm": 7.551457945344353, - "learning_rate": 1.3759542453699028e-05, - "loss": 0.7845, + "epoch": 0.8000564812199944, + "grad_norm": 3.0475602519073988, + "learning_rate": 1.7203462458033586e-05, + "loss": 0.8897, "step": 5666 }, { - "epoch": 1.188299433843573, - "grad_norm": 7.362289169878077, - "learning_rate": 1.375744455536541e-05, - "loss": 0.9324, + "epoch": 0.8001976842699803, + "grad_norm": 3.366591855794279, + "learning_rate": 1.7202404991427054e-05, + "loss": 0.8562, "step": 5667 }, { - "epoch": 1.1885091214091004, - "grad_norm": 8.133515370656067, - "learning_rate": 1.3755346464456245e-05, - "loss": 0.7779, + "epoch": 0.8003388873199662, + "grad_norm": 3.3871864290407667, + "learning_rate": 1.720134735743974e-05, + "loss": 1.0064, "step": 5668 }, { - "epoch": 1.188718808974628, - "grad_norm": 7.999635896276008, - "learning_rate": 1.375324818107906e-05, - "loss": 1.2023, + "epoch": 0.800480090369952, + "grad_norm": 3.7088562720499287, + "learning_rate": 1.7200289556096224e-05, + "loss": 1.071, "step": 5669 }, { - "epoch": 1.1889284965401552, - "grad_norm": 7.410687155600046, - "learning_rate": 1.3751149705341393e-05, - "loss": 1.1157, + "epoch": 0.8006212934199378, + "grad_norm": 3.4901118839522223, + "learning_rate": 1.7199231587421087e-05, + "loss": 1.1796, "step": 5670 }, { - "epoch": 1.1891381841056825, - "grad_norm": 8.057169250716472, - "learning_rate": 1.37490510373508e-05, - "loss": 0.906, + "epoch": 0.8007624964699237, + "grad_norm": 3.2479831352701716, + "learning_rate": 1.719817345143892e-05, + "loss": 1.0612, "step": 5671 }, { - "epoch": 1.1893478716712098, - "grad_norm": 6.73665642484448, - "learning_rate": 1.3746952177214838e-05, - "loss": 0.863, + "epoch": 0.8009036995199096, + "grad_norm": 3.299421339600392, + "learning_rate": 1.7197115148174305e-05, + "loss": 0.9246, "step": 5672 }, { - "epoch": 1.1895575592367373, - "grad_norm": 7.778994155451082, - "learning_rate": 1.374485312504108e-05, - "loss": 0.8987, + "epoch": 0.8010449025698955, + "grad_norm": 3.4681165837982104, + "learning_rate": 1.7196056677651846e-05, + "loss": 0.9879, "step": 5673 }, { - "epoch": 1.1897672468022646, - "grad_norm": 8.323020373750847, - "learning_rate": 1.3742753880937101e-05, - "loss": 0.8837, + "epoch": 0.8011861056198814, + "grad_norm": 3.563022849693697, + "learning_rate": 1.7194998039896134e-05, + "loss": 1.0384, "step": 5674 }, { - "epoch": 1.189976934367792, - "grad_norm": 8.013136007869543, - "learning_rate": 1.3740654445010497e-05, - "loss": 0.8605, + "epoch": 0.8013273086698672, + "grad_norm": 3.4676801413233345, + "learning_rate": 1.719393923493178e-05, + "loss": 1.0407, "step": 5675 }, { - "epoch": 1.1901866219333193, - "grad_norm": 6.096103210483433, - "learning_rate": 1.3738554817368862e-05, - "loss": 0.829, + "epoch": 0.8014685117198531, + "grad_norm": 3.3572882118116474, + "learning_rate": 1.719288026278338e-05, + "loss": 0.7867, "step": 5676 }, { - "epoch": 1.1903963094988468, - "grad_norm": 7.060517636841401, - "learning_rate": 1.373645499811981e-05, - "loss": 0.8782, + "epoch": 0.801609714769839, + "grad_norm": 3.8246798797378734, + "learning_rate": 1.7191821123475555e-05, + "loss": 1.0487, "step": 5677 }, { - "epoch": 1.190605997064374, - "grad_norm": 7.157504180440596, - "learning_rate": 1.373435498737096e-05, - "loss": 0.9056, + "epoch": 0.8017509178198249, + "grad_norm": 3.329061192527636, + "learning_rate": 1.719076181703291e-05, + "loss": 0.7192, "step": 5678 }, { - "epoch": 1.1908156846299014, - "grad_norm": 7.846248401214299, - "learning_rate": 1.3732254785229937e-05, - "loss": 0.9993, + "epoch": 0.8018921208698108, + "grad_norm": 3.7891338625566786, + "learning_rate": 1.7189702343480067e-05, + "loss": 1.0386, "step": 5679 }, { - "epoch": 1.191025372195429, - "grad_norm": 7.739747852972373, - "learning_rate": 1.3730154391804387e-05, - "loss": 0.9501, + "epoch": 0.8020333239197966, + "grad_norm": 3.9969102595485118, + "learning_rate": 1.7188642702841643e-05, + "loss": 1.2247, "step": 5680 }, { - "epoch": 1.1912350597609562, - "grad_norm": 5.990113444486688, - "learning_rate": 1.3728053807201953e-05, - "loss": 0.7729, + "epoch": 0.8021745269697825, + "grad_norm": 3.058334834296358, + "learning_rate": 1.7187582895142266e-05, + "loss": 0.856, "step": 5681 }, { - "epoch": 1.1914447473264835, - "grad_norm": 7.768612059040285, - "learning_rate": 1.3725953031530295e-05, - "loss": 1.0722, + "epoch": 0.8023157300197684, + "grad_norm": 3.9333074466022175, + "learning_rate": 1.7186522920406572e-05, + "loss": 1.0895, "step": 5682 }, { - "epoch": 1.1916544348920108, - "grad_norm": 7.3697195910017275, - "learning_rate": 1.372385206489708e-05, - "loss": 0.867, + "epoch": 0.8024569330697543, + "grad_norm": 3.89759237703865, + "learning_rate": 1.7185462778659187e-05, + "loss": 1.1567, "step": 5683 }, { - "epoch": 1.1918641224575381, - "grad_norm": 6.991360739360432, - "learning_rate": 1.3721750907409989e-05, - "loss": 0.8595, + "epoch": 0.8025981361197402, + "grad_norm": 4.750843469383844, + "learning_rate": 1.7184402469924748e-05, + "loss": 1.2414, "step": 5684 }, { - "epoch": 1.1920738100230657, - "grad_norm": 7.749075191615825, - "learning_rate": 1.371964955917671e-05, - "loss": 0.7957, + "epoch": 0.8027393391697261, + "grad_norm": 3.0172101828239355, + "learning_rate": 1.71833419942279e-05, + "loss": 0.8523, "step": 5685 }, { - "epoch": 1.192283497588593, - "grad_norm": 6.2818291643835185, - "learning_rate": 1.3717548020304943e-05, - "loss": 0.7302, + "epoch": 0.802880542219712, + "grad_norm": 4.182358050626544, + "learning_rate": 1.7182281351593288e-05, + "loss": 1.2095, "step": 5686 }, { - "epoch": 1.1924931851541203, - "grad_norm": 7.270376324921736, - "learning_rate": 1.3715446290902385e-05, - "loss": 0.805, + "epoch": 0.8030217452696978, + "grad_norm": 3.783317136705109, + "learning_rate": 1.7181220542045557e-05, + "loss": 0.8191, "step": 5687 }, { - "epoch": 1.1927028727196478, - "grad_norm": 5.963688055820944, - "learning_rate": 1.3713344371076767e-05, - "loss": 0.7588, + "epoch": 0.8031629483196837, + "grad_norm": 3.91678778156472, + "learning_rate": 1.718015956560936e-05, + "loss": 0.9489, "step": 5688 }, { - "epoch": 1.1929125602851751, - "grad_norm": 5.788906936865552, - "learning_rate": 1.3711242260935806e-05, - "loss": 0.6473, + "epoch": 0.8033041513696696, + "grad_norm": 3.3075166175090662, + "learning_rate": 1.717909842230936e-05, + "loss": 0.8831, "step": 5689 }, { - "epoch": 1.1931222478507024, - "grad_norm": 7.528176276733837, - "learning_rate": 1.3709139960587244e-05, - "loss": 1.0728, + "epoch": 0.8034453544196555, + "grad_norm": 4.017990949078927, + "learning_rate": 1.7178037112170213e-05, + "loss": 1.0205, "step": 5690 }, { - "epoch": 1.1933319354162297, - "grad_norm": 6.537163593453765, - "learning_rate": 1.3707037470138823e-05, - "loss": 0.7303, + "epoch": 0.8035865574696414, + "grad_norm": 3.7930383952931814, + "learning_rate": 1.717697563521658e-05, + "loss": 1.1714, "step": 5691 }, { - "epoch": 1.1935416229817573, - "grad_norm": 5.456843612546987, - "learning_rate": 1.3704934789698305e-05, - "loss": 0.642, + "epoch": 0.8037277605196272, + "grad_norm": 2.8983915495163775, + "learning_rate": 1.7175913991473137e-05, + "loss": 0.7216, "step": 5692 }, { - "epoch": 1.1937513105472846, - "grad_norm": 6.972496139600782, - "learning_rate": 1.3702831919373449e-05, - "loss": 0.867, + "epoch": 0.8038689635696131, + "grad_norm": 3.74032180819834, + "learning_rate": 1.7174852180964546e-05, + "loss": 1.0592, "step": 5693 }, { - "epoch": 1.1939609981128119, - "grad_norm": 6.572162711965942, - "learning_rate": 1.370072885927204e-05, - "loss": 0.705, + "epoch": 0.804010166619599, + "grad_norm": 3.4109752323304385, + "learning_rate": 1.7173790203715494e-05, + "loss": 0.8605, "step": 5694 }, { - "epoch": 1.1941706856783392, - "grad_norm": 7.778179095339917, - "learning_rate": 1.3698625609501857e-05, - "loss": 0.944, + "epoch": 0.8041513696695849, + "grad_norm": 3.4763099917619544, + "learning_rate": 1.7172728059750655e-05, + "loss": 0.8975, "step": 5695 }, { - "epoch": 1.1943803732438667, - "grad_norm": 6.357895186306804, - "learning_rate": 1.3696522170170696e-05, - "loss": 0.697, + "epoch": 0.8042925727195708, + "grad_norm": 3.694446840350058, + "learning_rate": 1.7171665749094713e-05, + "loss": 1.0667, "step": 5696 }, { - "epoch": 1.194590060809394, - "grad_norm": 5.8972221361853485, - "learning_rate": 1.3694418541386361e-05, - "loss": 0.5594, + "epoch": 0.8044337757695567, + "grad_norm": 3.569246880532397, + "learning_rate": 1.7170603271772354e-05, + "loss": 0.9342, "step": 5697 }, { - "epoch": 1.1947997483749213, - "grad_norm": 8.618190455193671, - "learning_rate": 1.3692314723256672e-05, - "loss": 0.8786, + "epoch": 0.8045749788195425, + "grad_norm": 4.028246080786916, + "learning_rate": 1.7169540627808276e-05, + "loss": 1.1244, "step": 5698 }, { - "epoch": 1.1950094359404488, - "grad_norm": 6.7017434015992645, - "learning_rate": 1.3690210715889445e-05, - "loss": 0.7151, + "epoch": 0.8047161818695284, + "grad_norm": 3.63315635152293, + "learning_rate": 1.7168477817227166e-05, + "loss": 0.9091, "step": 5699 }, { - "epoch": 1.1952191235059761, - "grad_norm": 6.059835278007399, - "learning_rate": 1.3688106519392523e-05, - "loss": 0.7323, + "epoch": 0.8048573849195143, + "grad_norm": 3.3931946474695924, + "learning_rate": 1.716741484005373e-05, + "loss": 1.0119, "step": 5700 }, { - "epoch": 1.1954288110715034, - "grad_norm": 8.125017227328817, - "learning_rate": 1.3686002133873742e-05, - "loss": 1.0936, + "epoch": 0.8049985879695002, + "grad_norm": 3.0992890855834085, + "learning_rate": 1.7166351696312665e-05, + "loss": 0.8549, "step": 5701 }, { - "epoch": 1.1956384986370308, - "grad_norm": 9.414138666612521, - "learning_rate": 1.368389755944096e-05, - "loss": 1.0402, + "epoch": 0.8051397910194861, + "grad_norm": 3.4271780881199843, + "learning_rate": 1.7165288386028683e-05, + "loss": 0.8898, "step": 5702 }, { - "epoch": 1.1958481862025583, - "grad_norm": 8.127317288872796, - "learning_rate": 1.3681792796202036e-05, - "loss": 1.0002, + "epoch": 0.805280994069472, + "grad_norm": 4.13970380393167, + "learning_rate": 1.7164224909226497e-05, + "loss": 1.1469, "step": 5703 }, { - "epoch": 1.1960578737680856, - "grad_norm": 6.777870999255757, - "learning_rate": 1.3679687844264847e-05, - "loss": 0.5327, + "epoch": 0.8054221971194577, + "grad_norm": 4.039115144119851, + "learning_rate": 1.7163161265930814e-05, + "loss": 1.0747, "step": 5704 }, { - "epoch": 1.196267561333613, - "grad_norm": 7.790669362522949, - "learning_rate": 1.3677582703737273e-05, - "loss": 0.8775, + "epoch": 0.8055634001694436, + "grad_norm": 3.3159786008994776, + "learning_rate": 1.716209745616636e-05, + "loss": 1.0783, "step": 5705 }, { - "epoch": 1.1964772488991402, - "grad_norm": 8.645957764865734, - "learning_rate": 1.3675477374727208e-05, - "loss": 1.1264, + "epoch": 0.8057046032194295, + "grad_norm": 3.379172070121552, + "learning_rate": 1.716103347995785e-05, + "loss": 0.8885, "step": 5706 }, { - "epoch": 1.1966869364646677, - "grad_norm": 7.03945421016497, - "learning_rate": 1.3673371857342552e-05, - "loss": 0.8234, + "epoch": 0.8058458062694154, + "grad_norm": 3.777939441472578, + "learning_rate": 1.7159969337330018e-05, + "loss": 1.2025, "step": 5707 }, { - "epoch": 1.196896624030195, - "grad_norm": 6.001833355127882, - "learning_rate": 1.3671266151691218e-05, - "loss": 0.8453, + "epoch": 0.8059870093194013, + "grad_norm": 3.6344828918316936, + "learning_rate": 1.715890502830759e-05, + "loss": 0.9674, "step": 5708 }, { - "epoch": 1.1971063115957223, - "grad_norm": 6.521835098649482, - "learning_rate": 1.3669160257881129e-05, - "loss": 0.7212, + "epoch": 0.8061282123693871, + "grad_norm": 3.3375923475952436, + "learning_rate": 1.71578405529153e-05, + "loss": 0.9814, "step": 5709 }, { - "epoch": 1.1973159991612496, - "grad_norm": 6.650573122010452, - "learning_rate": 1.3667054176020208e-05, - "loss": 0.6899, + "epoch": 0.806269415419373, + "grad_norm": 3.926671106156196, + "learning_rate": 1.7156775911177888e-05, + "loss": 1.0768, "step": 5710 }, { - "epoch": 1.1975256867267772, - "grad_norm": 6.268032820509766, - "learning_rate": 1.3664947906216405e-05, - "loss": 0.826, + "epoch": 0.8064106184693589, + "grad_norm": 3.2149902656105436, + "learning_rate": 1.715571110312009e-05, + "loss": 0.8192, "step": 5711 }, { - "epoch": 1.1977353742923045, - "grad_norm": 8.564742689990075, - "learning_rate": 1.3662841448577662e-05, - "loss": 0.9765, + "epoch": 0.8065518215193448, + "grad_norm": 4.063355332553528, + "learning_rate": 1.7154646128766663e-05, + "loss": 1.1364, "step": 5712 }, { - "epoch": 1.1979450618578318, - "grad_norm": 6.21533456164436, - "learning_rate": 1.3660734803211944e-05, - "loss": 0.6996, + "epoch": 0.8066930245693307, + "grad_norm": 3.678149002894032, + "learning_rate": 1.7153580988142348e-05, + "loss": 1.0788, "step": 5713 }, { - "epoch": 1.198154749423359, - "grad_norm": 7.504434566476642, - "learning_rate": 1.3658627970227217e-05, - "loss": 0.8581, + "epoch": 0.8068342276193166, + "grad_norm": 3.6157646197990516, + "learning_rate": 1.7152515681271896e-05, + "loss": 1.1353, "step": 5714 }, { - "epoch": 1.1983644369888866, - "grad_norm": 7.1183965613350475, - "learning_rate": 1.3656520949731462e-05, - "loss": 0.6891, + "epoch": 0.8069754306693024, + "grad_norm": 3.782277933358547, + "learning_rate": 1.7151450208180075e-05, + "loss": 1.0758, "step": 5715 }, { - "epoch": 1.198574124554414, - "grad_norm": 5.930331315207084, - "learning_rate": 1.3654413741832661e-05, - "loss": 0.6662, + "epoch": 0.8071166337192883, + "grad_norm": 3.6944131199965327, + "learning_rate": 1.7150384568891634e-05, + "loss": 1.0843, "step": 5716 }, { - "epoch": 1.1987838121199412, - "grad_norm": 6.493826608451485, - "learning_rate": 1.3652306346638824e-05, - "loss": 0.8084, + "epoch": 0.8072578367692742, + "grad_norm": 3.629072456929432, + "learning_rate": 1.7149318763431345e-05, + "loss": 0.9361, "step": 5717 }, { - "epoch": 1.1989934996854688, - "grad_norm": 7.326420822225876, - "learning_rate": 1.365019876425795e-05, - "loss": 0.9489, + "epoch": 0.8073990398192601, + "grad_norm": 3.4966032581795607, + "learning_rate": 1.714825279182398e-05, + "loss": 0.7638, "step": 5718 }, { - "epoch": 1.199203187250996, - "grad_norm": 8.382636046841132, - "learning_rate": 1.3648090994798058e-05, - "loss": 0.9105, + "epoch": 0.807540242869246, + "grad_norm": 3.3749154071831753, + "learning_rate": 1.7147186654094304e-05, + "loss": 0.994, "step": 5719 }, { - "epoch": 1.1994128748165234, - "grad_norm": 6.6749204942302685, - "learning_rate": 1.3645983038367177e-05, - "loss": 0.6622, + "epoch": 0.8076814459192319, + "grad_norm": 3.4403068126410785, + "learning_rate": 1.7146120350267094e-05, + "loss": 0.9372, "step": 5720 }, { - "epoch": 1.1996225623820507, - "grad_norm": 7.020075640003441, - "learning_rate": 1.3643874895073341e-05, - "loss": 0.7872, + "epoch": 0.8078226489692177, + "grad_norm": 3.166200927672278, + "learning_rate": 1.7145053880367134e-05, + "loss": 1.0126, "step": 5721 }, { - "epoch": 1.1998322499475782, - "grad_norm": 8.708283237671763, - "learning_rate": 1.3641766565024592e-05, - "loss": 1.0609, + "epoch": 0.8079638520192036, + "grad_norm": 4.108168600389199, + "learning_rate": 1.7143987244419212e-05, + "loss": 1.3065, "step": 5722 }, { - "epoch": 1.2000419375131055, - "grad_norm": 8.026461588750792, - "learning_rate": 1.3639658048328997e-05, - "loss": 1.0342, + "epoch": 0.8081050550691895, + "grad_norm": 2.8857495921785494, + "learning_rate": 1.7142920442448107e-05, + "loss": 0.8709, "step": 5723 }, { - "epoch": 1.2002516250786328, - "grad_norm": 7.397692312896382, - "learning_rate": 1.3637549345094611e-05, - "loss": 0.9278, + "epoch": 0.8082462581191754, + "grad_norm": 3.9229217804714356, + "learning_rate": 1.7141853474478618e-05, + "loss": 1.3251, "step": 5724 }, { - "epoch": 1.2004613126441601, - "grad_norm": 6.716244304076115, - "learning_rate": 1.3635440455429512e-05, - "loss": 0.7962, + "epoch": 0.8083874611691613, + "grad_norm": 3.735286256421067, + "learning_rate": 1.714078634053554e-05, + "loss": 1.041, "step": 5725 }, { - "epoch": 1.2006710002096876, - "grad_norm": 8.754467628706172, - "learning_rate": 1.3633331379441784e-05, - "loss": 0.8922, + "epoch": 0.8085286642191472, + "grad_norm": 3.4298656520142585, + "learning_rate": 1.713971904064367e-05, + "loss": 1.0632, "step": 5726 }, { - "epoch": 1.200880687775215, - "grad_norm": 7.589019317339162, - "learning_rate": 1.3631222117239522e-05, - "loss": 0.9809, + "epoch": 0.808669867269133, + "grad_norm": 3.4957065073575695, + "learning_rate": 1.713865157482781e-05, + "loss": 0.9638, "step": 5727 }, { - "epoch": 1.2010903753407423, - "grad_norm": 6.398821724311576, - "learning_rate": 1.3629112668930829e-05, - "loss": 0.6275, + "epoch": 0.8088110703191189, + "grad_norm": 4.335416234677013, + "learning_rate": 1.7137583943112776e-05, + "loss": 1.2524, "step": 5728 }, { - "epoch": 1.2013000629062696, - "grad_norm": 7.626480589037755, - "learning_rate": 1.3627003034623819e-05, - "loss": 1.0377, + "epoch": 0.8089522733691048, + "grad_norm": 4.036844979090874, + "learning_rate": 1.713651614552337e-05, + "loss": 0.898, "step": 5729 }, { - "epoch": 1.201509750471797, - "grad_norm": 8.479275665629238, - "learning_rate": 1.3624893214426607e-05, - "loss": 0.9998, + "epoch": 0.8090934764190907, + "grad_norm": 3.707318157970143, + "learning_rate": 1.713544818208441e-05, + "loss": 1.1559, "step": 5730 }, { - "epoch": 1.2017194380373244, - "grad_norm": 8.001897589863493, - "learning_rate": 1.3622783208447334e-05, - "loss": 0.8685, + "epoch": 0.8092346794690766, + "grad_norm": 3.476462421570581, + "learning_rate": 1.7134380052820715e-05, + "loss": 0.9229, "step": 5731 }, { - "epoch": 1.2019291256028517, - "grad_norm": 7.297073278857654, - "learning_rate": 1.3620673016794139e-05, - "loss": 0.7937, + "epoch": 0.8093758825190625, + "grad_norm": 3.3862946190682455, + "learning_rate": 1.7133311757757112e-05, + "loss": 1.0165, "step": 5732 }, { - "epoch": 1.202138813168379, - "grad_norm": 6.839904216823691, - "learning_rate": 1.3618562639575171e-05, - "loss": 0.8639, + "epoch": 0.8095170855690483, + "grad_norm": 3.6055314726454815, + "learning_rate": 1.7132243296918424e-05, + "loss": 0.9641, "step": 5733 }, { - "epoch": 1.2023485007339065, - "grad_norm": 6.8544338934169975, - "learning_rate": 1.3616452076898592e-05, - "loss": 0.8296, + "epoch": 0.8096582886190342, + "grad_norm": 4.2205854658364155, + "learning_rate": 1.713117467032948e-05, + "loss": 1.2135, "step": 5734 }, { - "epoch": 1.2025581882994338, - "grad_norm": 6.562994680606597, - "learning_rate": 1.3614341328872571e-05, - "loss": 0.6985, + "epoch": 0.8097994916690201, + "grad_norm": 2.962013328187995, + "learning_rate": 1.7130105878015116e-05, + "loss": 0.8187, "step": 5735 }, { - "epoch": 1.2027678758649611, - "grad_norm": 8.827253288509787, - "learning_rate": 1.3612230395605288e-05, - "loss": 1.0727, + "epoch": 0.809940694719006, + "grad_norm": 3.7191913867899586, + "learning_rate": 1.7129036920000174e-05, + "loss": 0.9997, "step": 5736 }, { - "epoch": 1.2029775634304887, - "grad_norm": 8.029930089777471, - "learning_rate": 1.3610119277204934e-05, - "loss": 1.0457, + "epoch": 0.8100818977689919, + "grad_norm": 3.9766919009778348, + "learning_rate": 1.712796779630949e-05, + "loss": 1.066, "step": 5737 }, { - "epoch": 1.203187250996016, - "grad_norm": 7.329344568919264, - "learning_rate": 1.3608007973779707e-05, - "loss": 0.8419, + "epoch": 0.8102231008189776, + "grad_norm": 3.1475415636136352, + "learning_rate": 1.7126898506967913e-05, + "loss": 0.8357, "step": 5738 }, { - "epoch": 1.2033969385615433, - "grad_norm": 6.168357207548109, - "learning_rate": 1.3605896485437806e-05, - "loss": 0.5288, + "epoch": 0.8103643038689635, + "grad_norm": 2.9716153190597936, + "learning_rate": 1.7125829052000295e-05, + "loss": 0.8737, "step": 5739 }, { - "epoch": 1.2036066261270706, - "grad_norm": 6.396981215655067, - "learning_rate": 1.3603784812287464e-05, - "loss": 0.7379, + "epoch": 0.8105055069189494, + "grad_norm": 4.22819347811825, + "learning_rate": 1.7124759431431485e-05, + "loss": 1.1211, "step": 5740 }, { - "epoch": 1.2038163136925981, - "grad_norm": 7.3451230911238055, - "learning_rate": 1.3601672954436897e-05, - "loss": 0.8139, + "epoch": 0.8106467099689353, + "grad_norm": 4.064594185251849, + "learning_rate": 1.712368964528634e-05, + "loss": 1.1126, "step": 5741 }, { - "epoch": 1.2040260012581254, - "grad_norm": 7.6025934306842435, - "learning_rate": 1.3599560911994343e-05, - "loss": 0.9753, + "epoch": 0.8107879130189212, + "grad_norm": 3.253308280245384, + "learning_rate": 1.712261969358973e-05, + "loss": 1.1477, "step": 5742 }, { - "epoch": 1.2042356888236527, - "grad_norm": 6.512066890172266, - "learning_rate": 1.3597448685068051e-05, - "loss": 0.7649, + "epoch": 0.810929116068907, + "grad_norm": 4.174879586133742, + "learning_rate": 1.712154957636651e-05, + "loss": 0.9996, "step": 5743 }, { - "epoch": 1.20444537638918, - "grad_norm": 6.902733762901995, - "learning_rate": 1.3595336273766275e-05, - "loss": 0.8065, + "epoch": 0.8110703191188929, + "grad_norm": 3.9047707072926405, + "learning_rate": 1.7120479293641558e-05, + "loss": 1.2572, "step": 5744 }, { - "epoch": 1.2046550639547076, - "grad_norm": 6.294020155438753, - "learning_rate": 1.3593223678197277e-05, - "loss": 0.5302, + "epoch": 0.8112115221688788, + "grad_norm": 3.2668279454822895, + "learning_rate": 1.7119408845439735e-05, + "loss": 0.7538, "step": 5745 }, { - "epoch": 1.2048647515202349, - "grad_norm": 8.148232140380923, - "learning_rate": 1.3591110898469335e-05, - "loss": 0.9913, + "epoch": 0.8113527252188647, + "grad_norm": 3.037128670130711, + "learning_rate": 1.711833823178593e-05, + "loss": 0.8262, "step": 5746 }, { - "epoch": 1.2050744390857622, - "grad_norm": 7.991627440028552, - "learning_rate": 1.358899793469073e-05, - "loss": 1.1258, + "epoch": 0.8114939282688506, + "grad_norm": 3.6367350454383707, + "learning_rate": 1.7117267452705018e-05, + "loss": 1.1567, "step": 5747 }, { - "epoch": 1.2052841266512897, - "grad_norm": 7.200093339637468, - "learning_rate": 1.3586884786969756e-05, - "loss": 0.8399, + "epoch": 0.8116351313188365, + "grad_norm": 3.725139218296415, + "learning_rate": 1.7116196508221886e-05, + "loss": 1.093, "step": 5748 }, { - "epoch": 1.205493814216817, - "grad_norm": 7.7696380929135875, - "learning_rate": 1.3584771455414716e-05, - "loss": 0.8772, + "epoch": 0.8117763343688223, + "grad_norm": 4.062659405042165, + "learning_rate": 1.7115125398361418e-05, + "loss": 1.114, "step": 5749 }, { - "epoch": 1.2057035017823443, - "grad_norm": 6.841433987534582, - "learning_rate": 1.3582657940133923e-05, - "loss": 0.7387, + "epoch": 0.8119175374188082, + "grad_norm": 3.4150592867027734, + "learning_rate": 1.7114054123148508e-05, + "loss": 0.8329, "step": 5750 }, { - "epoch": 1.2059131893478716, - "grad_norm": 8.138228327916083, - "learning_rate": 1.3580544241235693e-05, - "loss": 1.0431, + "epoch": 0.8120587404687941, + "grad_norm": 3.390653966675382, + "learning_rate": 1.7112982682608054e-05, + "loss": 1.0881, "step": 5751 }, { - "epoch": 1.206122876913399, - "grad_norm": 6.915714214525624, - "learning_rate": 1.3578430358828365e-05, - "loss": 0.844, + "epoch": 0.81219994351878, + "grad_norm": 4.681011931210241, + "learning_rate": 1.7111911076764954e-05, + "loss": 1.0354, "step": 5752 }, { - "epoch": 1.2063325644789265, - "grad_norm": 6.3925473917296065, - "learning_rate": 1.3576316293020273e-05, - "loss": 0.8788, + "epoch": 0.8123411465687659, + "grad_norm": 3.7498116991006936, + "learning_rate": 1.7110839305644113e-05, + "loss": 1.0779, "step": 5753 }, { - "epoch": 1.2065422520444538, - "grad_norm": 6.438985375449511, - "learning_rate": 1.3574202043919766e-05, - "loss": 0.713, + "epoch": 0.8124823496187518, + "grad_norm": 3.6145229677788935, + "learning_rate": 1.7109767369270437e-05, + "loss": 1.0916, "step": 5754 }, { - "epoch": 1.206751939609981, - "grad_norm": 6.835101481405855, - "learning_rate": 1.3572087611635208e-05, - "loss": 0.675, + "epoch": 0.8126235526687376, + "grad_norm": 3.083789066339782, + "learning_rate": 1.7108695267668836e-05, + "loss": 0.8599, "step": 5755 }, { - "epoch": 1.2069616271755086, - "grad_norm": 6.983494548832377, - "learning_rate": 1.356997299627496e-05, - "loss": 0.7774, + "epoch": 0.8127647557187235, + "grad_norm": 3.2934155765738673, + "learning_rate": 1.710762300086423e-05, + "loss": 0.9477, "step": 5756 }, { - "epoch": 1.207171314741036, - "grad_norm": 7.442430330131553, - "learning_rate": 1.356785819794741e-05, - "loss": 0.951, + "epoch": 0.8129059587687094, + "grad_norm": 3.309566612485001, + "learning_rate": 1.710655056888153e-05, + "loss": 0.9326, "step": 5757 }, { - "epoch": 1.2073810023065632, - "grad_norm": 5.9332606574959845, - "learning_rate": 1.3565743216760936e-05, - "loss": 0.6644, + "epoch": 0.8130471618186953, + "grad_norm": 3.3178397032374485, + "learning_rate": 1.7105477971745668e-05, + "loss": 1.0848, "step": 5758 }, { - "epoch": 1.2075906898720905, - "grad_norm": 8.40179973789322, - "learning_rate": 1.356362805282394e-05, - "loss": 1.0526, + "epoch": 0.8131883648686812, + "grad_norm": 3.600273685717095, + "learning_rate": 1.7104405209481563e-05, + "loss": 0.9988, "step": 5759 }, { - "epoch": 1.207800377437618, - "grad_norm": 6.761574912786429, - "learning_rate": 1.3561512706244821e-05, - "loss": 0.826, + "epoch": 0.8133295679186671, + "grad_norm": 3.0057221608906413, + "learning_rate": 1.7103332282114156e-05, + "loss": 0.7617, "step": 5760 }, { - "epoch": 1.2080100650031453, - "grad_norm": 7.45411495615844, - "learning_rate": 1.3559397177132004e-05, - "loss": 0.8968, + "epoch": 0.813470770968653, + "grad_norm": 3.0171600113905495, + "learning_rate": 1.7102259189668368e-05, + "loss": 0.8799, "step": 5761 }, { - "epoch": 1.2082197525686726, - "grad_norm": 7.164678889769534, - "learning_rate": 1.3557281465593904e-05, - "loss": 0.7948, + "epoch": 0.8136119740186388, + "grad_norm": 3.2249493276914887, + "learning_rate": 1.7101185932169147e-05, + "loss": 0.9947, "step": 5762 }, { - "epoch": 1.2084294401342, - "grad_norm": 7.213025581436023, - "learning_rate": 1.355516557173896e-05, - "loss": 0.7809, + "epoch": 0.8137531770686247, + "grad_norm": 3.961378646708445, + "learning_rate": 1.710011250964143e-05, + "loss": 0.9026, "step": 5763 }, { - "epoch": 1.2086391276997275, - "grad_norm": 6.858731692746869, - "learning_rate": 1.3553049495675615e-05, - "loss": 0.794, + "epoch": 0.8138943801186106, + "grad_norm": 3.577776642384495, + "learning_rate": 1.7099038922110164e-05, + "loss": 0.82, "step": 5764 }, { - "epoch": 1.2088488152652548, - "grad_norm": 7.05163035365398, - "learning_rate": 1.355093323751232e-05, - "loss": 0.7515, + "epoch": 0.8140355831685965, + "grad_norm": 3.0140303574821408, + "learning_rate": 1.70979651696003e-05, + "loss": 0.8317, "step": 5765 }, { - "epoch": 1.209058502830782, - "grad_norm": 7.183114214983822, - "learning_rate": 1.3548816797357538e-05, - "loss": 0.8056, + "epoch": 0.8141767862185824, + "grad_norm": 3.0322701306926225, + "learning_rate": 1.709689125213679e-05, + "loss": 1.0097, "step": 5766 }, { - "epoch": 1.2092681903963096, - "grad_norm": 7.364000561030981, - "learning_rate": 1.3546700175319742e-05, - "loss": 0.778, + "epoch": 0.8143179892685682, + "grad_norm": 3.3557782998798014, + "learning_rate": 1.7095817169744596e-05, + "loss": 0.8828, "step": 5767 }, { - "epoch": 1.209477877961837, - "grad_norm": 7.656695137902211, - "learning_rate": 1.3544583371507406e-05, - "loss": 0.9765, + "epoch": 0.8144591923185541, + "grad_norm": 3.8332590631097845, + "learning_rate": 1.7094742922448674e-05, + "loss": 1.0529, "step": 5768 }, { - "epoch": 1.2096875655273642, - "grad_norm": 7.025927988790655, - "learning_rate": 1.354246638602903e-05, - "loss": 0.9057, + "epoch": 0.81460039536854, + "grad_norm": 3.921753372221941, + "learning_rate": 1.7093668510273987e-05, + "loss": 1.3827, "step": 5769 }, { - "epoch": 1.2098972530928915, - "grad_norm": 6.538520109419792, - "learning_rate": 1.3540349218993105e-05, - "loss": 0.8847, + "epoch": 0.8147415984185259, + "grad_norm": 3.654490672878271, + "learning_rate": 1.7092593933245513e-05, + "loss": 1.1379, "step": 5770 }, { - "epoch": 1.2101069406584188, - "grad_norm": 9.108695219956731, - "learning_rate": 1.3538231870508144e-05, - "loss": 0.9617, + "epoch": 0.8148828014685118, + "grad_norm": 3.2634412631470946, + "learning_rate": 1.7091519191388212e-05, + "loss": 0.8773, "step": 5771 }, { - "epoch": 1.2103166282239464, - "grad_norm": 7.751211697026719, - "learning_rate": 1.353611434068266e-05, - "loss": 0.7606, + "epoch": 0.8150240045184975, + "grad_norm": 3.2965078674422728, + "learning_rate": 1.709044428472707e-05, + "loss": 0.9704, "step": 5772 }, { - "epoch": 1.2105263157894737, - "grad_norm": 7.588075721721866, - "learning_rate": 1.3533996629625186e-05, - "loss": 0.8311, + "epoch": 0.8151652075684834, + "grad_norm": 3.2974919908035347, + "learning_rate": 1.7089369213287068e-05, + "loss": 1.0449, "step": 5773 }, { - "epoch": 1.210736003355001, - "grad_norm": 7.6830240271379, - "learning_rate": 1.3531878737444251e-05, - "loss": 0.729, + "epoch": 0.8153064106184693, + "grad_norm": 3.4596732877531395, + "learning_rate": 1.7088293977093187e-05, + "loss": 0.9916, "step": 5774 }, { - "epoch": 1.2109456909205285, - "grad_norm": 6.040714517724161, - "learning_rate": 1.3529760664248409e-05, - "loss": 0.6059, + "epoch": 0.8154476136684552, + "grad_norm": 3.158839372751099, + "learning_rate": 1.7087218576170412e-05, + "loss": 0.854, "step": 5775 }, { - "epoch": 1.2111553784860558, - "grad_norm": 6.861154118087046, - "learning_rate": 1.3527642410146208e-05, - "loss": 0.8101, + "epoch": 0.8155888167184411, + "grad_norm": 3.2415502949942274, + "learning_rate": 1.7086143010543737e-05, + "loss": 0.9604, "step": 5776 }, { - "epoch": 1.2113650660515831, - "grad_norm": 6.969826662307672, - "learning_rate": 1.3525523975246218e-05, - "loss": 0.8381, + "epoch": 0.815730019768427, + "grad_norm": 3.604498813630204, + "learning_rate": 1.7085067280238164e-05, + "loss": 1.2164, "step": 5777 }, { - "epoch": 1.2115747536171104, - "grad_norm": 6.707459184089527, - "learning_rate": 1.3523405359657007e-05, - "loss": 0.7922, + "epoch": 0.8158712228184128, + "grad_norm": 3.03545057478487, + "learning_rate": 1.7083991385278686e-05, + "loss": 0.8376, "step": 5778 }, { - "epoch": 1.211784441182638, - "grad_norm": 7.115086227233861, - "learning_rate": 1.3521286563487164e-05, - "loss": 0.8251, + "epoch": 0.8160124258683987, + "grad_norm": 3.4099815374798936, + "learning_rate": 1.7082915325690304e-05, + "loss": 0.9554, "step": 5779 }, { - "epoch": 1.2119941287481653, - "grad_norm": 10.161048983402722, - "learning_rate": 1.3519167586845272e-05, - "loss": 1.1279, + "epoch": 0.8161536289183846, + "grad_norm": 3.8781491276250897, + "learning_rate": 1.7081839101498033e-05, + "loss": 1.1385, "step": 5780 }, { - "epoch": 1.2122038163136926, - "grad_norm": 8.65768740740381, - "learning_rate": 1.3517048429839941e-05, - "loss": 0.8399, + "epoch": 0.8162948319683705, + "grad_norm": 2.8191883704677174, + "learning_rate": 1.7080762712726878e-05, + "loss": 0.7949, "step": 5781 }, { - "epoch": 1.2124135038792199, - "grad_norm": 6.7985642857851305, - "learning_rate": 1.3514929092579779e-05, - "loss": 0.8635, + "epoch": 0.8164360350183564, + "grad_norm": 3.585568733818966, + "learning_rate": 1.7079686159401853e-05, + "loss": 1.0857, "step": 5782 }, { - "epoch": 1.2126231914447474, - "grad_norm": 9.130974509997364, - "learning_rate": 1.35128095751734e-05, - "loss": 1.0259, + "epoch": 0.8165772380683423, + "grad_norm": 4.859614661772849, + "learning_rate": 1.7078609441547983e-05, + "loss": 0.7672, "step": 5783 }, { - "epoch": 1.2128328790102747, - "grad_norm": 6.8095952310766785, - "learning_rate": 1.351068987772944e-05, - "loss": 0.8597, + "epoch": 0.8167184411183281, + "grad_norm": 3.5293849637013635, + "learning_rate": 1.7077532559190282e-05, + "loss": 0.9033, "step": 5784 }, { - "epoch": 1.213042566575802, - "grad_norm": 6.289008042887556, - "learning_rate": 1.3508570000356536e-05, - "loss": 0.8336, + "epoch": 0.816859644168314, + "grad_norm": 3.2504820137981034, + "learning_rate": 1.7076455512353782e-05, + "loss": 0.9333, "step": 5785 }, { - "epoch": 1.2132522541413295, - "grad_norm": 7.488260055714972, - "learning_rate": 1.3506449943163334e-05, - "loss": 0.8446, + "epoch": 0.8170008472182999, + "grad_norm": 4.93326422032294, + "learning_rate": 1.7075378301063518e-05, + "loss": 1.2205, "step": 5786 }, { - "epoch": 1.2134619417068568, - "grad_norm": 7.404368186402006, - "learning_rate": 1.3504329706258487e-05, - "loss": 0.8246, + "epoch": 0.8171420502682858, + "grad_norm": 4.520854437932972, + "learning_rate": 1.707430092534451e-05, + "loss": 1.3726, "step": 5787 }, { - "epoch": 1.2136716292723841, - "grad_norm": 5.540675882143761, - "learning_rate": 1.350220928975067e-05, - "loss": 0.6501, + "epoch": 0.8172832533182717, + "grad_norm": 3.3688415365303674, + "learning_rate": 1.7073223385221806e-05, + "loss": 0.9363, "step": 5788 }, { - "epoch": 1.2138813168379115, - "grad_norm": 7.7800573776504, - "learning_rate": 1.3500088693748548e-05, - "loss": 0.9843, + "epoch": 0.8174244563682576, + "grad_norm": 2.93170211923061, + "learning_rate": 1.707214568072045e-05, + "loss": 0.8056, "step": 5789 }, { - "epoch": 1.2140910044034388, - "grad_norm": 7.932426653927388, - "learning_rate": 1.3497967918360814e-05, - "loss": 1.0091, + "epoch": 0.8175656594182434, + "grad_norm": 2.7619718391588988, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8103, "step": 5790 }, { - "epoch": 1.2143006919689663, - "grad_norm": 5.506493200739908, - "learning_rate": 1.3495846963696155e-05, - "loss": 0.5913, + "epoch": 0.8177068624682293, + "grad_norm": 3.422578269565292, + "learning_rate": 1.7069989778681943e-05, + "loss": 0.8877, "step": 5791 }, { - "epoch": 1.2145103795344936, - "grad_norm": 6.477688603750444, - "learning_rate": 1.3493725829863275e-05, - "loss": 0.8906, + "epoch": 0.8178480655182152, + "grad_norm": 4.440784935628404, + "learning_rate": 1.70689115811949e-05, + "loss": 1.1413, "step": 5792 }, { - "epoch": 1.214720067100021, - "grad_norm": 6.654095785939124, - "learning_rate": 1.3491604516970888e-05, - "loss": 0.9041, + "epoch": 0.8179892685682011, + "grad_norm": 3.451292086862934, + "learning_rate": 1.7067833219429407e-05, + "loss": 0.9585, "step": 5793 }, { - "epoch": 1.2149297546655484, - "grad_norm": 7.760193316581298, - "learning_rate": 1.3489483025127717e-05, - "loss": 0.9124, + "epoch": 0.818130471618187, + "grad_norm": 3.2039878229268086, + "learning_rate": 1.706675469341052e-05, + "loss": 0.973, "step": 5794 }, { - "epoch": 1.2151394422310757, - "grad_norm": 6.618860455220378, - "learning_rate": 1.3487361354442482e-05, - "loss": 0.9493, + "epoch": 0.8182716746681729, + "grad_norm": 3.2068002755990443, + "learning_rate": 1.7065676003163308e-05, + "loss": 0.8865, "step": 5795 }, { - "epoch": 1.215349129796603, - "grad_norm": 8.493228930089309, - "learning_rate": 1.3485239505023934e-05, - "loss": 1.1713, + "epoch": 0.8184128777181587, + "grad_norm": 3.3479492532277404, + "learning_rate": 1.7064597148712834e-05, + "loss": 0.9934, "step": 5796 }, { - "epoch": 1.2155588173621303, - "grad_norm": 9.080628066415878, - "learning_rate": 1.3483117476980813e-05, - "loss": 1.1585, + "epoch": 0.8185540807681446, + "grad_norm": 3.3976171550718406, + "learning_rate": 1.7063518130084172e-05, + "loss": 0.9893, "step": 5797 }, { - "epoch": 1.2157685049276579, - "grad_norm": 7.9495879968555645, - "learning_rate": 1.3480995270421884e-05, - "loss": 0.9118, + "epoch": 0.8186952838181305, + "grad_norm": 3.48079273847605, + "learning_rate": 1.7062438947302405e-05, + "loss": 0.9776, "step": 5798 }, { - "epoch": 1.2159781924931852, - "grad_norm": 6.435364138855383, - "learning_rate": 1.3478872885455907e-05, - "loss": 0.8682, + "epoch": 0.8188364868681164, + "grad_norm": 3.2520881394877277, + "learning_rate": 1.7061359600392603e-05, + "loss": 0.9642, "step": 5799 }, { - "epoch": 1.2161878800587125, - "grad_norm": 8.46589264710626, - "learning_rate": 1.3476750322191663e-05, - "loss": 1.0381, + "epoch": 0.8189776899181023, + "grad_norm": 3.026586352850766, + "learning_rate": 1.7060280089379854e-05, + "loss": 0.7532, "step": 5800 }, { - "epoch": 1.2163975676242398, - "grad_norm": 8.427985165070345, - "learning_rate": 1.3474627580737934e-05, - "loss": 0.9015, + "epoch": 0.8191188929680882, + "grad_norm": 3.690306343426726, + "learning_rate": 1.7059200414289247e-05, + "loss": 0.9636, "step": 5801 }, { - "epoch": 1.2166072551897673, - "grad_norm": 7.1154281494012475, - "learning_rate": 1.3472504661203517e-05, - "loss": 0.8885, + "epoch": 0.819260096018074, + "grad_norm": 3.6688792244160298, + "learning_rate": 1.7058120575145868e-05, + "loss": 1.1461, "step": 5802 }, { - "epoch": 1.2168169427552946, - "grad_norm": 7.057007463857058, - "learning_rate": 1.3470381563697208e-05, - "loss": 0.836, + "epoch": 0.8194012990680599, + "grad_norm": 3.9671312357358355, + "learning_rate": 1.7057040571974816e-05, + "loss": 1.2288, "step": 5803 }, { - "epoch": 1.217026630320822, - "grad_norm": 6.589679220047355, - "learning_rate": 1.346825828832783e-05, - "loss": 0.6272, + "epoch": 0.8195425021180458, + "grad_norm": 3.527876885657798, + "learning_rate": 1.7055960404801187e-05, + "loss": 1.1018, "step": 5804 }, { - "epoch": 1.2172363178863495, - "grad_norm": 7.132657349968361, - "learning_rate": 1.3466134835204194e-05, - "loss": 0.9581, + "epoch": 0.8196837051680317, + "grad_norm": 3.7744990629820316, + "learning_rate": 1.7054880073650088e-05, + "loss": 1.0157, "step": 5805 }, { - "epoch": 1.2174460054518768, - "grad_norm": 6.67662517805874, - "learning_rate": 1.3464011204435141e-05, - "loss": 0.8307, + "epoch": 0.8198249082180175, + "grad_norm": 3.910949228800974, + "learning_rate": 1.7053799578546623e-05, + "loss": 1.204, "step": 5806 }, { - "epoch": 1.217655693017404, - "grad_norm": 7.334988777728441, - "learning_rate": 1.3461887396129503e-05, - "loss": 1.0547, + "epoch": 0.8199661112680033, + "grad_norm": 3.054887716788059, + "learning_rate": 1.7052718919515904e-05, + "loss": 0.8231, "step": 5807 }, { - "epoch": 1.2178653805829314, - "grad_norm": 5.914237707344567, - "learning_rate": 1.3459763410396133e-05, - "loss": 0.6177, + "epoch": 0.8201073143179892, + "grad_norm": 3.69340035666905, + "learning_rate": 1.705163809658304e-05, + "loss": 1.3826, "step": 5808 }, { - "epoch": 1.2180750681484587, - "grad_norm": 6.9676987454875965, - "learning_rate": 1.3457639247343885e-05, - "loss": 0.9422, + "epoch": 0.8202485173679751, + "grad_norm": 3.962241081862201, + "learning_rate": 1.705055710977315e-05, + "loss": 1.1039, "step": 5809 }, { - "epoch": 1.2182847557139862, - "grad_norm": 7.905012194279419, - "learning_rate": 1.3455514907081635e-05, - "loss": 0.9716, + "epoch": 0.820389720417961, + "grad_norm": 3.987713984058454, + "learning_rate": 1.704947595911136e-05, + "loss": 1.002, "step": 5810 }, { - "epoch": 1.2184944432795135, - "grad_norm": 6.313732395166456, - "learning_rate": 1.3453390389718246e-05, - "loss": 0.8202, + "epoch": 0.8205309234679469, + "grad_norm": 3.453207982887673, + "learning_rate": 1.704839464462279e-05, + "loss": 1.1135, "step": 5811 }, { - "epoch": 1.2187041308450408, - "grad_norm": 7.945915293192182, - "learning_rate": 1.3451265695362615e-05, - "loss": 0.7789, + "epoch": 0.8206721265179328, + "grad_norm": 4.218345876616529, + "learning_rate": 1.704731316633258e-05, + "loss": 1.2511, "step": 5812 }, { - "epoch": 1.2189138184105683, - "grad_norm": 7.380412735034448, - "learning_rate": 1.3449140824123626e-05, - "loss": 0.7683, + "epoch": 0.8208133295679186, + "grad_norm": 3.2929394998797465, + "learning_rate": 1.704623152426585e-05, + "loss": 0.819, "step": 5813 }, { - "epoch": 1.2191235059760956, - "grad_norm": 7.178414898355785, - "learning_rate": 1.344701577611019e-05, - "loss": 0.9125, + "epoch": 0.8209545326179045, + "grad_norm": 3.344913537556092, + "learning_rate": 1.7045149718447743e-05, + "loss": 1.1679, "step": 5814 }, { - "epoch": 1.219333193541623, - "grad_norm": 7.401430215930856, - "learning_rate": 1.3444890551431215e-05, - "loss": 0.9301, + "epoch": 0.8210957356678904, + "grad_norm": 3.7168081927847347, + "learning_rate": 1.7044067748903402e-05, + "loss": 1.1642, "step": 5815 }, { - "epoch": 1.2195428811071503, - "grad_norm": 8.920824643369071, - "learning_rate": 1.3442765150195627e-05, - "loss": 1.0505, + "epoch": 0.8212369387178763, + "grad_norm": 3.703046675520849, + "learning_rate": 1.7042985615657964e-05, + "loss": 0.9206, "step": 5816 }, { - "epoch": 1.2197525686726778, - "grad_norm": 6.611875757190905, - "learning_rate": 1.3440639572512355e-05, - "loss": 0.8275, + "epoch": 0.8213781417678622, + "grad_norm": 3.66714145407268, + "learning_rate": 1.7041903318736586e-05, + "loss": 1.002, "step": 5817 }, { - "epoch": 1.219962256238205, - "grad_norm": 6.877354754794797, - "learning_rate": 1.3438513818490333e-05, - "loss": 0.8539, + "epoch": 0.821519344817848, + "grad_norm": 4.202229662247463, + "learning_rate": 1.7040820858164413e-05, + "loss": 1.1988, "step": 5818 }, { - "epoch": 1.2201719438037324, - "grad_norm": 7.781236188227104, - "learning_rate": 1.343638788823852e-05, - "loss": 0.9723, + "epoch": 0.8216605478678339, + "grad_norm": 3.707431859532501, + "learning_rate": 1.7039738233966607e-05, + "loss": 0.9763, "step": 5819 }, { - "epoch": 1.2203816313692597, - "grad_norm": 8.605524634137819, - "learning_rate": 1.3434261781865865e-05, - "loss": 0.9765, + "epoch": 0.8218017509178198, + "grad_norm": 3.4229616456462155, + "learning_rate": 1.703865544616832e-05, + "loss": 0.9472, "step": 5820 }, { - "epoch": 1.2205913189347872, - "grad_norm": 7.901321182569662, - "learning_rate": 1.3432135499481333e-05, - "loss": 1.0103, + "epoch": 0.8219429539678057, + "grad_norm": 3.5513247334096425, + "learning_rate": 1.7037572494794724e-05, + "loss": 0.9603, "step": 5821 }, { - "epoch": 1.2208010065003145, - "grad_norm": 7.516978977100672, - "learning_rate": 1.343000904119391e-05, - "loss": 1.1042, + "epoch": 0.8220841570177916, + "grad_norm": 3.167205595848142, + "learning_rate": 1.7036489379870982e-05, + "loss": 0.8196, "step": 5822 }, { - "epoch": 1.2210106940658418, - "grad_norm": 7.49216359570243, - "learning_rate": 1.3427882407112574e-05, - "loss": 1.063, + "epoch": 0.8222253600677775, + "grad_norm": 3.156262250205531, + "learning_rate": 1.7035406101422264e-05, + "loss": 0.8184, "step": 5823 }, { - "epoch": 1.2212203816313694, - "grad_norm": 8.128059586001783, - "learning_rate": 1.3425755597346313e-05, - "loss": 0.9747, + "epoch": 0.8223665631177633, + "grad_norm": 3.7699004774444997, + "learning_rate": 1.7034322659473748e-05, + "loss": 1.076, "step": 5824 }, { - "epoch": 1.2214300691968967, - "grad_norm": 6.93213545359693, - "learning_rate": 1.3423628612004143e-05, - "loss": 0.6665, + "epoch": 0.8225077661677492, + "grad_norm": 3.204048930980347, + "learning_rate": 1.703323905405061e-05, + "loss": 0.916, "step": 5825 }, { - "epoch": 1.221639756762424, - "grad_norm": 7.433121774727369, - "learning_rate": 1.3421501451195065e-05, - "loss": 0.9259, + "epoch": 0.8226489692177351, + "grad_norm": 3.7790144191123587, + "learning_rate": 1.7032155285178036e-05, + "loss": 1.0115, "step": 5826 }, { - "epoch": 1.2218494443279513, - "grad_norm": 6.5885814651724655, - "learning_rate": 1.3419374115028105e-05, - "loss": 0.7764, + "epoch": 0.822790172267721, + "grad_norm": 3.5875153584590653, + "learning_rate": 1.703107135288121e-05, + "loss": 0.9727, "step": 5827 }, { - "epoch": 1.2220591318934788, - "grad_norm": 7.399287103784915, - "learning_rate": 1.341724660361229e-05, - "loss": 0.8835, + "epoch": 0.8229313753177069, + "grad_norm": 3.6237002464377857, + "learning_rate": 1.702998725718532e-05, + "loss": 1.2298, "step": 5828 }, { - "epoch": 1.2222688194590061, - "grad_norm": 6.947010390574959, - "learning_rate": 1.341511891705666e-05, - "loss": 0.872, + "epoch": 0.8230725783676928, + "grad_norm": 3.466637758207509, + "learning_rate": 1.7028902998115566e-05, + "loss": 1.0482, "step": 5829 }, { - "epoch": 1.2224785070245334, - "grad_norm": 8.0832772624248, - "learning_rate": 1.3412991055470262e-05, - "loss": 0.9682, + "epoch": 0.8232137814176786, + "grad_norm": 3.9559017144629824, + "learning_rate": 1.7027818575697136e-05, + "loss": 1.1248, "step": 5830 }, { - "epoch": 1.2226881945900607, - "grad_norm": 8.126337344578992, - "learning_rate": 1.3410863018962155e-05, - "loss": 0.8755, + "epoch": 0.8233549844676645, + "grad_norm": 4.552308853152018, + "learning_rate": 1.7026733989955243e-05, + "loss": 1.309, "step": 5831 }, { - "epoch": 1.2228978821555883, - "grad_norm": 6.260063604829489, - "learning_rate": 1.3408734807641398e-05, - "loss": 1.0027, + "epoch": 0.8234961875176504, + "grad_norm": 3.8170900321759267, + "learning_rate": 1.7025649240915085e-05, + "loss": 0.9313, "step": 5832 }, { - "epoch": 1.2231075697211156, - "grad_norm": 6.560116287988558, - "learning_rate": 1.3406606421617075e-05, - "loss": 0.5288, + "epoch": 0.8236373905676363, + "grad_norm": 3.2442376484374167, + "learning_rate": 1.7024564328601873e-05, + "loss": 0.9453, "step": 5833 }, { - "epoch": 1.2233172572866429, - "grad_norm": 5.534130860741491, - "learning_rate": 1.340447786099826e-05, - "loss": 0.6164, + "epoch": 0.8237785936176222, + "grad_norm": 3.42622635224423, + "learning_rate": 1.7023479253040817e-05, + "loss": 1.1723, "step": 5834 }, { - "epoch": 1.2235269448521702, - "grad_norm": 7.241061585545885, - "learning_rate": 1.3402349125894052e-05, - "loss": 0.9813, + "epoch": 0.8239197966676081, + "grad_norm": 3.507803866355308, + "learning_rate": 1.702239401425714e-05, + "loss": 0.9912, "step": 5835 }, { - "epoch": 1.2237366324176977, - "grad_norm": 7.313403086803212, - "learning_rate": 1.3400220216413549e-05, - "loss": 0.7714, + "epoch": 0.824060999717594, + "grad_norm": 3.098321862390577, + "learning_rate": 1.7021308612276056e-05, + "loss": 0.8773, "step": 5836 }, { - "epoch": 1.223946319983225, - "grad_norm": 8.064592007794385, - "learning_rate": 1.3398091132665863e-05, - "loss": 0.9183, + "epoch": 0.8242022027675798, + "grad_norm": 3.384633765533748, + "learning_rate": 1.7020223047122794e-05, + "loss": 0.9368, "step": 5837 }, { - "epoch": 1.2241560075487523, - "grad_norm": 7.0345402871027645, - "learning_rate": 1.3395961874760115e-05, - "loss": 0.6848, + "epoch": 0.8243434058175657, + "grad_norm": 3.0332608855619556, + "learning_rate": 1.7019137318822577e-05, + "loss": 0.8364, "step": 5838 }, { - "epoch": 1.2243656951142796, - "grad_norm": 7.094041493020795, - "learning_rate": 1.339383244280543e-05, - "loss": 0.8814, + "epoch": 0.8244846088675516, + "grad_norm": 3.8343875677952575, + "learning_rate": 1.7018051427400643e-05, + "loss": 1.0591, "step": 5839 }, { - "epoch": 1.2245753826798071, - "grad_norm": 7.638651665661071, - "learning_rate": 1.3391702836910946e-05, - "loss": 0.9442, + "epoch": 0.8246258119175374, + "grad_norm": 3.450928433850531, + "learning_rate": 1.7016965372882227e-05, + "loss": 1.0085, "step": 5840 }, { - "epoch": 1.2247850702453345, - "grad_norm": 6.784243208199457, - "learning_rate": 1.3389573057185807e-05, - "loss": 0.9653, + "epoch": 0.8247670149675232, + "grad_norm": 3.5786389932154457, + "learning_rate": 1.701587915529256e-05, + "loss": 0.9758, "step": 5841 }, { - "epoch": 1.2249947578108618, - "grad_norm": 6.499346902308113, - "learning_rate": 1.338744310373917e-05, - "loss": 0.7606, + "epoch": 0.8249082180175091, + "grad_norm": 3.49987500228449, + "learning_rate": 1.7014792774656896e-05, + "loss": 1.0739, "step": 5842 }, { - "epoch": 1.2252044453763893, - "grad_norm": 5.899680961661284, - "learning_rate": 1.33853129766802e-05, - "loss": 0.5521, + "epoch": 0.825049421067495, + "grad_norm": 2.807231638781993, + "learning_rate": 1.7013706231000477e-05, + "loss": 0.7708, "step": 5843 }, { - "epoch": 1.2254141329419166, - "grad_norm": 7.695289028320727, - "learning_rate": 1.3383182676118067e-05, - "loss": 1.0382, + "epoch": 0.8251906241174809, + "grad_norm": 3.6248229437827515, + "learning_rate": 1.7012619524348552e-05, + "loss": 0.8654, "step": 5844 }, { - "epoch": 1.225623820507444, - "grad_norm": 5.960646240841862, - "learning_rate": 1.3381052202161957e-05, - "loss": 0.6716, + "epoch": 0.8253318271674668, + "grad_norm": 3.4480576869823722, + "learning_rate": 1.7011532654726377e-05, + "loss": 1.0496, "step": 5845 }, { - "epoch": 1.2258335080729712, - "grad_norm": 6.363195936557256, - "learning_rate": 1.3378921554921056e-05, - "loss": 0.7683, + "epoch": 0.8254730302174527, + "grad_norm": 3.5490542754129435, + "learning_rate": 1.7010445622159214e-05, + "loss": 1.0609, "step": 5846 }, { - "epoch": 1.2260431956384987, - "grad_norm": 6.415364351995851, - "learning_rate": 1.3376790734504564e-05, - "loss": 0.8462, + "epoch": 0.8256142332674385, + "grad_norm": 3.518291347350679, + "learning_rate": 1.700935842667232e-05, + "loss": 0.9585, "step": 5847 }, { - "epoch": 1.226252883204026, - "grad_norm": 7.603025485521094, - "learning_rate": 1.3374659741021692e-05, - "loss": 0.9919, + "epoch": 0.8257554363174244, + "grad_norm": 4.357876682400807, + "learning_rate": 1.7008271068290966e-05, + "loss": 0.8722, "step": 5848 }, { - "epoch": 1.2264625707695533, - "grad_norm": 6.292475174734174, - "learning_rate": 1.3372528574581653e-05, - "loss": 0.7473, + "epoch": 0.8258966393674103, + "grad_norm": 3.66777770256138, + "learning_rate": 1.700718354704042e-05, + "loss": 0.9161, "step": 5849 }, { - "epoch": 1.2266722583350806, - "grad_norm": 7.320104911572224, - "learning_rate": 1.3370397235293676e-05, - "loss": 0.8022, + "epoch": 0.8260378424173962, + "grad_norm": 3.623639935739178, + "learning_rate": 1.7006095862945948e-05, + "loss": 1.0593, "step": 5850 }, { - "epoch": 1.2268819459006082, - "grad_norm": 7.197282582784523, - "learning_rate": 1.3368265723266995e-05, - "loss": 0.9807, + "epoch": 0.8261790454673821, + "grad_norm": 3.6210608514631053, + "learning_rate": 1.700500801603284e-05, + "loss": 1.0051, "step": 5851 }, { - "epoch": 1.2270916334661355, - "grad_norm": 7.576549698116118, - "learning_rate": 1.3366134038610855e-05, - "loss": 0.9105, + "epoch": 0.826320248517368, + "grad_norm": 3.940838994880735, + "learning_rate": 1.700392000632637e-05, + "loss": 1.2193, "step": 5852 }, { - "epoch": 1.2273013210316628, - "grad_norm": 7.039019074210788, - "learning_rate": 1.3364002181434503e-05, - "loss": 0.9779, + "epoch": 0.8264614515673538, + "grad_norm": 3.813050664531881, + "learning_rate": 1.700283183385182e-05, + "loss": 1.0671, "step": 5853 }, { - "epoch": 1.22751100859719, - "grad_norm": 6.181529393634165, - "learning_rate": 1.3361870151847211e-05, - "loss": 0.657, + "epoch": 0.8266026546173397, + "grad_norm": 4.157730596431297, + "learning_rate": 1.7001743498634487e-05, + "loss": 1.0796, "step": 5854 }, { - "epoch": 1.2277206961627176, - "grad_norm": 6.8707855577299, - "learning_rate": 1.335973794995824e-05, - "loss": 0.7589, + "epoch": 0.8267438576673256, + "grad_norm": 3.7966246413260087, + "learning_rate": 1.7000655000699654e-05, + "loss": 1.2088, "step": 5855 }, { - "epoch": 1.227930383728245, - "grad_norm": 6.804275218537813, - "learning_rate": 1.3357605575876872e-05, - "loss": 0.821, + "epoch": 0.8268850607173115, + "grad_norm": 3.279659739838586, + "learning_rate": 1.6999566340072628e-05, + "loss": 0.8907, "step": 5856 }, { - "epoch": 1.2281400712937722, - "grad_norm": 8.743401802296475, - "learning_rate": 1.3355473029712393e-05, - "loss": 1.0944, + "epoch": 0.8270262637672974, + "grad_norm": 3.114988373962563, + "learning_rate": 1.69984775167787e-05, + "loss": 0.7916, "step": 5857 }, { - "epoch": 1.2283497588592995, - "grad_norm": 8.962245158971008, - "learning_rate": 1.3353340311574101e-05, - "loss": 0.8815, + "epoch": 0.8271674668172833, + "grad_norm": 3.3690436191317215, + "learning_rate": 1.6997388530843176e-05, + "loss": 1.0451, "step": 5858 }, { - "epoch": 1.228559446424827, - "grad_norm": 8.356869150861097, - "learning_rate": 1.3351207421571304e-05, - "loss": 0.9533, + "epoch": 0.8273086698672691, + "grad_norm": 3.106926854888022, + "learning_rate": 1.699629938229137e-05, + "loss": 0.8966, "step": 5859 }, { - "epoch": 1.2287691339903544, - "grad_norm": 7.54195111888495, - "learning_rate": 1.3349074359813317e-05, - "loss": 0.7816, + "epoch": 0.827449872917255, + "grad_norm": 3.5764869413648124, + "learning_rate": 1.6995210071148582e-05, + "loss": 1.0345, "step": 5860 }, { - "epoch": 1.2289788215558817, - "grad_norm": 6.902442437315407, - "learning_rate": 1.3346941126409454e-05, - "loss": 0.7994, + "epoch": 0.8275910759672409, + "grad_norm": 3.188071626576011, + "learning_rate": 1.6994120597440136e-05, + "loss": 1.0039, "step": 5861 }, { - "epoch": 1.2291885091214092, - "grad_norm": 9.026227841637313, - "learning_rate": 1.3344807721469058e-05, - "loss": 0.8414, + "epoch": 0.8277322790172268, + "grad_norm": 3.038646076122861, + "learning_rate": 1.6993030961191345e-05, + "loss": 0.9434, "step": 5862 }, { - "epoch": 1.2293981966869365, - "grad_norm": 7.49394277908484, - "learning_rate": 1.3342674145101464e-05, - "loss": 1.0682, + "epoch": 0.8278734820672127, + "grad_norm": 3.4391253924124996, + "learning_rate": 1.6991941162427537e-05, + "loss": 1.0459, "step": 5863 }, { - "epoch": 1.2296078842524638, - "grad_norm": 8.065999280809644, - "learning_rate": 1.3340540397416022e-05, - "loss": 1.1124, + "epoch": 0.8280146851171986, + "grad_norm": 3.8720366023995285, + "learning_rate": 1.699085120117403e-05, + "loss": 1.0294, "step": 5864 }, { - "epoch": 1.2298175718179911, - "grad_norm": 6.282781222155777, - "learning_rate": 1.333840647852209e-05, - "loss": 0.6416, + "epoch": 0.8281558881671844, + "grad_norm": 3.4408658567224726, + "learning_rate": 1.6989761077456167e-05, + "loss": 0.9036, "step": 5865 }, { - "epoch": 1.2300272593835186, - "grad_norm": 5.932990583435032, - "learning_rate": 1.3336272388529037e-05, - "loss": 0.6896, + "epoch": 0.8282970912171703, + "grad_norm": 3.2247844920066435, + "learning_rate": 1.6988670791299274e-05, + "loss": 0.8994, "step": 5866 }, { - "epoch": 1.230236946949046, - "grad_norm": 7.978718459026874, - "learning_rate": 1.3334138127546237e-05, - "loss": 0.995, + "epoch": 0.8284382942671562, + "grad_norm": 3.52698901073545, + "learning_rate": 1.698758034272869e-05, + "loss": 0.9014, "step": 5867 }, { - "epoch": 1.2304466345145733, - "grad_norm": 7.252933352731293, - "learning_rate": 1.3332003695683076e-05, - "loss": 0.9244, + "epoch": 0.8285794973171421, + "grad_norm": 3.706696570315058, + "learning_rate": 1.6986489731769755e-05, + "loss": 1.0195, "step": 5868 }, { - "epoch": 1.2306563220801006, - "grad_norm": 7.184662412193372, - "learning_rate": 1.3329869093048945e-05, - "loss": 0.8365, + "epoch": 0.828720700367128, + "grad_norm": 3.613141662415506, + "learning_rate": 1.698539895844782e-05, + "loss": 1.1392, "step": 5869 }, { - "epoch": 1.230866009645628, - "grad_norm": 7.230314584662014, - "learning_rate": 1.332773431975325e-05, - "loss": 0.9782, + "epoch": 0.8288619034171139, + "grad_norm": 3.197691188627702, + "learning_rate": 1.6984308022788227e-05, + "loss": 0.878, "step": 5870 }, { - "epoch": 1.2310756972111554, - "grad_norm": 6.564339943227598, - "learning_rate": 1.3325599375905398e-05, - "loss": 0.7735, + "epoch": 0.8290031064670997, + "grad_norm": 4.117332052470007, + "learning_rate": 1.698321692481633e-05, + "loss": 1.2604, "step": 5871 }, { - "epoch": 1.2312853847766827, - "grad_norm": 7.081147035610259, - "learning_rate": 1.3323464261614812e-05, - "loss": 0.9147, + "epoch": 0.8291443095170856, + "grad_norm": 4.034601463089735, + "learning_rate": 1.698212566455749e-05, + "loss": 1.258, "step": 5872 }, { - "epoch": 1.2314950723422102, - "grad_norm": 6.551852590609151, - "learning_rate": 1.3321328976990914e-05, - "loss": 0.9308, + "epoch": 0.8292855125670715, + "grad_norm": 4.239921057748761, + "learning_rate": 1.698103424203706e-05, + "loss": 1.1634, "step": 5873 }, { - "epoch": 1.2317047599077375, - "grad_norm": 7.67515793737451, - "learning_rate": 1.3319193522143147e-05, - "loss": 0.9228, + "epoch": 0.8294267156170573, + "grad_norm": 4.465061700604938, + "learning_rate": 1.6979942657280414e-05, + "loss": 0.8648, "step": 5874 }, { - "epoch": 1.2319144474732648, - "grad_norm": 10.019820072331635, - "learning_rate": 1.3317057897180958e-05, - "loss": 1.0824, + "epoch": 0.8295679186670432, + "grad_norm": 3.3805132054400167, + "learning_rate": 1.6978850910312916e-05, + "loss": 0.8608, "step": 5875 }, { - "epoch": 1.2321241350387921, - "grad_norm": 6.691562703091251, - "learning_rate": 1.3314922102213794e-05, - "loss": 0.8399, + "epoch": 0.829709121717029, + "grad_norm": 3.541360255753306, + "learning_rate": 1.6977759001159934e-05, + "loss": 1.1114, "step": 5876 }, { - "epoch": 1.2323338226043195, - "grad_norm": 5.566115250426404, - "learning_rate": 1.3312786137351128e-05, - "loss": 0.7229, + "epoch": 0.8298503247670149, + "grad_norm": 3.275462772088245, + "learning_rate": 1.6976666929846847e-05, + "loss": 1.1193, "step": 5877 }, { - "epoch": 1.232543510169847, - "grad_norm": 8.668973974034632, - "learning_rate": 1.3310650002702424e-05, - "loss": 1.0549, + "epoch": 0.8299915278170008, + "grad_norm": 3.291296963393997, + "learning_rate": 1.6975574696399033e-05, + "loss": 0.8899, "step": 5878 }, { - "epoch": 1.2327531977353743, - "grad_norm": 8.331689781545807, - "learning_rate": 1.3308513698377166e-05, - "loss": 1.0334, + "epoch": 0.8301327308669867, + "grad_norm": 3.302637481732676, + "learning_rate": 1.697448230084188e-05, + "loss": 1.1653, "step": 5879 }, { - "epoch": 1.2329628853009016, - "grad_norm": 5.538892876202873, - "learning_rate": 1.3306377224484841e-05, - "loss": 0.686, + "epoch": 0.8302739339169726, + "grad_norm": 3.191813358649746, + "learning_rate": 1.6973389743200764e-05, + "loss": 1.0004, "step": 5880 }, { - "epoch": 1.2331725728664291, - "grad_norm": 8.221833789811694, - "learning_rate": 1.3304240581134953e-05, - "loss": 0.8611, + "epoch": 0.8304151369669585, + "grad_norm": 4.228033401666042, + "learning_rate": 1.6972297023501087e-05, + "loss": 1.1992, "step": 5881 }, { - "epoch": 1.2333822604319564, - "grad_norm": 10.305839729022868, - "learning_rate": 1.3302103768436998e-05, - "loss": 1.1235, + "epoch": 0.8305563400169443, + "grad_norm": 4.054907681321021, + "learning_rate": 1.6971204141768235e-05, + "loss": 1.0915, "step": 5882 }, { - "epoch": 1.2335919479974837, - "grad_norm": 8.713198883172174, - "learning_rate": 1.3299966786500502e-05, - "loss": 0.8551, + "epoch": 0.8306975430669302, + "grad_norm": 3.985149596037757, + "learning_rate": 1.697011109802761e-05, + "loss": 0.9379, "step": 5883 }, { - "epoch": 1.233801635563011, - "grad_norm": 7.012397694723294, - "learning_rate": 1.3297829635434982e-05, - "loss": 1.0189, + "epoch": 0.8308387461169161, + "grad_norm": 3.06295845931574, + "learning_rate": 1.696901789230461e-05, + "loss": 0.9655, "step": 5884 }, { - "epoch": 1.2340113231285386, - "grad_norm": 7.369799091393783, - "learning_rate": 1.3295692315349977e-05, - "loss": 0.9282, + "epoch": 0.830979949166902, + "grad_norm": 3.478679343141201, + "learning_rate": 1.6967924524624648e-05, + "loss": 0.8378, "step": 5885 }, { - "epoch": 1.2342210106940659, - "grad_norm": 9.457098879881555, - "learning_rate": 1.3293554826355021e-05, - "loss": 0.7594, + "epoch": 0.8311211522168879, + "grad_norm": 3.0790287115607207, + "learning_rate": 1.6966830995013134e-05, + "loss": 0.9457, "step": 5886 }, { - "epoch": 1.2344306982595932, - "grad_norm": 6.554775324120577, - "learning_rate": 1.3291417168559671e-05, - "loss": 0.7042, + "epoch": 0.8312623552668738, + "grad_norm": 3.453317369140833, + "learning_rate": 1.6965737303495466e-05, + "loss": 0.8738, "step": 5887 }, { - "epoch": 1.2346403858251205, - "grad_norm": 8.81949058365872, - "learning_rate": 1.3289279342073477e-05, - "loss": 0.8508, + "epoch": 0.8314035583168596, + "grad_norm": 3.2240776711703405, + "learning_rate": 1.6964643450097077e-05, + "loss": 0.7996, "step": 5888 }, { - "epoch": 1.234850073390648, - "grad_norm": 6.594094679849617, - "learning_rate": 1.3287141347006018e-05, - "loss": 0.8406, + "epoch": 0.8315447613668455, + "grad_norm": 3.042950756827184, + "learning_rate": 1.6963549434843383e-05, + "loss": 1.0046, "step": 5889 }, { - "epoch": 1.2350597609561753, - "grad_norm": 8.230547223398219, - "learning_rate": 1.3285003183466861e-05, - "loss": 0.9463, + "epoch": 0.8316859644168314, + "grad_norm": 3.481526637861636, + "learning_rate": 1.6962455257759806e-05, + "loss": 1.054, "step": 5890 }, { - "epoch": 1.2352694485217026, - "grad_norm": 7.241667764598087, - "learning_rate": 1.3282864851565593e-05, - "loss": 0.6958, + "epoch": 0.8318271674668173, + "grad_norm": 3.138601269918471, + "learning_rate": 1.6961360918871774e-05, + "loss": 0.8595, "step": 5891 }, { - "epoch": 1.2354791360872301, - "grad_norm": 6.277505374648453, - "learning_rate": 1.3280726351411807e-05, - "loss": 0.6448, + "epoch": 0.8319683705168032, + "grad_norm": 4.251834595222421, + "learning_rate": 1.696026641820472e-05, + "loss": 1.2265, "step": 5892 }, { - "epoch": 1.2356888236527575, - "grad_norm": 7.868498131415277, - "learning_rate": 1.3278587683115109e-05, - "loss": 0.9432, + "epoch": 0.832109573566789, + "grad_norm": 3.3779271834790423, + "learning_rate": 1.6959171755784086e-05, + "loss": 0.9088, "step": 5893 }, { - "epoch": 1.2358985112182848, - "grad_norm": 7.717147087834003, - "learning_rate": 1.3276448846785102e-05, - "loss": 0.8075, + "epoch": 0.8322507766167749, + "grad_norm": 3.3553186015781775, + "learning_rate": 1.69580769316353e-05, + "loss": 0.9234, "step": 5894 }, { - "epoch": 1.236108198783812, - "grad_norm": 7.610037105363087, - "learning_rate": 1.327430984253141e-05, - "loss": 1.0663, + "epoch": 0.8323919796667608, + "grad_norm": 2.93984560133028, + "learning_rate": 1.6956981945783814e-05, + "loss": 0.8071, "step": 5895 }, { - "epoch": 1.2363178863493394, - "grad_norm": 6.854136103788508, - "learning_rate": 1.327217067046366e-05, - "loss": 0.8529, + "epoch": 0.8325331827167467, + "grad_norm": 3.358985809640167, + "learning_rate": 1.695588679825507e-05, + "loss": 0.9534, "step": 5896 }, { - "epoch": 1.236527573914867, - "grad_norm": 7.23677996899156, - "learning_rate": 1.3270031330691486e-05, - "loss": 0.8035, + "epoch": 0.8326743857667326, + "grad_norm": 2.8503286434034223, + "learning_rate": 1.6954791489074524e-05, + "loss": 0.8056, "step": 5897 }, { - "epoch": 1.2367372614803942, - "grad_norm": 7.319395921905429, - "learning_rate": 1.3267891823324538e-05, - "loss": 0.9607, + "epoch": 0.8328155888167185, + "grad_norm": 2.898589432277315, + "learning_rate": 1.6953696018267627e-05, + "loss": 0.7662, "step": 5898 }, { - "epoch": 1.2369469490459215, - "grad_norm": 8.266866387025116, - "learning_rate": 1.3265752148472465e-05, - "loss": 0.8838, + "epoch": 0.8329567918667043, + "grad_norm": 3.5211869038855683, + "learning_rate": 1.6952600385859836e-05, + "loss": 0.9385, "step": 5899 }, { - "epoch": 1.237156636611449, - "grad_norm": 6.529974215078343, - "learning_rate": 1.3263612306244929e-05, - "loss": 0.7246, + "epoch": 0.8330979949166902, + "grad_norm": 4.12796382694839, + "learning_rate": 1.6951504591876614e-05, + "loss": 0.9873, "step": 5900 }, { - "epoch": 1.2373663241769763, - "grad_norm": 7.331703540721676, - "learning_rate": 1.3261472296751603e-05, - "loss": 0.7812, + "epoch": 0.8332391979666761, + "grad_norm": 4.391782705432725, + "learning_rate": 1.695040863634343e-05, + "loss": 1.2203, "step": 5901 }, { - "epoch": 1.2375760117425036, - "grad_norm": 7.8433271606143204, - "learning_rate": 1.3259332120102165e-05, - "loss": 0.9865, + "epoch": 0.833380401016662, + "grad_norm": 3.556783279911456, + "learning_rate": 1.694931251928575e-05, + "loss": 1.0176, "step": 5902 }, { - "epoch": 1.237785699308031, - "grad_norm": 7.222709676453146, - "learning_rate": 1.3257191776406301e-05, - "loss": 0.8065, + "epoch": 0.8335216040666479, + "grad_norm": 3.567865983044278, + "learning_rate": 1.6948216240729046e-05, + "loss": 1.1966, "step": 5903 }, { - "epoch": 1.2379953868735585, - "grad_norm": 7.5281333583430685, - "learning_rate": 1.3255051265773713e-05, - "loss": 0.9838, + "epoch": 0.8336628071166338, + "grad_norm": 3.5847216362340735, + "learning_rate": 1.69471198006988e-05, + "loss": 1.0892, "step": 5904 }, { - "epoch": 1.2382050744390858, - "grad_norm": 7.797138188646394, - "learning_rate": 1.3252910588314096e-05, - "loss": 0.9126, + "epoch": 0.8338040101666196, + "grad_norm": 3.468717501091991, + "learning_rate": 1.694602319922049e-05, + "loss": 0.9586, "step": 5905 }, { - "epoch": 1.238414762004613, - "grad_norm": 8.284317230859948, - "learning_rate": 1.3250769744137176e-05, - "loss": 0.8831, + "epoch": 0.8339452132166055, + "grad_norm": 4.113685450513926, + "learning_rate": 1.6944926436319596e-05, + "loss": 1.0895, "step": 5906 }, { - "epoch": 1.2386244495701404, - "grad_norm": 7.942609078798129, - "learning_rate": 1.3248628733352664e-05, - "loss": 1.069, + "epoch": 0.8340864162665914, + "grad_norm": 3.350290982174312, + "learning_rate": 1.6943829512021616e-05, + "loss": 1.0032, "step": 5907 }, { - "epoch": 1.238834137135668, - "grad_norm": 8.081077799230988, - "learning_rate": 1.3246487556070298e-05, - "loss": 0.8513, + "epoch": 0.8342276193165772, + "grad_norm": 3.8931686842306403, + "learning_rate": 1.6942732426352032e-05, + "loss": 1.2496, "step": 5908 }, { - "epoch": 1.2390438247011952, - "grad_norm": 7.556997169509274, - "learning_rate": 1.324434621239981e-05, - "loss": 0.8617, + "epoch": 0.8343688223665631, + "grad_norm": 3.685521780626063, + "learning_rate": 1.694163517933635e-05, + "loss": 1.1995, "step": 5909 }, { - "epoch": 1.2392535122667225, - "grad_norm": 7.170677953907514, - "learning_rate": 1.3242204702450956e-05, - "loss": 0.9087, + "epoch": 0.834510025416549, + "grad_norm": 3.7451885170354275, + "learning_rate": 1.694053777100006e-05, + "loss": 1.1621, "step": 5910 }, { - "epoch": 1.23946319983225, - "grad_norm": 8.97477581899254, - "learning_rate": 1.3240063026333485e-05, - "loss": 1.0813, + "epoch": 0.8346512284665348, + "grad_norm": 3.062268436005849, + "learning_rate": 1.6939440201368675e-05, + "loss": 0.8588, "step": 5911 }, { - "epoch": 1.2396728873977774, - "grad_norm": 8.183028029277942, - "learning_rate": 1.3237921184157165e-05, - "loss": 1.0831, + "epoch": 0.8347924315165207, + "grad_norm": 3.439367838047728, + "learning_rate": 1.693834247046769e-05, + "loss": 0.8138, "step": 5912 }, { - "epoch": 1.2398825749633047, - "grad_norm": 7.7608822384758644, - "learning_rate": 1.3235779176031768e-05, - "loss": 0.9606, + "epoch": 0.8349336345665066, + "grad_norm": 2.754630935228184, + "learning_rate": 1.693724457832263e-05, + "loss": 0.9266, "step": 5913 }, { - "epoch": 1.240092262528832, - "grad_norm": 9.086904608769242, - "learning_rate": 1.3233637002067077e-05, - "loss": 0.9414, + "epoch": 0.8350748376164925, + "grad_norm": 3.135884338177701, + "learning_rate": 1.6936146524958994e-05, + "loss": 1.0845, "step": 5914 }, { - "epoch": 1.2403019500943593, - "grad_norm": 6.580209896617262, - "learning_rate": 1.3231494662372879e-05, - "loss": 0.8229, + "epoch": 0.8352160406664784, + "grad_norm": 5.577325310002807, + "learning_rate": 1.6935048310402312e-05, + "loss": 0.9746, "step": 5915 }, { - "epoch": 1.2405116376598868, - "grad_norm": 6.783376017936515, - "learning_rate": 1.3229352157058976e-05, - "loss": 0.8793, + "epoch": 0.8353572437164642, + "grad_norm": 3.476498658564067, + "learning_rate": 1.6933949934678104e-05, + "loss": 0.9365, "step": 5916 }, { - "epoch": 1.2407213252254141, - "grad_norm": 7.159058781754046, - "learning_rate": 1.3227209486235175e-05, - "loss": 0.7659, + "epoch": 0.8354984467664501, + "grad_norm": 3.6545234020400486, + "learning_rate": 1.6932851397811895e-05, + "loss": 1.0684, "step": 5917 }, { - "epoch": 1.2409310127909414, - "grad_norm": 7.184444666981426, - "learning_rate": 1.3225066650011292e-05, - "loss": 0.7275, + "epoch": 0.835639649816436, + "grad_norm": 3.3254833898298917, + "learning_rate": 1.693175269982921e-05, + "loss": 1.101, "step": 5918 }, { - "epoch": 1.241140700356469, - "grad_norm": 5.427450237819178, - "learning_rate": 1.3222923648497145e-05, - "loss": 0.725, + "epoch": 0.8357808528664219, + "grad_norm": 3.609196813446882, + "learning_rate": 1.6930653840755585e-05, + "loss": 1.2006, "step": 5919 }, { - "epoch": 1.2413503879219963, - "grad_norm": 8.222908120250661, - "learning_rate": 1.3220780481802575e-05, - "loss": 0.8327, + "epoch": 0.8359220559164078, + "grad_norm": 3.5192964447184707, + "learning_rate": 1.692955482061656e-05, + "loss": 1.1923, "step": 5920 }, { - "epoch": 1.2415600754875236, - "grad_norm": 7.200754136202807, - "learning_rate": 1.321863715003742e-05, - "loss": 0.8968, + "epoch": 0.8360632589663937, + "grad_norm": 3.2508983372315354, + "learning_rate": 1.6928455639437677e-05, + "loss": 0.8796, "step": 5921 }, { - "epoch": 1.2417697630530509, - "grad_norm": 6.4969008449583505, - "learning_rate": 1.3216493653311524e-05, - "loss": 0.7659, + "epoch": 0.8362044620163795, + "grad_norm": 4.092745206415775, + "learning_rate": 1.6927356297244474e-05, + "loss": 1.2865, "step": 5922 }, { - "epoch": 1.2419794506185784, - "grad_norm": 7.062470127460296, - "learning_rate": 1.3214349991734752e-05, - "loss": 0.7546, + "epoch": 0.8363456650663654, + "grad_norm": 3.806560014850811, + "learning_rate": 1.69262567940625e-05, + "loss": 1.0871, "step": 5923 }, { - "epoch": 1.2421891381841057, - "grad_norm": 7.564135408119068, - "learning_rate": 1.3212206165416968e-05, - "loss": 0.8186, + "epoch": 0.8364868681163513, + "grad_norm": 3.997069072595441, + "learning_rate": 1.6925157129917308e-05, + "loss": 1.0395, "step": 5924 }, { - "epoch": 1.242398825749633, - "grad_norm": 7.040137155336007, - "learning_rate": 1.3210062174468045e-05, - "loss": 0.9118, + "epoch": 0.8366280711663372, + "grad_norm": 4.682896849101525, + "learning_rate": 1.692405730483446e-05, + "loss": 1.2607, "step": 5925 }, { - "epoch": 1.2426085133151603, - "grad_norm": 5.983356165001147, - "learning_rate": 1.320791801899787e-05, - "loss": 0.6286, + "epoch": 0.8367692742163231, + "grad_norm": 3.4728761779722817, + "learning_rate": 1.692295731883951e-05, + "loss": 1.0828, "step": 5926 }, { - "epoch": 1.2428182008806878, - "grad_norm": 7.993299373583643, - "learning_rate": 1.3205773699116335e-05, - "loss": 1.0812, + "epoch": 0.836910477266309, + "grad_norm": 3.800740244779232, + "learning_rate": 1.692185717195802e-05, + "loss": 1.0595, "step": 5927 }, { - "epoch": 1.2430278884462151, - "grad_norm": 6.637840888719781, - "learning_rate": 1.320362921493333e-05, - "loss": 0.7251, + "epoch": 0.8370516803162948, + "grad_norm": 3.408257638862654, + "learning_rate": 1.6920756864215558e-05, + "loss": 1.0223, "step": 5928 }, { - "epoch": 1.2432375760117425, - "grad_norm": 6.563449298794639, - "learning_rate": 1.320148456655878e-05, - "loss": 0.8709, + "epoch": 0.8371928833662807, + "grad_norm": 3.779486409945569, + "learning_rate": 1.69196563956377e-05, + "loss": 1.2249, "step": 5929 }, { - "epoch": 1.24344726357727, - "grad_norm": 7.581467297966803, - "learning_rate": 1.3199339754102588e-05, - "loss": 0.7732, + "epoch": 0.8373340864162666, + "grad_norm": 3.173967589899312, + "learning_rate": 1.691855576625001e-05, + "loss": 0.8391, "step": 5930 }, { - "epoch": 1.2436569511427973, - "grad_norm": 6.8058803671494665, - "learning_rate": 1.3197194777674686e-05, - "loss": 0.9513, + "epoch": 0.8374752894662525, + "grad_norm": 3.301197248006986, + "learning_rate": 1.691745497607807e-05, + "loss": 0.991, "step": 5931 }, { - "epoch": 1.2438666387083246, - "grad_norm": 6.32267997053899, - "learning_rate": 1.3195049637385009e-05, - "loss": 0.8289, + "epoch": 0.8376164925162384, + "grad_norm": 9.22659136379249, + "learning_rate": 1.6916354025147473e-05, + "loss": 1.3671, "step": 5932 }, { - "epoch": 1.244076326273852, - "grad_norm": 6.875654871593721, - "learning_rate": 1.3192904333343495e-05, - "loss": 0.8299, + "epoch": 0.8377576955662243, + "grad_norm": 4.099686459135056, + "learning_rate": 1.691525291348379e-05, + "loss": 1.1944, "step": 5933 }, { - "epoch": 1.2442860138393792, - "grad_norm": 7.086009104465749, - "learning_rate": 1.3190758865660094e-05, - "loss": 0.8272, + "epoch": 0.8378988986162101, + "grad_norm": 3.4282534497022783, + "learning_rate": 1.691415164111261e-05, + "loss": 0.9133, "step": 5934 }, { - "epoch": 1.2444957014049067, - "grad_norm": 7.251641598268852, - "learning_rate": 1.3188613234444769e-05, - "loss": 0.7386, + "epoch": 0.838040101666196, + "grad_norm": 3.4967592754200263, + "learning_rate": 1.691305020805954e-05, + "loss": 1.079, "step": 5935 }, { - "epoch": 1.244705388970434, - "grad_norm": 6.987856155535251, - "learning_rate": 1.3186467439807485e-05, - "loss": 0.8563, + "epoch": 0.8381813047161819, + "grad_norm": 3.0294058077256234, + "learning_rate": 1.6911948614350165e-05, + "loss": 0.9381, "step": 5936 }, { - "epoch": 1.2449150765359613, - "grad_norm": 6.508274965666494, - "learning_rate": 1.3184321481858215e-05, - "loss": 0.7127, + "epoch": 0.8383225077661678, + "grad_norm": 3.5826042618335108, + "learning_rate": 1.6910846860010095e-05, + "loss": 0.9539, "step": 5937 }, { - "epoch": 1.2451247641014889, - "grad_norm": 7.31527588142438, - "learning_rate": 1.318217536070695e-05, - "loss": 0.8898, + "epoch": 0.8384637108161537, + "grad_norm": 3.1456098157134176, + "learning_rate": 1.6909744945064924e-05, + "loss": 0.7363, "step": 5938 }, { - "epoch": 1.2453344516670162, - "grad_norm": 6.915994760615957, - "learning_rate": 1.3180029076463679e-05, - "loss": 0.8771, + "epoch": 0.8386049138661396, + "grad_norm": 3.456783072159234, + "learning_rate": 1.6908642869540264e-05, + "loss": 0.9508, "step": 5939 }, { - "epoch": 1.2455441392325435, - "grad_norm": 6.828163396740463, - "learning_rate": 1.3177882629238398e-05, - "loss": 0.9628, + "epoch": 0.8387461169161254, + "grad_norm": 3.7624508664004805, + "learning_rate": 1.6907540633461728e-05, + "loss": 1.1674, "step": 5940 }, { - "epoch": 1.2457538267980708, - "grad_norm": 6.068641268380271, - "learning_rate": 1.3175736019141125e-05, - "loss": 0.7807, + "epoch": 0.8388873199661113, + "grad_norm": 3.3876638363264058, + "learning_rate": 1.690643823685493e-05, + "loss": 1.0581, "step": 5941 }, { - "epoch": 1.2459635143635983, - "grad_norm": 7.6292257742566525, - "learning_rate": 1.3173589246281872e-05, - "loss": 1.144, + "epoch": 0.8390285230160971, + "grad_norm": 3.365417200084475, + "learning_rate": 1.690533567974549e-05, + "loss": 1.0508, "step": 5942 }, { - "epoch": 1.2461732019291256, - "grad_norm": 7.133807228112248, - "learning_rate": 1.3171442310770666e-05, - "loss": 0.8635, + "epoch": 0.839169726066083, + "grad_norm": 3.4713892528827284, + "learning_rate": 1.6904232962159034e-05, + "loss": 1.2145, "step": 5943 }, { - "epoch": 1.246382889494653, - "grad_norm": 6.643684556682252, - "learning_rate": 1.316929521271754e-05, - "loss": 0.8486, + "epoch": 0.8393109291160689, + "grad_norm": 3.2365620724164312, + "learning_rate": 1.6903130084121183e-05, + "loss": 0.9824, "step": 5944 }, { - "epoch": 1.2465925770601802, - "grad_norm": 6.992939035955345, - "learning_rate": 1.3167147952232537e-05, - "loss": 0.7983, + "epoch": 0.8394521321660547, + "grad_norm": 3.4112150481914587, + "learning_rate": 1.6902027045657572e-05, + "loss": 1.0141, "step": 5945 }, { - "epoch": 1.2468022646257078, - "grad_norm": 6.585236598907351, - "learning_rate": 1.316500052942571e-05, - "loss": 0.7214, + "epoch": 0.8395933352160406, + "grad_norm": 2.785623176000223, + "learning_rate": 1.6900923846793832e-05, + "loss": 0.8, "step": 5946 }, { - "epoch": 1.247011952191235, - "grad_norm": 8.448339676903569, - "learning_rate": 1.3162852944407112e-05, - "loss": 0.7745, + "epoch": 0.8397345382660265, + "grad_norm": 3.763794435754244, + "learning_rate": 1.6899820487555602e-05, + "loss": 0.9531, "step": 5947 }, { - "epoch": 1.2472216397567624, - "grad_norm": 7.327930690348754, - "learning_rate": 1.3160705197286819e-05, - "loss": 0.868, + "epoch": 0.8398757413160124, + "grad_norm": 3.2025111701403293, + "learning_rate": 1.6898716967968522e-05, + "loss": 0.9809, "step": 5948 }, { - "epoch": 1.24743132732229, - "grad_norm": 7.1969647483099495, - "learning_rate": 1.3158557288174905e-05, - "loss": 0.6885, + "epoch": 0.8400169443659983, + "grad_norm": 3.6536271121254096, + "learning_rate": 1.6897613288058244e-05, + "loss": 0.8872, "step": 5949 }, { - "epoch": 1.2476410148878172, - "grad_norm": 6.961021058922199, - "learning_rate": 1.3156409217181448e-05, - "loss": 0.8385, + "epoch": 0.8401581474159842, + "grad_norm": 3.9588265021045648, + "learning_rate": 1.689650944785041e-05, + "loss": 1.0873, "step": 5950 }, { - "epoch": 1.2478507024533445, - "grad_norm": 10.56644835049462, - "learning_rate": 1.3154260984416542e-05, - "loss": 1.0422, + "epoch": 0.84029935046597, + "grad_norm": 3.652977698607157, + "learning_rate": 1.689540544737067e-05, + "loss": 1.0716, "step": 5951 }, { - "epoch": 1.2480603900188718, - "grad_norm": 6.223421505031867, - "learning_rate": 1.3152112589990293e-05, - "loss": 0.7384, + "epoch": 0.8404405535159559, + "grad_norm": 3.013473970093273, + "learning_rate": 1.689430128664469e-05, + "loss": 0.8796, "step": 5952 }, { - "epoch": 1.2482700775843993, - "grad_norm": 6.9770299030327685, - "learning_rate": 1.3149964034012807e-05, - "loss": 0.8129, + "epoch": 0.8405817565659418, + "grad_norm": 3.5355601929233242, + "learning_rate": 1.6893196965698125e-05, + "loss": 0.9819, "step": 5953 }, { - "epoch": 1.2484797651499266, - "grad_norm": 7.711382813914579, - "learning_rate": 1.31478153165942e-05, - "loss": 0.7714, + "epoch": 0.8407229596159277, + "grad_norm": 3.1864556779129543, + "learning_rate": 1.6892092484556638e-05, + "loss": 0.8219, "step": 5954 }, { - "epoch": 1.248689452715454, - "grad_norm": 8.345489197240461, - "learning_rate": 1.3145666437844597e-05, - "loss": 0.8987, + "epoch": 0.8408641626659136, + "grad_norm": 3.3019546034004628, + "learning_rate": 1.68909878432459e-05, + "loss": 0.9323, "step": 5955 }, { - "epoch": 1.2488991402809813, - "grad_norm": 7.0084718188874655, - "learning_rate": 1.3143517397874137e-05, - "loss": 0.7883, + "epoch": 0.8410053657158995, + "grad_norm": 3.8133673051517465, + "learning_rate": 1.6889883041791578e-05, + "loss": 1.0364, "step": 5956 }, { - "epoch": 1.2491088278465088, - "grad_norm": 8.071885252970802, - "learning_rate": 1.3141368196792954e-05, - "loss": 0.8244, + "epoch": 0.8411465687658853, + "grad_norm": 3.2944054856728227, + "learning_rate": 1.688877808021935e-05, + "loss": 1.101, "step": 5957 }, { - "epoch": 1.249318515412036, - "grad_norm": 7.179253261128992, - "learning_rate": 1.3139218834711205e-05, - "loss": 0.778, + "epoch": 0.8412877718158712, + "grad_norm": 3.0410666942275957, + "learning_rate": 1.68876729585549e-05, + "loss": 0.8554, "step": 5958 }, { - "epoch": 1.2495282029775634, - "grad_norm": 8.735841735426847, - "learning_rate": 1.3137069311739047e-05, - "loss": 1.0267, + "epoch": 0.8414289748658571, + "grad_norm": 3.576712871146497, + "learning_rate": 1.6886567676823897e-05, + "loss": 1.0287, "step": 5959 }, { - "epoch": 1.2497378905430907, - "grad_norm": 8.12252458657569, - "learning_rate": 1.3134919627986645e-05, - "loss": 0.8295, + "epoch": 0.841570177915843, + "grad_norm": 3.8135253175731174, + "learning_rate": 1.6885462235052038e-05, + "loss": 1.2731, "step": 5960 }, { - "epoch": 1.2499475781086182, - "grad_norm": 6.837977156436172, - "learning_rate": 1.3132769783564171e-05, - "loss": 0.7049, + "epoch": 0.8417113809658289, + "grad_norm": 3.198176583481025, + "learning_rate": 1.688435663326501e-05, + "loss": 1.007, "step": 5961 }, { - "epoch": 1.2501572656741455, - "grad_norm": 9.144628938970335, - "learning_rate": 1.3130619778581819e-05, - "loss": 0.9484, + "epoch": 0.8418525840158148, + "grad_norm": 3.6590216916048783, + "learning_rate": 1.688325087148851e-05, + "loss": 1.0171, "step": 5962 }, { - "epoch": 1.2503669532396728, - "grad_norm": 7.0659514310338505, - "learning_rate": 1.3128469613149766e-05, - "loss": 0.7887, + "epoch": 0.8419937870658006, + "grad_norm": 3.0157381286558764, + "learning_rate": 1.688214494974823e-05, + "loss": 0.8299, "step": 5963 }, { - "epoch": 1.2505766408052001, - "grad_norm": 6.622287998410311, - "learning_rate": 1.3126319287378225e-05, - "loss": 0.7791, + "epoch": 0.8421349901157865, + "grad_norm": 3.478256469718886, + "learning_rate": 1.6881038868069875e-05, + "loss": 1.0015, "step": 5964 }, { - "epoch": 1.2507863283707277, - "grad_norm": 6.773965691230501, - "learning_rate": 1.3124168801377395e-05, - "loss": 0.7283, + "epoch": 0.8422761931657724, + "grad_norm": 3.964501313107971, + "learning_rate": 1.687993262647915e-05, + "loss": 0.9969, "step": 5965 }, { - "epoch": 1.250996015936255, - "grad_norm": 7.525730484773722, - "learning_rate": 1.3122018155257497e-05, - "loss": 0.8708, + "epoch": 0.8424173962157583, + "grad_norm": 3.386723761461656, + "learning_rate": 1.6878826225001756e-05, + "loss": 1.065, "step": 5966 }, { - "epoch": 1.2512057035017823, - "grad_norm": 5.8133519404578164, - "learning_rate": 1.3119867349128752e-05, - "loss": 0.6654, + "epoch": 0.8425585992657442, + "grad_norm": 3.4690063991892974, + "learning_rate": 1.6877719663663414e-05, + "loss": 0.9423, "step": 5967 }, { - "epoch": 1.2514153910673098, - "grad_norm": 8.545979295970767, - "learning_rate": 1.3117716383101398e-05, - "loss": 0.967, + "epoch": 0.84269980231573, + "grad_norm": 3.4597259650913434, + "learning_rate": 1.687661294248984e-05, + "loss": 1.03, "step": 5968 }, { - "epoch": 1.2516250786328371, - "grad_norm": 7.940506567516569, - "learning_rate": 1.3115565257285668e-05, - "loss": 0.9418, + "epoch": 0.8428410053657159, + "grad_norm": 3.415388405272459, + "learning_rate": 1.6875506061506746e-05, + "loss": 1.1372, "step": 5969 }, { - "epoch": 1.2518347661983644, - "grad_norm": 7.981152185695404, - "learning_rate": 1.3113413971791818e-05, - "loss": 0.8263, + "epoch": 0.8429822084157018, + "grad_norm": 3.51721928778269, + "learning_rate": 1.6874399020739865e-05, + "loss": 1.0717, "step": 5970 }, { - "epoch": 1.2520444537638917, - "grad_norm": 7.808213978015147, - "learning_rate": 1.31112625267301e-05, - "loss": 0.746, + "epoch": 0.8431234114656877, + "grad_norm": 3.5430361039202727, + "learning_rate": 1.6873291820214917e-05, + "loss": 1.2683, "step": 5971 }, { - "epoch": 1.252254141329419, - "grad_norm": 9.568949866504491, - "learning_rate": 1.310911092221078e-05, - "loss": 1.1228, + "epoch": 0.8432646145156736, + "grad_norm": 3.8615169281944133, + "learning_rate": 1.6872184459957637e-05, + "loss": 1.1621, "step": 5972 }, { - "epoch": 1.2524638288949466, - "grad_norm": 7.515001352412217, - "learning_rate": 1.3106959158344135e-05, - "loss": 0.7434, + "epoch": 0.8434058175656595, + "grad_norm": 3.5055952171117655, + "learning_rate": 1.687107693999376e-05, + "loss": 1.0403, "step": 5973 }, { - "epoch": 1.2526735164604739, - "grad_norm": 6.8584554543040515, - "learning_rate": 1.3104807235240444e-05, - "loss": 0.6662, + "epoch": 0.8435470206156453, + "grad_norm": 3.1519406536692354, + "learning_rate": 1.686996926034902e-05, + "loss": 0.8113, "step": 5974 }, { - "epoch": 1.2528832040260012, - "grad_norm": 6.656636325948288, - "learning_rate": 1.3102655153009996e-05, - "loss": 0.7594, + "epoch": 0.8436882236656312, + "grad_norm": 3.8132977377416832, + "learning_rate": 1.686886142104916e-05, + "loss": 1.2206, "step": 5975 }, { - "epoch": 1.2530928915915287, - "grad_norm": 6.278901699083973, - "learning_rate": 1.3100502911763091e-05, - "loss": 0.7159, + "epoch": 0.843829426715617, + "grad_norm": 3.3530982137714873, + "learning_rate": 1.6867753422119926e-05, + "loss": 1.165, "step": 5976 }, { - "epoch": 1.253302579157056, - "grad_norm": 6.719974560368373, - "learning_rate": 1.3098350511610036e-05, - "loss": 0.6479, + "epoch": 0.8439706297656029, + "grad_norm": 3.0874841179845385, + "learning_rate": 1.686664526358707e-05, + "loss": 0.9611, "step": 5977 }, { - "epoch": 1.2535122667225833, - "grad_norm": 5.770458756407946, - "learning_rate": 1.3096197952661137e-05, - "loss": 0.6464, + "epoch": 0.8441118328155888, + "grad_norm": 3.2815589740995823, + "learning_rate": 1.6865536945476348e-05, + "loss": 0.9514, "step": 5978 }, { - "epoch": 1.2537219542881108, - "grad_norm": 6.626169737696493, - "learning_rate": 1.3094045235026732e-05, - "loss": 0.7278, + "epoch": 0.8442530358655747, + "grad_norm": 3.371518441065829, + "learning_rate": 1.6864428467813506e-05, + "loss": 0.7776, "step": 5979 }, { - "epoch": 1.2539316418536381, - "grad_norm": 6.7624388321809015, - "learning_rate": 1.3091892358817138e-05, - "loss": 0.9228, + "epoch": 0.8443942389155605, + "grad_norm": 3.000636059075726, + "learning_rate": 1.6863319830624313e-05, + "loss": 0.9231, "step": 5980 }, { - "epoch": 1.2541413294191655, - "grad_norm": 6.652753800238416, - "learning_rate": 1.3089739324142696e-05, - "loss": 0.751, + "epoch": 0.8445354419655464, + "grad_norm": 3.891594633559061, + "learning_rate": 1.686221103393453e-05, + "loss": 1.1536, "step": 5981 }, { - "epoch": 1.2543510169846928, - "grad_norm": 7.163766340043406, - "learning_rate": 1.3087586131113756e-05, - "loss": 0.9468, + "epoch": 0.8446766450155323, + "grad_norm": 3.7621053903455772, + "learning_rate": 1.686110207776993e-05, + "loss": 1.1036, "step": 5982 }, { - "epoch": 1.25456070455022, - "grad_norm": 6.314057693819456, - "learning_rate": 1.3085432779840672e-05, - "loss": 0.7903, + "epoch": 0.8448178480655182, + "grad_norm": 3.4883826232171273, + "learning_rate": 1.685999296215628e-05, + "loss": 1.1202, "step": 5983 }, { - "epoch": 1.2547703921157476, - "grad_norm": 7.479376297304569, - "learning_rate": 1.3083279270433806e-05, - "loss": 0.604, + "epoch": 0.8449590511155041, + "grad_norm": 4.3562200750309295, + "learning_rate": 1.6858883687119353e-05, + "loss": 1.2759, "step": 5984 }, { - "epoch": 1.254980079681275, - "grad_norm": 6.846040216219421, - "learning_rate": 1.3081125603003532e-05, - "loss": 0.7936, + "epoch": 0.84510025416549, + "grad_norm": 3.2298711827746254, + "learning_rate": 1.685777425268493e-05, + "loss": 0.9925, "step": 5985 }, { - "epoch": 1.2551897672468022, - "grad_norm": 6.713330089794746, - "learning_rate": 1.3078971777660226e-05, - "loss": 0.7736, + "epoch": 0.8452414572154758, + "grad_norm": 3.650172285953768, + "learning_rate": 1.6856664658878797e-05, + "loss": 1.0035, "step": 5986 }, { - "epoch": 1.2553994548123297, - "grad_norm": 6.142056844816593, - "learning_rate": 1.3076817794514277e-05, - "loss": 0.7146, + "epoch": 0.8453826602654617, + "grad_norm": 2.713121026048089, + "learning_rate": 1.685555490572674e-05, + "loss": 0.7634, "step": 5987 }, { - "epoch": 1.255609142377857, - "grad_norm": 7.448684058330817, - "learning_rate": 1.3074663653676079e-05, - "loss": 0.9735, + "epoch": 0.8455238633154476, + "grad_norm": 3.989183796569977, + "learning_rate": 1.6854444993254547e-05, + "loss": 1.2674, "step": 5988 }, { - "epoch": 1.2558188299433843, - "grad_norm": 6.453839316937063, - "learning_rate": 1.3072509355256033e-05, - "loss": 0.8268, + "epoch": 0.8456650663654335, + "grad_norm": 2.898507928961729, + "learning_rate": 1.6853334921488014e-05, + "loss": 0.7731, "step": 5989 }, { - "epoch": 1.2560285175089116, - "grad_norm": 5.436625043722283, - "learning_rate": 1.3070354899364557e-05, - "loss": 0.664, + "epoch": 0.8458062694154194, + "grad_norm": 3.288237801763016, + "learning_rate": 1.6852224690452937e-05, + "loss": 0.8893, "step": 5990 }, { - "epoch": 1.256238205074439, - "grad_norm": 7.913564847351125, - "learning_rate": 1.3068200286112068e-05, - "loss": 0.8649, + "epoch": 0.8459474724654052, + "grad_norm": 3.5848687969018838, + "learning_rate": 1.6851114300175114e-05, + "loss": 1.018, "step": 5991 }, { - "epoch": 1.2564478926399665, - "grad_norm": 8.355716642123957, - "learning_rate": 1.3066045515608987e-05, - "loss": 1.018, + "epoch": 0.8460886755153911, + "grad_norm": 3.375660919661249, + "learning_rate": 1.685000375068036e-05, + "loss": 0.9585, "step": 5992 }, { - "epoch": 1.2566575802054938, - "grad_norm": 8.504598837953978, - "learning_rate": 1.3063890587965762e-05, - "loss": 0.9613, + "epoch": 0.846229878565377, + "grad_norm": 3.7888331781704117, + "learning_rate": 1.684889304199447e-05, + "loss": 0.8954, "step": 5993 }, { - "epoch": 1.256867267771021, - "grad_norm": 8.082977945167267, - "learning_rate": 1.3061735503292826e-05, - "loss": 1.0331, + "epoch": 0.8463710816153629, + "grad_norm": 3.008288054464839, + "learning_rate": 1.6847782174143264e-05, + "loss": 0.9375, "step": 5994 }, { - "epoch": 1.2570769553365486, - "grad_norm": 7.656673168146104, - "learning_rate": 1.3059580261700637e-05, - "loss": 0.811, + "epoch": 0.8465122846653488, + "grad_norm": 3.9979063036484646, + "learning_rate": 1.6846671147152564e-05, + "loss": 1.2974, "step": 5995 }, { - "epoch": 1.257286642902076, - "grad_norm": 7.68150717223882, - "learning_rate": 1.3057424863299653e-05, - "loss": 0.7938, + "epoch": 0.8466534877153347, + "grad_norm": 3.4565611951760293, + "learning_rate": 1.6845559961048178e-05, + "loss": 0.9289, "step": 5996 }, { - "epoch": 1.2574963304676032, - "grad_norm": 7.483904362855158, - "learning_rate": 1.3055269308200345e-05, - "loss": 0.8793, + "epoch": 0.8467946907653205, + "grad_norm": 3.3792194410367404, + "learning_rate": 1.6844448615855933e-05, + "loss": 1.0626, "step": 5997 }, { - "epoch": 1.2577060180331308, - "grad_norm": 6.496794259275839, - "learning_rate": 1.3053113596513178e-05, - "loss": 0.6599, + "epoch": 0.8469358938153064, + "grad_norm": 3.4065673742013654, + "learning_rate": 1.6843337111601663e-05, + "loss": 0.8975, "step": 5998 }, { - "epoch": 1.257915705598658, - "grad_norm": 7.199334344955251, - "learning_rate": 1.3050957728348652e-05, - "loss": 0.9995, + "epoch": 0.8470770968652923, + "grad_norm": 3.4456765407452123, + "learning_rate": 1.6842225448311193e-05, + "loss": 0.959, "step": 5999 }, { - "epoch": 1.2581253931641854, - "grad_norm": 7.875658972261774, - "learning_rate": 1.3048801703817247e-05, - "loss": 0.8635, + "epoch": 0.8472182999152782, + "grad_norm": 3.516367057735747, + "learning_rate": 1.6841113626010358e-05, + "loss": 0.9892, "step": 6000 }, { - "epoch": 1.2583350807297127, - "grad_norm": 6.627044726569807, - "learning_rate": 1.3046645523029466e-05, - "loss": 0.8329, + "epoch": 0.8473595029652641, + "grad_norm": 3.328190891300491, + "learning_rate": 1.6840001644724993e-05, + "loss": 0.9731, "step": 6001 }, { - "epoch": 1.25854476829524, - "grad_norm": 7.9505949122620745, - "learning_rate": 1.3044489186095819e-05, - "loss": 0.6114, + "epoch": 0.84750070601525, + "grad_norm": 4.283489725670041, + "learning_rate": 1.683888950448095e-05, + "loss": 1.2836, "step": 6002 }, { - "epoch": 1.2587544558607675, - "grad_norm": 6.8941852458443815, - "learning_rate": 1.304233269312682e-05, - "loss": 0.8679, + "epoch": 0.8476419090652358, + "grad_norm": 4.764352510373618, + "learning_rate": 1.6837777205304063e-05, + "loss": 1.2849, "step": 6003 }, { - "epoch": 1.2589641434262948, - "grad_norm": 6.804248132656482, - "learning_rate": 1.3040176044232994e-05, - "loss": 0.862, + "epoch": 0.8477831121152217, + "grad_norm": 3.094746301008526, + "learning_rate": 1.6836664747220193e-05, + "loss": 0.8584, "step": 6004 }, { - "epoch": 1.2591738309918221, - "grad_norm": 6.849484075270946, - "learning_rate": 1.3038019239524873e-05, - "loss": 0.817, + "epoch": 0.8479243151652076, + "grad_norm": 4.095990185460272, + "learning_rate": 1.683555213025518e-05, + "loss": 1.3037, "step": 6005 }, { - "epoch": 1.2593835185573496, - "grad_norm": 7.10564018806051, - "learning_rate": 1.3035862279112998e-05, - "loss": 0.6012, + "epoch": 0.8480655182151935, + "grad_norm": 3.4196961850719725, + "learning_rate": 1.683443935443489e-05, + "loss": 0.9915, "step": 6006 }, { - "epoch": 1.259593206122877, - "grad_norm": 7.75684757656822, - "learning_rate": 1.3033705163107912e-05, - "loss": 0.7785, + "epoch": 0.8482067212651794, + "grad_norm": 3.1140189128516917, + "learning_rate": 1.6833326419785183e-05, + "loss": 0.8285, "step": 6007 }, { - "epoch": 1.2598028936884043, - "grad_norm": 6.913700558192165, - "learning_rate": 1.303154789162018e-05, - "loss": 0.8866, + "epoch": 0.8483479243151653, + "grad_norm": 3.34119361508651, + "learning_rate": 1.6832213326331918e-05, + "loss": 1.0373, "step": 6008 }, { - "epoch": 1.2600125812539316, - "grad_norm": 6.236958463205739, - "learning_rate": 1.3029390464760356e-05, - "loss": 0.6957, + "epoch": 0.8484891273651511, + "grad_norm": 3.539990501877801, + "learning_rate": 1.683110007410097e-05, + "loss": 1.0949, "step": 6009 }, { - "epoch": 1.2602222688194589, - "grad_norm": 8.121305291611979, - "learning_rate": 1.3027232882639021e-05, - "loss": 0.8579, + "epoch": 0.8486303304151369, + "grad_norm": 3.2435457159406864, + "learning_rate": 1.6829986663118203e-05, + "loss": 1.0914, "step": 6010 }, { - "epoch": 1.2604319563849864, - "grad_norm": 6.815267139775197, - "learning_rate": 1.3025075145366747e-05, - "loss": 0.8185, + "epoch": 0.8487715334651228, + "grad_norm": 3.013124626280069, + "learning_rate": 1.6828873093409496e-05, + "loss": 1.0341, "step": 6011 }, { - "epoch": 1.2606416439505137, - "grad_norm": 7.530695160731996, - "learning_rate": 1.3022917253054125e-05, - "loss": 0.9568, + "epoch": 0.8489127365151087, + "grad_norm": 3.2183107993804247, + "learning_rate": 1.682775936500073e-05, + "loss": 1.1383, "step": 6012 }, { - "epoch": 1.260851331516041, - "grad_norm": 6.589340432408899, - "learning_rate": 1.3020759205811752e-05, - "loss": 0.8335, + "epoch": 0.8490539395650946, + "grad_norm": 3.260655044267242, + "learning_rate": 1.6826645477917784e-05, + "loss": 1.0023, "step": 6013 }, { - "epoch": 1.2610610190815685, - "grad_norm": 6.765269403660454, - "learning_rate": 1.3018601003750234e-05, - "loss": 0.7176, + "epoch": 0.8491951426150804, + "grad_norm": 3.7719118747281093, + "learning_rate": 1.6825531432186545e-05, + "loss": 1.2516, "step": 6014 }, { - "epoch": 1.2612707066470958, - "grad_norm": 6.8406173248452715, - "learning_rate": 1.3016442646980172e-05, - "loss": 0.628, + "epoch": 0.8493363456650663, + "grad_norm": 3.132923744419716, + "learning_rate": 1.68244172278329e-05, + "loss": 0.9442, "step": 6015 }, { - "epoch": 1.2614803942126231, - "grad_norm": 6.969548983193765, - "learning_rate": 1.3014284135612197e-05, - "loss": 0.7574, + "epoch": 0.8494775487150522, + "grad_norm": 3.379634604139086, + "learning_rate": 1.6823302864882748e-05, + "loss": 0.9652, "step": 6016 }, { - "epoch": 1.2616900817781507, - "grad_norm": 5.361427532379264, - "learning_rate": 1.3012125469756931e-05, - "loss": 0.5628, + "epoch": 0.8496187517650381, + "grad_norm": 2.9323877865091257, + "learning_rate": 1.6822188343361987e-05, + "loss": 0.9111, "step": 6017 }, { - "epoch": 1.261899769343678, - "grad_norm": 8.301283503428039, - "learning_rate": 1.300996664952501e-05, - "loss": 1.138, + "epoch": 0.849759954815024, + "grad_norm": 3.776131296990772, + "learning_rate": 1.682107366329651e-05, + "loss": 1.1022, "step": 6018 }, { - "epoch": 1.2621094569092053, - "grad_norm": 6.659859284718956, - "learning_rate": 1.3007807675027078e-05, - "loss": 0.5891, + "epoch": 0.8499011578650099, + "grad_norm": 3.4356591301461488, + "learning_rate": 1.681995882471223e-05, + "loss": 0.8991, "step": 6019 }, { - "epoch": 1.2623191444747326, - "grad_norm": 6.643783342648083, - "learning_rate": 1.3005648546373787e-05, - "loss": 0.7563, + "epoch": 0.8500423609149957, + "grad_norm": 3.644600389172983, + "learning_rate": 1.6818843827635052e-05, + "loss": 1.1095, "step": 6020 }, { - "epoch": 1.26252883204026, - "grad_norm": 7.046259218656, - "learning_rate": 1.3003489263675793e-05, - "loss": 0.8667, + "epoch": 0.8501835639649816, + "grad_norm": 3.4491024673137884, + "learning_rate": 1.681772867209089e-05, + "loss": 1.112, "step": 6021 }, { - "epoch": 1.2627385196057874, - "grad_norm": 9.26337847646165, - "learning_rate": 1.3001329827043767e-05, - "loss": 1.0952, + "epoch": 0.8503247670149675, + "grad_norm": 3.5843637280499316, + "learning_rate": 1.6816613358105655e-05, + "loss": 1.1463, "step": 6022 }, { - "epoch": 1.2629482071713147, - "grad_norm": 6.705901126956028, - "learning_rate": 1.2999170236588378e-05, - "loss": 0.7866, + "epoch": 0.8504659700649534, + "grad_norm": 3.7434089887512276, + "learning_rate": 1.6815497885705274e-05, + "loss": 1.0804, "step": 6023 }, { - "epoch": 1.263157894736842, - "grad_norm": 5.90191789854271, - "learning_rate": 1.2997010492420317e-05, - "loss": 0.7158, + "epoch": 0.8506071731149393, + "grad_norm": 2.985040934111276, + "learning_rate": 1.681438225491566e-05, + "loss": 0.9176, "step": 6024 }, { - "epoch": 1.2633675823023696, - "grad_norm": 7.767689446794684, - "learning_rate": 1.2994850594650268e-05, - "loss": 0.8881, + "epoch": 0.8507483761649252, + "grad_norm": 4.2778375455354976, + "learning_rate": 1.6813266465762747e-05, + "loss": 1.0527, "step": 6025 }, { - "epoch": 1.2635772698678969, - "grad_norm": 6.807524862006748, - "learning_rate": 1.2992690543388933e-05, - "loss": 0.828, + "epoch": 0.850889579214911, + "grad_norm": 3.880199394104797, + "learning_rate": 1.6812150518272465e-05, + "loss": 1.1534, "step": 6026 }, { - "epoch": 1.2637869574334242, - "grad_norm": 7.918793122539964, - "learning_rate": 1.2990530338747015e-05, - "loss": 1.0704, + "epoch": 0.8510307822648969, + "grad_norm": 4.566652508726914, + "learning_rate": 1.6811034412470742e-05, + "loss": 1.2007, "step": 6027 }, { - "epoch": 1.2639966449989515, - "grad_norm": 8.641864479256418, - "learning_rate": 1.2988369980835233e-05, - "loss": 0.8381, + "epoch": 0.8511719853148828, + "grad_norm": 2.8532513464112665, + "learning_rate": 1.6809918148383525e-05, + "loss": 0.8738, "step": 6028 }, { - "epoch": 1.264206332564479, - "grad_norm": 6.939859245375967, - "learning_rate": 1.2986209469764304e-05, - "loss": 0.9765, + "epoch": 0.8513131883648687, + "grad_norm": 2.8402562460432494, + "learning_rate": 1.680880172603675e-05, + "loss": 0.8096, "step": 6029 }, { - "epoch": 1.2644160201300063, - "grad_norm": 8.234343725937697, - "learning_rate": 1.2984048805644962e-05, - "loss": 0.8397, + "epoch": 0.8514543914148546, + "grad_norm": 3.4594612663924447, + "learning_rate": 1.680768514545637e-05, + "loss": 1.2129, "step": 6030 }, { - "epoch": 1.2646257076955336, - "grad_norm": 6.5880710535819125, - "learning_rate": 1.2981887988587943e-05, - "loss": 0.6833, + "epoch": 0.8515955944648405, + "grad_norm": 3.623254865202898, + "learning_rate": 1.680656840666832e-05, + "loss": 0.9823, "step": 6031 }, { - "epoch": 1.264835395261061, - "grad_norm": 7.240868543477055, - "learning_rate": 1.2979727018703991e-05, - "loss": 0.7853, + "epoch": 0.8517367975148263, + "grad_norm": 3.282064189799054, + "learning_rate": 1.6805451509698562e-05, + "loss": 0.8775, "step": 6032 }, { - "epoch": 1.2650450828265885, - "grad_norm": 7.148033268239787, - "learning_rate": 1.2977565896103861e-05, - "loss": 0.7809, + "epoch": 0.8518780005648122, + "grad_norm": 3.4803239031762163, + "learning_rate": 1.680433445457305e-05, + "loss": 0.987, "step": 6033 }, { - "epoch": 1.2652547703921158, - "grad_norm": 6.451611374971096, - "learning_rate": 1.2975404620898315e-05, - "loss": 0.5605, + "epoch": 0.8520192036147981, + "grad_norm": 3.7839025260267243, + "learning_rate": 1.680321724131774e-05, + "loss": 0.9519, "step": 6034 }, { - "epoch": 1.265464457957643, - "grad_norm": 6.937914034263406, - "learning_rate": 1.2973243193198126e-05, - "loss": 0.792, + "epoch": 0.852160406664784, + "grad_norm": 3.9929586599219893, + "learning_rate": 1.6802099869958602e-05, + "loss": 1.1953, "step": 6035 }, { - "epoch": 1.2656741455231706, - "grad_norm": 6.747569081053799, - "learning_rate": 1.297108161311406e-05, - "loss": 0.7364, + "epoch": 0.8523016097147699, + "grad_norm": 3.3931530206747915, + "learning_rate": 1.6800982340521605e-05, + "loss": 0.884, "step": 6036 }, { - "epoch": 1.265883833088698, - "grad_norm": 6.656940307998814, - "learning_rate": 1.2968919880756914e-05, - "loss": 1.0242, + "epoch": 0.8524428127647558, + "grad_norm": 3.7606841431835485, + "learning_rate": 1.6799864653032712e-05, + "loss": 1.0551, "step": 6037 }, { - "epoch": 1.2660935206542252, - "grad_norm": 6.191860676734484, - "learning_rate": 1.296675799623747e-05, - "loss": 0.8907, + "epoch": 0.8525840158147416, + "grad_norm": 3.282784215872096, + "learning_rate": 1.67987468075179e-05, + "loss": 1.0403, "step": 6038 }, { - "epoch": 1.2663032082197525, - "grad_norm": 6.493201876888931, - "learning_rate": 1.2964595959666538e-05, - "loss": 0.7854, + "epoch": 0.8527252188647275, + "grad_norm": 3.374610343238798, + "learning_rate": 1.6797628804003148e-05, + "loss": 0.9616, "step": 6039 }, { - "epoch": 1.2665128957852798, - "grad_norm": 6.0704996155040085, - "learning_rate": 1.2962433771154916e-05, - "loss": 0.7155, + "epoch": 0.8528664219147134, + "grad_norm": 3.304789869796052, + "learning_rate": 1.679651064251444e-05, + "loss": 1.0208, "step": 6040 }, { - "epoch": 1.2667225833508073, - "grad_norm": 6.917858259674632, - "learning_rate": 1.296027143081343e-05, - "loss": 0.7123, + "epoch": 0.8530076249646993, + "grad_norm": 4.720467490072776, + "learning_rate": 1.679539232307776e-05, + "loss": 1.305, "step": 6041 }, { - "epoch": 1.2669322709163346, - "grad_norm": 7.460387487135502, - "learning_rate": 1.2958108938752894e-05, - "loss": 0.8623, + "epoch": 0.8531488280146852, + "grad_norm": 3.0000338389703414, + "learning_rate": 1.6794273845719096e-05, + "loss": 0.9501, "step": 6042 }, { - "epoch": 1.267141958481862, - "grad_norm": 7.634917059123814, - "learning_rate": 1.2955946295084147e-05, - "loss": 0.8757, + "epoch": 0.853290031064671, + "grad_norm": 3.504863832783145, + "learning_rate": 1.6793155210464442e-05, + "loss": 1.0481, "step": 6043 }, { - "epoch": 1.2673516460473895, - "grad_norm": 6.328522590226969, - "learning_rate": 1.2953783499918025e-05, - "loss": 0.8166, + "epoch": 0.8534312341146568, + "grad_norm": 3.776899460336821, + "learning_rate": 1.6792036417339797e-05, + "loss": 1.2887, "step": 6044 }, { - "epoch": 1.2675613336129168, - "grad_norm": 6.159955982999825, - "learning_rate": 1.2951620553365375e-05, - "loss": 0.7034, + "epoch": 0.8535724371646427, + "grad_norm": 4.071407234080875, + "learning_rate": 1.6790917466371156e-05, + "loss": 1.2395, "step": 6045 }, { - "epoch": 1.267771021178444, - "grad_norm": 6.750301776951597, - "learning_rate": 1.2949457455537056e-05, - "loss": 0.6254, + "epoch": 0.8537136402146286, + "grad_norm": 3.4107939974973913, + "learning_rate": 1.6789798357584524e-05, + "loss": 0.9579, "step": 6046 }, { - "epoch": 1.2679807087439716, - "grad_norm": 7.668024026960067, - "learning_rate": 1.2947294206543923e-05, - "loss": 0.9864, + "epoch": 0.8538548432646145, + "grad_norm": 3.163487330144368, + "learning_rate": 1.6788679091005918e-05, + "loss": 1.0122, "step": 6047 }, { - "epoch": 1.268190396309499, - "grad_norm": 6.512050588598844, - "learning_rate": 1.294513080649685e-05, - "loss": 0.656, + "epoch": 0.8539960463146004, + "grad_norm": 4.023077855125758, + "learning_rate": 1.6787559666661336e-05, + "loss": 1.2082, "step": 6048 }, { - "epoch": 1.2684000838750262, - "grad_norm": 7.130803554925573, - "learning_rate": 1.2942967255506717e-05, - "loss": 0.9287, + "epoch": 0.8541372493645862, + "grad_norm": 2.777848852805815, + "learning_rate": 1.6786440084576797e-05, + "loss": 1.02, "step": 6049 }, { - "epoch": 1.2686097714405535, - "grad_norm": 7.82416021284202, - "learning_rate": 1.2940803553684407e-05, - "loss": 0.7829, + "epoch": 0.8542784524145721, + "grad_norm": 4.1494539376252, + "learning_rate": 1.6785320344778325e-05, + "loss": 0.9826, "step": 6050 }, { - "epoch": 1.2688194590060808, - "grad_norm": 6.243479781429856, - "learning_rate": 1.2938639701140816e-05, - "loss": 0.632, + "epoch": 0.854419655464558, + "grad_norm": 3.186119293545814, + "learning_rate": 1.6784200447291937e-05, + "loss": 1.1375, "step": 6051 }, { - "epoch": 1.2690291465716084, - "grad_norm": 6.792160871812018, - "learning_rate": 1.2936475697986842e-05, - "loss": 0.6227, + "epoch": 0.8545608585145439, + "grad_norm": 3.182507906369065, + "learning_rate": 1.678308039214366e-05, + "loss": 0.9583, "step": 6052 }, { - "epoch": 1.2692388341371357, - "grad_norm": 7.777499392545686, - "learning_rate": 1.2934311544333393e-05, - "loss": 0.943, + "epoch": 0.8547020615645298, + "grad_norm": 3.5299493249564713, + "learning_rate": 1.6781960179359525e-05, + "loss": 1.0502, "step": 6053 }, { - "epoch": 1.269448521702663, - "grad_norm": 7.6502452832396814, - "learning_rate": 1.293214724029139e-05, - "loss": 0.958, + "epoch": 0.8548432646145157, + "grad_norm": 4.039997484424329, + "learning_rate": 1.6780839808965566e-05, + "loss": 0.9806, "step": 6054 }, { - "epoch": 1.2696582092681905, - "grad_norm": 6.195129361157676, - "learning_rate": 1.2929982785971756e-05, - "loss": 0.7819, + "epoch": 0.8549844676645015, + "grad_norm": 2.8433575939759796, + "learning_rate": 1.677971928098782e-05, + "loss": 0.7921, "step": 6055 }, { - "epoch": 1.2698678968337178, - "grad_norm": 6.4037675961670235, - "learning_rate": 1.2927818181485421e-05, - "loss": 0.691, + "epoch": 0.8551256707144874, + "grad_norm": 2.9517681261817112, + "learning_rate": 1.6778598595452324e-05, + "loss": 0.7332, "step": 6056 }, { - "epoch": 1.2700775843992451, - "grad_norm": 7.9612141730511174, - "learning_rate": 1.2925653426943327e-05, - "loss": 0.84, + "epoch": 0.8552668737644733, + "grad_norm": 3.384608137580897, + "learning_rate": 1.677747775238512e-05, + "loss": 0.9591, "step": 6057 }, { - "epoch": 1.2702872719647724, - "grad_norm": 8.572304176909645, - "learning_rate": 1.2923488522456425e-05, - "loss": 0.9259, + "epoch": 0.8554080768144592, + "grad_norm": 2.732539801205947, + "learning_rate": 1.6776356751812262e-05, + "loss": 0.8074, "step": 6058 }, { - "epoch": 1.2704969595302997, - "grad_norm": 7.159929219365069, - "learning_rate": 1.2921323468135661e-05, - "loss": 0.7909, + "epoch": 0.8555492798644451, + "grad_norm": 3.2611671988352233, + "learning_rate": 1.67752355937598e-05, + "loss": 0.8809, "step": 6059 }, { - "epoch": 1.2707066470958273, - "grad_norm": 8.513035438239559, - "learning_rate": 1.2919158264092001e-05, - "loss": 0.885, + "epoch": 0.855690482914431, + "grad_norm": 2.9810093378248013, + "learning_rate": 1.677411427825379e-05, + "loss": 0.8528, "step": 6060 }, { - "epoch": 1.2709163346613546, - "grad_norm": 7.623441285484064, - "learning_rate": 1.291699291043642e-05, - "loss": 0.9277, + "epoch": 0.8558316859644168, + "grad_norm": 3.3318931141833383, + "learning_rate": 1.677299280532029e-05, + "loss": 0.8783, "step": 6061 }, { - "epoch": 1.2711260222268819, - "grad_norm": 7.498560927209463, - "learning_rate": 1.2914827407279891e-05, - "loss": 0.9728, + "epoch": 0.8559728890144027, + "grad_norm": 3.9847521448834566, + "learning_rate": 1.677187117498536e-05, + "loss": 1.1644, "step": 6062 }, { - "epoch": 1.2713357097924094, - "grad_norm": 7.974983747964251, - "learning_rate": 1.2912661754733403e-05, - "loss": 0.9296, + "epoch": 0.8561140920643886, + "grad_norm": 3.577017818148409, + "learning_rate": 1.6770749387275067e-05, + "loss": 1.1586, "step": 6063 }, { - "epoch": 1.2715453973579367, - "grad_norm": 7.416491805394539, - "learning_rate": 1.2910495952907952e-05, - "loss": 0.838, + "epoch": 0.8562552951143745, + "grad_norm": 2.9874068938635343, + "learning_rate": 1.6769627442215485e-05, + "loss": 0.7475, "step": 6064 }, { - "epoch": 1.271755084923464, - "grad_norm": 8.001422166957868, - "learning_rate": 1.2908330001914527e-05, - "loss": 0.6973, + "epoch": 0.8563964981643604, + "grad_norm": 9.271714399616094, + "learning_rate": 1.6768505339832686e-05, + "loss": 1.183, "step": 6065 }, { - "epoch": 1.2719647724889915, - "grad_norm": 6.360282028382773, - "learning_rate": 1.2906163901864152e-05, - "loss": 0.6531, + "epoch": 0.8565377012143462, + "grad_norm": 3.3077815242553577, + "learning_rate": 1.6767383080152744e-05, + "loss": 0.866, "step": 6066 }, { - "epoch": 1.2721744600545188, - "grad_norm": 7.452211990230422, - "learning_rate": 1.2903997652867833e-05, - "loss": 0.9653, + "epoch": 0.8566789042643321, + "grad_norm": 3.291177333301306, + "learning_rate": 1.6766260663201742e-05, + "loss": 1.0186, "step": 6067 }, { - "epoch": 1.2723841476200461, - "grad_norm": 7.855459341293004, - "learning_rate": 1.2901831255036598e-05, - "loss": 1.06, + "epoch": 0.856820107314318, + "grad_norm": 3.5821401809428295, + "learning_rate": 1.6765138089005765e-05, + "loss": 0.9695, "step": 6068 }, { - "epoch": 1.2725938351855735, - "grad_norm": 8.16281631007094, - "learning_rate": 1.289966470848148e-05, - "loss": 1.1338, + "epoch": 0.8569613103643039, + "grad_norm": 4.049696103271679, + "learning_rate": 1.67640153575909e-05, + "loss": 0.9434, "step": 6069 }, { - "epoch": 1.2728035227511008, - "grad_norm": 7.150712832277037, - "learning_rate": 1.2897498013313516e-05, - "loss": 0.8942, + "epoch": 0.8571025134142898, + "grad_norm": 5.698188451273226, + "learning_rate": 1.6762892468983237e-05, + "loss": 0.9614, "step": 6070 }, { - "epoch": 1.2730132103166283, - "grad_norm": 6.554744247212476, - "learning_rate": 1.2895331169643752e-05, - "loss": 0.7974, + "epoch": 0.8572437164642757, + "grad_norm": 3.3931653026104938, + "learning_rate": 1.6761769423208877e-05, + "loss": 1.0285, "step": 6071 }, { - "epoch": 1.2732228978821556, - "grad_norm": 6.495659659133183, - "learning_rate": 1.2893164177583246e-05, - "loss": 0.8689, + "epoch": 0.8573849195142615, + "grad_norm": 3.3594930758612476, + "learning_rate": 1.6760646220293916e-05, + "loss": 1.0619, "step": 6072 }, { - "epoch": 1.273432585447683, - "grad_norm": 7.7328977585341665, - "learning_rate": 1.2890997037243054e-05, - "loss": 0.856, + "epoch": 0.8575261225642474, + "grad_norm": 3.47578477216948, + "learning_rate": 1.6759522860264457e-05, + "loss": 0.9932, "step": 6073 }, { - "epoch": 1.2736422730132104, - "grad_norm": 6.800678746219489, - "learning_rate": 1.2888829748734256e-05, - "loss": 0.8856, + "epoch": 0.8576673256142333, + "grad_norm": 3.171957061139327, + "learning_rate": 1.6758399343146602e-05, + "loss": 0.8507, "step": 6074 }, { - "epoch": 1.2738519605787377, - "grad_norm": 7.112425812142012, - "learning_rate": 1.2886662312167917e-05, - "loss": 0.7343, + "epoch": 0.8578085286642192, + "grad_norm": 3.2458969600722343, + "learning_rate": 1.6757275668966467e-05, + "loss": 1.0089, "step": 6075 }, { - "epoch": 1.274061648144265, - "grad_norm": 7.3156525836739865, - "learning_rate": 1.2884494727655131e-05, - "loss": 0.8694, + "epoch": 0.8579497317142051, + "grad_norm": 3.2572139943459164, + "learning_rate": 1.6756151837750167e-05, + "loss": 0.8151, "step": 6076 }, { - "epoch": 1.2742713357097923, - "grad_norm": 6.900085487281027, - "learning_rate": 1.2882326995306986e-05, - "loss": 0.7334, + "epoch": 0.858090934764191, + "grad_norm": 3.603855275924478, + "learning_rate": 1.6755027849523812e-05, + "loss": 0.9957, "step": 6077 }, { - "epoch": 1.2744810232753196, - "grad_norm": 7.349181343720241, - "learning_rate": 1.2880159115234587e-05, - "loss": 0.988, + "epoch": 0.8582321378141767, + "grad_norm": 3.0389813342513574, + "learning_rate": 1.6753903704313527e-05, + "loss": 0.888, "step": 6078 }, { - "epoch": 1.2746907108408472, - "grad_norm": 6.673340383981672, - "learning_rate": 1.287799108754903e-05, - "loss": 0.9803, + "epoch": 0.8583733408641626, + "grad_norm": 3.405328259071886, + "learning_rate": 1.6752779402145442e-05, + "loss": 1.0856, "step": 6079 }, { - "epoch": 1.2749003984063745, - "grad_norm": 6.362015148609689, - "learning_rate": 1.2875822912361446e-05, - "loss": 0.7034, + "epoch": 0.8585145439141485, + "grad_norm": 3.9636419871648476, + "learning_rate": 1.6751654943045672e-05, + "loss": 0.9363, "step": 6080 }, { - "epoch": 1.2751100859719018, - "grad_norm": 6.877849565391634, - "learning_rate": 1.2873654589782947e-05, - "loss": 0.8221, + "epoch": 0.8586557469641344, + "grad_norm": 3.1089049886091376, + "learning_rate": 1.6750530327040363e-05, + "loss": 1.0655, "step": 6081 }, { - "epoch": 1.2753197735374293, - "grad_norm": 6.735173092507871, - "learning_rate": 1.2871486119924665e-05, - "loss": 0.8304, + "epoch": 0.8587969500141203, + "grad_norm": 3.9424144043238725, + "learning_rate": 1.674940555415564e-05, + "loss": 0.9753, "step": 6082 }, { - "epoch": 1.2755294611029566, - "grad_norm": 8.650058965209823, - "learning_rate": 1.286931750289774e-05, - "loss": 0.9, + "epoch": 0.8589381530641061, + "grad_norm": 3.1623035837238636, + "learning_rate": 1.674828062441765e-05, + "loss": 1.0934, "step": 6083 }, { - "epoch": 1.275739148668484, - "grad_norm": 8.100644098662862, - "learning_rate": 1.2867148738813318e-05, - "loss": 0.9384, + "epoch": 0.859079356114092, + "grad_norm": 5.1954162679433775, + "learning_rate": 1.674715553785253e-05, + "loss": 1.0995, "step": 6084 }, { - "epoch": 1.2759488362340115, - "grad_norm": 6.930392269531282, - "learning_rate": 1.2864979827782547e-05, - "loss": 0.8729, + "epoch": 0.8592205591640779, + "grad_norm": 3.175457894432726, + "learning_rate": 1.6746030294486434e-05, + "loss": 0.9036, "step": 6085 }, { - "epoch": 1.2761585237995388, - "grad_norm": 7.1686501662433315, - "learning_rate": 1.2862810769916592e-05, - "loss": 0.8384, + "epoch": 0.8593617622140638, + "grad_norm": 3.161042377726736, + "learning_rate": 1.6744904894345504e-05, + "loss": 0.7929, "step": 6086 }, { - "epoch": 1.276368211365066, - "grad_norm": 7.729322065311478, - "learning_rate": 1.2860641565326622e-05, - "loss": 0.9509, + "epoch": 0.8595029652640497, + "grad_norm": 3.2320082661026643, + "learning_rate": 1.6743779337455896e-05, + "loss": 0.9879, "step": 6087 }, { - "epoch": 1.2765778989305934, - "grad_norm": 7.613119550576094, - "learning_rate": 1.2858472214123806e-05, - "loss": 0.9419, + "epoch": 0.8596441683140356, + "grad_norm": 2.9807276603167674, + "learning_rate": 1.674265362384377e-05, + "loss": 0.9225, "step": 6088 }, { - "epoch": 1.2767875864961207, - "grad_norm": 7.36653954638884, - "learning_rate": 1.2856302716419335e-05, - "loss": 0.7743, + "epoch": 0.8597853713640214, + "grad_norm": 3.6519290063731344, + "learning_rate": 1.6741527753535285e-05, + "loss": 1.0014, "step": 6089 }, { - "epoch": 1.2769972740616482, - "grad_norm": 6.390528093042797, - "learning_rate": 1.2854133072324396e-05, - "loss": 0.7955, + "epoch": 0.8599265744140073, + "grad_norm": 3.4737802403651714, + "learning_rate": 1.6740401726556608e-05, + "loss": 0.8189, "step": 6090 }, { - "epoch": 1.2772069616271755, - "grad_norm": 5.628202163015353, - "learning_rate": 1.2851963281950184e-05, - "loss": 0.7212, + "epoch": 0.8600677774639932, + "grad_norm": 4.038730007206835, + "learning_rate": 1.6739275542933905e-05, + "loss": 1.1654, "step": 6091 }, { - "epoch": 1.2774166491927028, - "grad_norm": 7.256642709037587, - "learning_rate": 1.2849793345407907e-05, - "loss": 0.9678, + "epoch": 0.8602089805139791, + "grad_norm": 3.2774203227422554, + "learning_rate": 1.6738149202693347e-05, + "loss": 1.1676, "step": 6092 }, { - "epoch": 1.2776263367582303, - "grad_norm": 8.203907299432196, - "learning_rate": 1.2847623262808784e-05, - "loss": 0.9656, + "epoch": 0.860350183563965, + "grad_norm": 3.613306478703026, + "learning_rate": 1.6737022705861113e-05, + "loss": 1.0239, "step": 6093 }, { - "epoch": 1.2778360243237576, - "grad_norm": 7.653669620694143, - "learning_rate": 1.2845453034264023e-05, - "loss": 0.9931, + "epoch": 0.8604913866139509, + "grad_norm": 3.6493783776799287, + "learning_rate": 1.6735896052463384e-05, + "loss": 1.1252, "step": 6094 }, { - "epoch": 1.278045711889285, - "grad_norm": 5.2018417891389594, - "learning_rate": 1.2843282659884863e-05, - "loss": 0.6141, + "epoch": 0.8606325896639367, + "grad_norm": 3.873179412488859, + "learning_rate": 1.6734769242526336e-05, + "loss": 1.1744, "step": 6095 }, { - "epoch": 1.2782553994548123, - "grad_norm": 6.491677574933637, - "learning_rate": 1.2841112139782535e-05, - "loss": 0.6638, + "epoch": 0.8607737927139226, + "grad_norm": 3.3223407311056743, + "learning_rate": 1.673364227607616e-05, + "loss": 1.0525, "step": 6096 }, { - "epoch": 1.2784650870203396, - "grad_norm": 7.8962487830287476, - "learning_rate": 1.2838941474068278e-05, - "loss": 1.0162, + "epoch": 0.8609149957639085, + "grad_norm": 3.0565736397021794, + "learning_rate": 1.6732515153139048e-05, + "loss": 0.8606, "step": 6097 }, { - "epoch": 1.278674774585867, - "grad_norm": 6.257543263376296, - "learning_rate": 1.2836770662853351e-05, - "loss": 0.7475, + "epoch": 0.8610561988138944, + "grad_norm": 4.134945252275968, + "learning_rate": 1.673138787374119e-05, + "loss": 1.2448, "step": 6098 }, { - "epoch": 1.2788844621513944, - "grad_norm": 7.291020476653607, - "learning_rate": 1.2834599706249007e-05, - "loss": 0.8212, + "epoch": 0.8611974018638803, + "grad_norm": 2.949351626146648, + "learning_rate": 1.6730260437908782e-05, + "loss": 0.8708, "step": 6099 }, { - "epoch": 1.2790941497169217, - "grad_norm": 8.860685239849312, - "learning_rate": 1.283242860436651e-05, - "loss": 0.9843, + "epoch": 0.8613386049138662, + "grad_norm": 3.5318671757120392, + "learning_rate": 1.672913284566803e-05, + "loss": 1.0523, "step": 6100 }, { - "epoch": 1.2793038372824492, - "grad_norm": 6.262127315021293, - "learning_rate": 1.2830257357317138e-05, - "loss": 0.8299, + "epoch": 0.861479807963852, + "grad_norm": 3.602606330278198, + "learning_rate": 1.6728005097045134e-05, + "loss": 1.1229, "step": 6101 }, { - "epoch": 1.2795135248479765, - "grad_norm": 6.7897875001443015, - "learning_rate": 1.282808596521216e-05, - "loss": 0.7245, + "epoch": 0.8616210110138379, + "grad_norm": 3.849911338314744, + "learning_rate": 1.672687719206631e-05, + "loss": 0.8684, "step": 6102 }, { - "epoch": 1.2797232124135038, - "grad_norm": 7.236661491488742, - "learning_rate": 1.2825914428162878e-05, - "loss": 0.8534, + "epoch": 0.8617622140638238, + "grad_norm": 4.071488764660193, + "learning_rate": 1.6725749130757766e-05, + "loss": 1.2061, "step": 6103 }, { - "epoch": 1.2799328999790314, - "grad_norm": 7.727077872182112, - "learning_rate": 1.282374274628058e-05, - "loss": 0.8479, + "epoch": 0.8619034171138097, + "grad_norm": 4.675815070823948, + "learning_rate": 1.672462091314571e-05, + "loss": 1.1088, "step": 6104 }, { - "epoch": 1.2801425875445587, - "grad_norm": 7.554489709844892, - "learning_rate": 1.2821570919676565e-05, - "loss": 0.9154, + "epoch": 0.8620446201637956, + "grad_norm": 3.5494945156867135, + "learning_rate": 1.672349253925637e-05, + "loss": 1.0842, "step": 6105 }, { - "epoch": 1.280352275110086, - "grad_norm": 7.912321894382789, - "learning_rate": 1.2819398948462147e-05, - "loss": 0.9591, + "epoch": 0.8621858232137815, + "grad_norm": 3.9219828419128335, + "learning_rate": 1.672236400911597e-05, + "loss": 1.0711, "step": 6106 }, { - "epoch": 1.2805619626756133, - "grad_norm": 6.882910961027078, - "learning_rate": 1.2817226832748646e-05, - "loss": 0.7885, + "epoch": 0.8623270262637673, + "grad_norm": 3.4306431927102827, + "learning_rate": 1.6721235322750735e-05, + "loss": 0.9623, "step": 6107 }, { - "epoch": 1.2807716502411406, - "grad_norm": 6.7016453850866515, - "learning_rate": 1.281505457264738e-05, - "loss": 0.8226, + "epoch": 0.8624682293137532, + "grad_norm": 3.982466018882758, + "learning_rate": 1.672010648018689e-05, + "loss": 1.1817, "step": 6108 }, { - "epoch": 1.2809813378066681, - "grad_norm": 7.422305512992405, - "learning_rate": 1.281288216826969e-05, - "loss": 0.8435, + "epoch": 0.8626094323637391, + "grad_norm": 2.9927740312941, + "learning_rate": 1.6718977481450675e-05, + "loss": 0.8796, "step": 6109 }, { - "epoch": 1.2811910253721954, - "grad_norm": 6.648104880251532, - "learning_rate": 1.2810709619726906e-05, - "loss": 0.8304, + "epoch": 0.862750635413725, + "grad_norm": 3.8395106386995823, + "learning_rate": 1.6717848326568327e-05, + "loss": 0.9777, "step": 6110 }, { - "epoch": 1.2814007129377227, - "grad_norm": 5.778646604397303, - "learning_rate": 1.280853692713038e-05, - "loss": 0.6677, + "epoch": 0.8628918384637109, + "grad_norm": 3.4804166702288253, + "learning_rate": 1.671671901556608e-05, + "loss": 0.9834, "step": 6111 }, { - "epoch": 1.2816104005032503, - "grad_norm": 6.710588260429533, - "learning_rate": 1.2806364090591466e-05, - "loss": 0.6831, + "epoch": 0.8630330415136966, + "grad_norm": 2.9115105955432723, + "learning_rate": 1.6715589548470187e-05, + "loss": 0.7626, "step": 6112 }, { - "epoch": 1.2818200880687776, - "grad_norm": 7.879129650124248, - "learning_rate": 1.2804191110221521e-05, - "loss": 0.8493, + "epoch": 0.8631742445636825, + "grad_norm": 3.810005976076049, + "learning_rate": 1.671445992530689e-05, + "loss": 1.0885, "step": 6113 }, { - "epoch": 1.2820297756343049, - "grad_norm": 6.883871276067967, - "learning_rate": 1.2802017986131922e-05, - "loss": 1.0, + "epoch": 0.8633154476136684, + "grad_norm": 3.2895221802070136, + "learning_rate": 1.6713330146102447e-05, + "loss": 1.0593, "step": 6114 }, { - "epoch": 1.2822394631998322, - "grad_norm": 6.494667860298186, - "learning_rate": 1.279984471843404e-05, - "loss": 0.7792, + "epoch": 0.8634566506636543, + "grad_norm": 3.531837170542692, + "learning_rate": 1.6712200210883112e-05, + "loss": 1.0891, "step": 6115 }, { - "epoch": 1.2824491507653595, - "grad_norm": 6.35644516407903, - "learning_rate": 1.2797671307239264e-05, - "loss": 0.593, + "epoch": 0.8635978537136402, + "grad_norm": 3.189103198412703, + "learning_rate": 1.6711070119675138e-05, + "loss": 0.9089, "step": 6116 }, { - "epoch": 1.282658838330887, - "grad_norm": 7.156173154624485, - "learning_rate": 1.2795497752658974e-05, - "loss": 0.8694, + "epoch": 0.8637390567636261, + "grad_norm": 3.2312887887411788, + "learning_rate": 1.6709939872504794e-05, + "loss": 0.9333, "step": 6117 }, { - "epoch": 1.2828685258964143, - "grad_norm": 6.284506067279038, - "learning_rate": 1.2793324054804584e-05, - "loss": 0.6494, + "epoch": 0.8638802598136119, + "grad_norm": 2.914353376719712, + "learning_rate": 1.6708809469398347e-05, + "loss": 0.7367, "step": 6118 }, { - "epoch": 1.2830782134619416, - "grad_norm": 6.769965149791385, - "learning_rate": 1.2791150213787484e-05, - "loss": 0.7165, + "epoch": 0.8640214628635978, + "grad_norm": 3.3414042749116337, + "learning_rate": 1.6707678910382066e-05, + "loss": 0.7874, "step": 6119 }, { - "epoch": 1.2832879010274691, - "grad_norm": 5.818174607031319, - "learning_rate": 1.27889762297191e-05, - "loss": 0.7371, + "epoch": 0.8641626659135837, + "grad_norm": 3.2793259080259576, + "learning_rate": 1.6706548195482222e-05, + "loss": 1.0546, "step": 6120 }, { - "epoch": 1.2834975885929965, - "grad_norm": 6.349746504024223, - "learning_rate": 1.2786802102710842e-05, - "loss": 0.7428, + "epoch": 0.8643038689635696, + "grad_norm": 3.261014368967626, + "learning_rate": 1.6705417324725094e-05, + "loss": 1.0215, "step": 6121 }, { - "epoch": 1.2837072761585238, - "grad_norm": 7.247691257801813, - "learning_rate": 1.2784627832874148e-05, - "loss": 0.9457, + "epoch": 0.8644450720135555, + "grad_norm": 3.489065743662714, + "learning_rate": 1.6704286298136966e-05, + "loss": 1.2098, "step": 6122 }, { - "epoch": 1.2839169637240513, - "grad_norm": 7.162199129832679, - "learning_rate": 1.2782453420320442e-05, - "loss": 0.7769, + "epoch": 0.8645862750635414, + "grad_norm": 3.563452513261642, + "learning_rate": 1.6703155115744118e-05, + "loss": 1.0723, "step": 6123 }, { - "epoch": 1.2841266512895786, - "grad_norm": 9.86637734570439, - "learning_rate": 1.2780278865161177e-05, - "loss": 1.0192, + "epoch": 0.8647274781135272, + "grad_norm": 3.1035699785872226, + "learning_rate": 1.670202377757284e-05, + "loss": 0.8516, "step": 6124 }, { - "epoch": 1.284336338855106, - "grad_norm": 6.877951487785994, - "learning_rate": 1.2778104167507797e-05, - "loss": 0.7039, + "epoch": 0.8648686811635131, + "grad_norm": 3.874686455997525, + "learning_rate": 1.6700892283649426e-05, + "loss": 0.9156, "step": 6125 }, { - "epoch": 1.2845460264206332, - "grad_norm": 7.543852311829568, - "learning_rate": 1.2775929327471759e-05, - "loss": 0.7649, + "epoch": 0.865009884213499, + "grad_norm": 3.1560848951164404, + "learning_rate": 1.6699760634000166e-05, + "loss": 0.8749, "step": 6126 }, { - "epoch": 1.2847557139861605, - "grad_norm": 7.855847615017012, - "learning_rate": 1.2773754345164529e-05, - "loss": 0.7525, + "epoch": 0.8651510872634849, + "grad_norm": 3.682451365047172, + "learning_rate": 1.6698628828651363e-05, + "loss": 1.1906, "step": 6127 }, { - "epoch": 1.284965401551688, - "grad_norm": 6.954972784567074, - "learning_rate": 1.277157922069758e-05, - "loss": 0.87, + "epoch": 0.8652922903134708, + "grad_norm": 3.7129858653629384, + "learning_rate": 1.669749686762932e-05, + "loss": 1.1444, "step": 6128 }, { - "epoch": 1.2851750891172153, - "grad_norm": 6.775680557506945, - "learning_rate": 1.2769403954182383e-05, - "loss": 0.7465, + "epoch": 0.8654334933634567, + "grad_norm": 3.066719762723464, + "learning_rate": 1.6696364750960342e-05, + "loss": 0.7812, "step": 6129 }, { - "epoch": 1.2853847766827426, - "grad_norm": 8.964950437471938, - "learning_rate": 1.2767228545730435e-05, - "loss": 0.947, + "epoch": 0.8655746964134425, + "grad_norm": 4.179279510605568, + "learning_rate": 1.669523247867074e-05, + "loss": 1.1416, "step": 6130 }, { - "epoch": 1.2855944642482702, - "grad_norm": 8.494369027695468, - "learning_rate": 1.2765052995453223e-05, - "loss": 0.8859, + "epoch": 0.8657158994634284, + "grad_norm": 3.225081554794639, + "learning_rate": 1.669410005078682e-05, + "loss": 0.9832, "step": 6131 }, { - "epoch": 1.2858041518137975, - "grad_norm": 8.734689756328109, - "learning_rate": 1.2762877303462248e-05, - "loss": 0.9103, + "epoch": 0.8658571025134143, + "grad_norm": 3.560434730120382, + "learning_rate": 1.6692967467334915e-05, + "loss": 1.0603, "step": 6132 }, { - "epoch": 1.2860138393793248, - "grad_norm": 7.247235156077307, - "learning_rate": 1.2760701469869021e-05, - "loss": 0.8347, + "epoch": 0.8659983055634002, + "grad_norm": 3.4413839665158106, + "learning_rate": 1.6691834728341332e-05, + "loss": 0.9875, "step": 6133 }, { - "epoch": 1.286223526944852, - "grad_norm": 5.8549710878830385, - "learning_rate": 1.2758525494785055e-05, - "loss": 0.7539, + "epoch": 0.8661395086133861, + "grad_norm": 2.8964039482171304, + "learning_rate": 1.6690701833832398e-05, + "loss": 0.7732, "step": 6134 }, { - "epoch": 1.2864332145103794, - "grad_norm": 6.86861172810041, - "learning_rate": 1.2756349378321873e-05, - "loss": 0.6946, + "epoch": 0.866280711663372, + "grad_norm": 3.744634570405144, + "learning_rate": 1.668956878383445e-05, + "loss": 1.125, "step": 6135 }, { - "epoch": 1.286642902075907, - "grad_norm": 6.9489395083889285, - "learning_rate": 1.2754173120591005e-05, - "loss": 0.8046, + "epoch": 0.8664219147133578, + "grad_norm": 4.0361579114896395, + "learning_rate": 1.668843557837381e-05, + "loss": 1.3351, "step": 6136 }, { - "epoch": 1.2868525896414342, - "grad_norm": 7.692398615364792, - "learning_rate": 1.2751996721703987e-05, - "loss": 1.0384, + "epoch": 0.8665631177633437, + "grad_norm": 3.0918469729158597, + "learning_rate": 1.6687302217476808e-05, + "loss": 0.9888, "step": 6137 }, { - "epoch": 1.2870622772069615, - "grad_norm": 6.780551861828205, - "learning_rate": 1.2749820181772365e-05, - "loss": 0.7138, + "epoch": 0.8667043208133296, + "grad_norm": 4.38969457858748, + "learning_rate": 1.6686168701169797e-05, + "loss": 1.2701, "step": 6138 }, { - "epoch": 1.287271964772489, - "grad_norm": 5.9624238111068735, - "learning_rate": 1.274764350090769e-05, - "loss": 0.732, + "epoch": 0.8668455238633155, + "grad_norm": 2.9671297091334092, + "learning_rate": 1.6685035029479114e-05, + "loss": 0.9548, "step": 6139 }, { - "epoch": 1.2874816523380164, - "grad_norm": 7.050752486754102, - "learning_rate": 1.2745466679221518e-05, - "loss": 0.8291, + "epoch": 0.8669867269133014, + "grad_norm": 3.702093809567638, + "learning_rate": 1.66839012024311e-05, + "loss": 1.1245, "step": 6140 }, { - "epoch": 1.2876913399035437, - "grad_norm": 6.030339450243202, - "learning_rate": 1.2743289716825418e-05, - "loss": 0.5257, + "epoch": 0.8671279299632872, + "grad_norm": 3.0508530686611066, + "learning_rate": 1.668276722005211e-05, + "loss": 0.8615, "step": 6141 }, { - "epoch": 1.2879010274690712, - "grad_norm": 5.989484226177406, - "learning_rate": 1.2741112613830962e-05, - "loss": 0.6679, + "epoch": 0.8672691330132731, + "grad_norm": 3.4664240345204225, + "learning_rate": 1.66816330823685e-05, + "loss": 1.333, "step": 6142 }, { - "epoch": 1.2881107150345985, - "grad_norm": 7.534290183116791, - "learning_rate": 1.273893537034973e-05, - "loss": 0.7587, + "epoch": 0.867410336063259, + "grad_norm": 3.1170923116270677, + "learning_rate": 1.6680498789406618e-05, + "loss": 1.0463, "step": 6143 }, { - "epoch": 1.2883204026001258, - "grad_norm": 7.687929943174706, - "learning_rate": 1.2736757986493309e-05, - "loss": 0.7274, + "epoch": 0.8675515391132449, + "grad_norm": 2.772677464869212, + "learning_rate": 1.667936434119283e-05, + "loss": 0.8761, "step": 6144 }, { - "epoch": 1.2885300901656531, - "grad_norm": 7.798303100421033, - "learning_rate": 1.2734580462373299e-05, - "loss": 0.9392, + "epoch": 0.8676927421632308, + "grad_norm": 3.829142119377285, + "learning_rate": 1.6678229737753498e-05, + "loss": 1.1472, "step": 6145 }, { - "epoch": 1.2887397777311804, - "grad_norm": 7.489515855238279, - "learning_rate": 1.273240279810129e-05, - "loss": 0.7213, + "epoch": 0.8678339452132166, + "grad_norm": 3.1825534932746886, + "learning_rate": 1.6677094979114993e-05, + "loss": 0.9769, "step": 6146 }, { - "epoch": 1.288949465296708, - "grad_norm": 6.583220140184706, - "learning_rate": 1.2730224993788904e-05, - "loss": 0.8241, + "epoch": 0.8679751482632024, + "grad_norm": 3.7709957933903873, + "learning_rate": 1.6675960065303684e-05, + "loss": 1.0858, "step": 6147 }, { - "epoch": 1.2891591528622353, - "grad_norm": 8.243555067153068, - "learning_rate": 1.2728047049547749e-05, - "loss": 1.1078, + "epoch": 0.8681163513131883, + "grad_norm": 3.5298875632061986, + "learning_rate": 1.6674824996345947e-05, + "loss": 0.9852, "step": 6148 }, { - "epoch": 1.2893688404277626, - "grad_norm": 8.54262841754231, - "learning_rate": 1.2725868965489452e-05, - "loss": 0.8575, + "epoch": 0.8682575543631742, + "grad_norm": 3.1141362057383963, + "learning_rate": 1.6673689772268157e-05, + "loss": 0.8774, "step": 6149 }, { - "epoch": 1.28957852799329, - "grad_norm": 7.626448356444029, - "learning_rate": 1.2723690741725643e-05, - "loss": 1.0221, + "epoch": 0.8683987574131601, + "grad_norm": 3.715323879230092, + "learning_rate": 1.66725543930967e-05, + "loss": 1.2924, "step": 6150 }, { - "epoch": 1.2897882155588174, - "grad_norm": 7.467976606899438, - "learning_rate": 1.2721512378367962e-05, - "loss": 0.8748, + "epoch": 0.868539960463146, + "grad_norm": 3.122059165368689, + "learning_rate": 1.6671418858857965e-05, + "loss": 1.0271, "step": 6151 }, { - "epoch": 1.2899979031243447, - "grad_norm": 6.404718249216459, - "learning_rate": 1.2719333875528046e-05, - "loss": 0.7069, + "epoch": 0.8686811635131318, + "grad_norm": 3.212771913201258, + "learning_rate": 1.6670283169578333e-05, + "loss": 0.7822, "step": 6152 }, { - "epoch": 1.290207590689872, - "grad_norm": 7.3896180902530695, - "learning_rate": 1.2717155233317557e-05, - "loss": 0.9844, + "epoch": 0.8688223665631177, + "grad_norm": 4.0291150677539775, + "learning_rate": 1.66691473252842e-05, + "loss": 0.9919, "step": 6153 }, { - "epoch": 1.2904172782553995, - "grad_norm": 6.653349139353941, - "learning_rate": 1.2714976451848147e-05, - "loss": 0.7254, + "epoch": 0.8689635696131036, + "grad_norm": 3.4402579336887227, + "learning_rate": 1.6668011326001962e-05, + "loss": 0.99, "step": 6154 }, { - "epoch": 1.2906269658209268, - "grad_norm": 7.098454842492728, - "learning_rate": 1.2712797531231487e-05, - "loss": 0.8049, + "epoch": 0.8691047726630895, + "grad_norm": 4.6638851077472685, + "learning_rate": 1.6666875171758024e-05, + "loss": 1.3781, "step": 6155 }, { - "epoch": 1.2908366533864541, - "grad_norm": 6.965573618403277, - "learning_rate": 1.2710618471579247e-05, - "loss": 0.7915, + "epoch": 0.8692459757130754, + "grad_norm": 3.0642228246860013, + "learning_rate": 1.6665738862578783e-05, + "loss": 0.7877, "step": 6156 }, { - "epoch": 1.2910463409519815, - "grad_norm": 8.366233220298378, - "learning_rate": 1.270843927300311e-05, - "loss": 0.7872, + "epoch": 0.8693871787630613, + "grad_norm": 3.8116689461326434, + "learning_rate": 1.6664602398490653e-05, + "loss": 1.0016, "step": 6157 }, { - "epoch": 1.291256028517509, - "grad_norm": 8.97733366528058, - "learning_rate": 1.270625993561476e-05, - "loss": 0.9534, + "epoch": 0.8695283818130471, + "grad_norm": 3.3748572862791124, + "learning_rate": 1.6663465779520042e-05, + "loss": 0.8441, "step": 6158 }, { - "epoch": 1.2914657160830363, - "grad_norm": 7.951734529966697, - "learning_rate": 1.2704080459525899e-05, - "loss": 0.893, + "epoch": 0.869669584863033, + "grad_norm": 3.677776215052552, + "learning_rate": 1.666232900569336e-05, + "loss": 0.932, "step": 6159 }, { - "epoch": 1.2916754036485636, - "grad_norm": 7.530115672316366, - "learning_rate": 1.270190084484822e-05, - "loss": 0.7286, + "epoch": 0.8698107879130189, + "grad_norm": 3.2630293030495925, + "learning_rate": 1.666119207703703e-05, + "loss": 0.9645, "step": 6160 }, { - "epoch": 1.2918850912140911, - "grad_norm": 8.3191676028206, - "learning_rate": 1.269972109169344e-05, - "loss": 0.8012, + "epoch": 0.8699519909630048, + "grad_norm": 3.8471797987768714, + "learning_rate": 1.6660054993577478e-05, + "loss": 0.9793, "step": 6161 }, { - "epoch": 1.2920947787796184, - "grad_norm": 5.866122689971722, - "learning_rate": 1.269754120017327e-05, - "loss": 0.6695, + "epoch": 0.8700931940129907, + "grad_norm": 3.8806058029924113, + "learning_rate": 1.665891775534112e-05, + "loss": 1.0751, "step": 6162 }, { - "epoch": 1.2923044663451457, - "grad_norm": 9.438116207239645, - "learning_rate": 1.2695361170399434e-05, - "loss": 0.862, + "epoch": 0.8702343970629766, + "grad_norm": 3.61055876628945, + "learning_rate": 1.6657780362354386e-05, + "loss": 0.832, "step": 6163 }, { - "epoch": 1.292514153910673, - "grad_norm": 7.538726566420118, - "learning_rate": 1.2693181002483664e-05, - "loss": 0.7418, + "epoch": 0.8703756001129624, + "grad_norm": 3.7411962708995095, + "learning_rate": 1.6656642814643716e-05, + "loss": 1.0581, "step": 6164 }, { - "epoch": 1.2927238414762003, - "grad_norm": 7.098749012874384, - "learning_rate": 1.2691000696537696e-05, - "loss": 0.7402, + "epoch": 0.8705168031629483, + "grad_norm": 4.0424798529708355, + "learning_rate": 1.6655505112235545e-05, + "loss": 1.2854, "step": 6165 }, { - "epoch": 1.2929335290417279, - "grad_norm": 6.69861885754718, - "learning_rate": 1.2688820252673275e-05, - "loss": 0.9062, + "epoch": 0.8706580062129342, + "grad_norm": 3.1263876317703923, + "learning_rate": 1.6654367255156303e-05, + "loss": 0.9762, "step": 6166 }, { - "epoch": 1.2931432166072552, - "grad_norm": 6.7637693582362015, - "learning_rate": 1.2686639671002152e-05, - "loss": 0.8883, + "epoch": 0.8707992092629201, + "grad_norm": 3.4313831745773005, + "learning_rate": 1.6653229243432442e-05, + "loss": 1.2639, "step": 6167 }, { - "epoch": 1.2933529041727825, - "grad_norm": 7.192430401791591, - "learning_rate": 1.2684458951636089e-05, - "loss": 0.9579, + "epoch": 0.870940412312906, + "grad_norm": 4.337287401905371, + "learning_rate": 1.6652091077090405e-05, + "loss": 1.0013, "step": 6168 }, { - "epoch": 1.29356259173831, - "grad_norm": 8.0021445281955, - "learning_rate": 1.2682278094686849e-05, - "loss": 0.9076, + "epoch": 0.8710816153628919, + "grad_norm": 3.8162229848757225, + "learning_rate": 1.6650952756156645e-05, + "loss": 0.8968, "step": 6169 }, { - "epoch": 1.2937722793038373, - "grad_norm": 6.995822728720604, - "learning_rate": 1.2680097100266201e-05, - "loss": 0.9147, + "epoch": 0.8712228184128777, + "grad_norm": 3.116915704494657, + "learning_rate": 1.664981428065762e-05, + "loss": 0.9348, "step": 6170 }, { - "epoch": 1.2939819668693646, - "grad_norm": 6.614901004069066, - "learning_rate": 1.267791596848593e-05, - "loss": 0.8785, + "epoch": 0.8713640214628636, + "grad_norm": 3.224253361841439, + "learning_rate": 1.664867565061978e-05, + "loss": 1.0271, "step": 6171 }, { - "epoch": 1.2941916544348921, - "grad_norm": 6.191145878951838, - "learning_rate": 1.267573469945782e-05, - "loss": 0.5883, + "epoch": 0.8715052245128495, + "grad_norm": 3.802448027088647, + "learning_rate": 1.6647536866069587e-05, + "loss": 0.897, "step": 6172 }, { - "epoch": 1.2944013420004195, - "grad_norm": 5.692107098234349, - "learning_rate": 1.2673553293293667e-05, - "loss": 0.5437, + "epoch": 0.8716464275628354, + "grad_norm": 4.811224768398756, + "learning_rate": 1.6646397927033507e-05, + "loss": 1.3397, "step": 6173 }, { - "epoch": 1.2946110295659468, - "grad_norm": 6.263618631972242, - "learning_rate": 1.2671371750105272e-05, - "loss": 0.7545, + "epoch": 0.8717876306128213, + "grad_norm": 3.258466040667012, + "learning_rate": 1.6645258833538015e-05, + "loss": 0.8724, "step": 6174 }, { - "epoch": 1.294820717131474, - "grad_norm": 8.84086188893648, - "learning_rate": 1.2669190070004437e-05, - "loss": 0.8851, + "epoch": 0.8719288336628072, + "grad_norm": 3.5506554081928305, + "learning_rate": 1.664411958560957e-05, + "loss": 0.9896, "step": 6175 }, { - "epoch": 1.2950304046970014, - "grad_norm": 5.77569231474779, - "learning_rate": 1.2667008253102987e-05, - "loss": 0.4937, + "epoch": 0.872070036712793, + "grad_norm": 3.270455537782497, + "learning_rate": 1.6642980183274665e-05, + "loss": 0.9597, "step": 6176 }, { - "epoch": 1.295240092262529, - "grad_norm": 5.27394289420634, - "learning_rate": 1.2664826299512733e-05, - "loss": 0.6302, + "epoch": 0.8722112397627789, + "grad_norm": 3.9113333412561153, + "learning_rate": 1.664184062655976e-05, + "loss": 1.0502, "step": 6177 }, { - "epoch": 1.2954497798280562, - "grad_norm": 6.966634249306382, - "learning_rate": 1.2662644209345514e-05, - "loss": 0.6364, + "epoch": 0.8723524428127648, + "grad_norm": 4.529919288588592, + "learning_rate": 1.6640700915491354e-05, + "loss": 1.2265, "step": 6178 }, { - "epoch": 1.2956594673935835, - "grad_norm": 6.656312641528478, - "learning_rate": 1.2660461982713157e-05, - "loss": 0.8662, + "epoch": 0.8724936458627507, + "grad_norm": 3.8357593319019547, + "learning_rate": 1.6639561050095926e-05, + "loss": 1.0841, "step": 6179 }, { - "epoch": 1.295869154959111, - "grad_norm": 6.660210323673568, - "learning_rate": 1.2658279619727513e-05, - "loss": 0.6697, + "epoch": 0.8726348489127365, + "grad_norm": 3.565858855707741, + "learning_rate": 1.6638421030399962e-05, + "loss": 1.1105, "step": 6180 }, { - "epoch": 1.2960788425246383, - "grad_norm": 6.992780659599443, - "learning_rate": 1.2656097120500424e-05, - "loss": 0.638, + "epoch": 0.8727760519627223, + "grad_norm": 4.061910119463994, + "learning_rate": 1.6637280856429964e-05, + "loss": 1.1354, "step": 6181 }, { - "epoch": 1.2962885300901656, - "grad_norm": 7.253678146980893, - "learning_rate": 1.2653914485143754e-05, - "loss": 0.8442, + "epoch": 0.8729172550127082, + "grad_norm": 3.516603525418769, + "learning_rate": 1.6636140528212427e-05, + "loss": 1.0001, "step": 6182 }, { - "epoch": 1.296498217655693, - "grad_norm": 6.780408377707726, - "learning_rate": 1.2651731713769361e-05, - "loss": 0.8438, + "epoch": 0.8730584580626941, + "grad_norm": 3.62786730936123, + "learning_rate": 1.6635000045773843e-05, + "loss": 1.0323, "step": 6183 }, { - "epoch": 1.2967079052212203, - "grad_norm": 8.44902355711107, - "learning_rate": 1.264954880648912e-05, - "loss": 1.0467, + "epoch": 0.87319966111268, + "grad_norm": 3.302388724996381, + "learning_rate": 1.663385940914073e-05, + "loss": 0.9775, "step": 6184 }, { - "epoch": 1.2969175927867478, - "grad_norm": 7.298844215461077, - "learning_rate": 1.2647365763414908e-05, - "loss": 0.8692, + "epoch": 0.8733408641626659, + "grad_norm": 4.270190674313833, + "learning_rate": 1.6632718618339584e-05, + "loss": 1.2565, "step": 6185 }, { - "epoch": 1.297127280352275, - "grad_norm": 7.996619592315084, - "learning_rate": 1.2645182584658611e-05, - "loss": 0.9413, + "epoch": 0.8734820672126518, + "grad_norm": 3.2573198540867874, + "learning_rate": 1.6631577673396925e-05, + "loss": 0.945, "step": 6186 }, { - "epoch": 1.2973369679178024, - "grad_norm": 8.22172170864594, - "learning_rate": 1.2642999270332113e-05, - "loss": 0.9664, + "epoch": 0.8736232702626376, + "grad_norm": 3.9717069534940226, + "learning_rate": 1.6630436574339266e-05, + "loss": 1.181, "step": 6187 }, { - "epoch": 1.29754665548333, - "grad_norm": 6.6840205404660775, - "learning_rate": 1.2640815820547323e-05, - "loss": 0.6972, + "epoch": 0.8737644733126235, + "grad_norm": 3.0512203039525967, + "learning_rate": 1.6629295321193125e-05, + "loss": 0.8565, "step": 6188 }, { - "epoch": 1.2977563430488572, - "grad_norm": 6.021940935070364, - "learning_rate": 1.263863223541614e-05, - "loss": 0.6601, + "epoch": 0.8739056763626094, + "grad_norm": 3.942823204000492, + "learning_rate": 1.662815391398502e-05, + "loss": 1.035, "step": 6189 }, { - "epoch": 1.2979660306143845, - "grad_norm": 7.9848398702695, - "learning_rate": 1.2636448515050477e-05, - "loss": 0.9068, + "epoch": 0.8740468794125953, + "grad_norm": 4.908340785805079, + "learning_rate": 1.6627012352741482e-05, + "loss": 1.4643, "step": 6190 }, { - "epoch": 1.298175718179912, - "grad_norm": 7.849313248200001, - "learning_rate": 1.2634264659562257e-05, - "loss": 0.9521, + "epoch": 0.8741880824625812, + "grad_norm": 3.28615509718307, + "learning_rate": 1.662587063748904e-05, + "loss": 0.992, "step": 6191 }, { - "epoch": 1.2983854057454394, - "grad_norm": 7.042980585495604, - "learning_rate": 1.2632080669063403e-05, - "loss": 0.7827, + "epoch": 0.8743292855125671, + "grad_norm": 3.6132859459871685, + "learning_rate": 1.6624728768254225e-05, + "loss": 0.9488, "step": 6192 }, { - "epoch": 1.2985950933109667, - "grad_norm": 5.8306174754017945, - "learning_rate": 1.2629896543665852e-05, - "loss": 0.6933, + "epoch": 0.8744704885625529, + "grad_norm": 3.0247386337033517, + "learning_rate": 1.6623586745063573e-05, + "loss": 1.0996, "step": 6193 }, { - "epoch": 1.298804780876494, - "grad_norm": 6.567095279000707, - "learning_rate": 1.262771228348154e-05, - "loss": 0.8111, + "epoch": 0.8746116916125388, + "grad_norm": 2.8386863019358963, + "learning_rate": 1.6622444567943627e-05, + "loss": 0.8484, "step": 6194 }, { - "epoch": 1.2990144684420213, - "grad_norm": 8.034754786432387, - "learning_rate": 1.2625527888622416e-05, - "loss": 0.6704, + "epoch": 0.8747528946625247, + "grad_norm": 4.232051628092746, + "learning_rate": 1.6621302236920928e-05, + "loss": 1.3728, "step": 6195 }, { - "epoch": 1.2992241560075488, - "grad_norm": 8.564244070731304, - "learning_rate": 1.2623343359200433e-05, - "loss": 0.9161, + "epoch": 0.8748940977125106, + "grad_norm": 3.8467378085212016, + "learning_rate": 1.662015975202203e-05, + "loss": 0.8706, "step": 6196 }, { - "epoch": 1.2994338435730761, - "grad_norm": 8.158401103285374, - "learning_rate": 1.2621158695327556e-05, - "loss": 0.8502, + "epoch": 0.8750353007624965, + "grad_norm": 3.2642407940941203, + "learning_rate": 1.6619017113273473e-05, + "loss": 0.9245, "step": 6197 }, { - "epoch": 1.2996435311386034, - "grad_norm": 6.6271004349862555, - "learning_rate": 1.2618973897115746e-05, - "loss": 0.7858, + "epoch": 0.8751765038124824, + "grad_norm": 3.2589085965369518, + "learning_rate": 1.6617874320701813e-05, + "loss": 1.1047, "step": 6198 }, { - "epoch": 1.299853218704131, - "grad_norm": 8.191276910140028, - "learning_rate": 1.2616788964676982e-05, - "loss": 0.8722, + "epoch": 0.8753177068624682, + "grad_norm": 5.474735334788356, + "learning_rate": 1.6616731374333622e-05, + "loss": 0.9211, "step": 6199 }, { - "epoch": 1.3000629062696583, - "grad_norm": 7.467146373614771, - "learning_rate": 1.2614603898123248e-05, - "loss": 0.814, + "epoch": 0.8754589099124541, + "grad_norm": 5.154954649488404, + "learning_rate": 1.6615588274195445e-05, + "loss": 1.1485, "step": 6200 }, { - "epoch": 1.3002725938351856, - "grad_norm": 7.7613071979043005, - "learning_rate": 1.2612418697566527e-05, - "loss": 0.9195, + "epoch": 0.87560011296244, + "grad_norm": 3.696517917696646, + "learning_rate": 1.6614445020313854e-05, + "loss": 1.1142, "step": 6201 }, { - "epoch": 1.3004822814007129, - "grad_norm": 7.133067589477989, - "learning_rate": 1.2610233363118815e-05, - "loss": 0.8636, + "epoch": 0.8757413160124259, + "grad_norm": 4.0640408154069405, + "learning_rate": 1.661330161271542e-05, + "loss": 1.1884, "step": 6202 }, { - "epoch": 1.3006919689662402, - "grad_norm": 6.937473894995899, - "learning_rate": 1.2608047894892122e-05, - "loss": 0.8048, + "epoch": 0.8758825190624118, + "grad_norm": 2.7530014081140717, + "learning_rate": 1.661215805142671e-05, + "loss": 0.7921, "step": 6203 }, { - "epoch": 1.3009016565317677, - "grad_norm": 7.002110125166155, - "learning_rate": 1.2605862292998443e-05, - "loss": 0.7577, + "epoch": 0.8760237221123977, + "grad_norm": 3.0952352581142852, + "learning_rate": 1.6611014336474303e-05, + "loss": 0.7491, "step": 6204 }, { - "epoch": 1.301111344097295, - "grad_norm": 7.178718517851093, - "learning_rate": 1.2603676557549809e-05, - "loss": 0.8693, + "epoch": 0.8761649251623835, + "grad_norm": 3.511557754079026, + "learning_rate": 1.6609870467884777e-05, + "loss": 0.9773, "step": 6205 }, { - "epoch": 1.3013210316628223, - "grad_norm": 7.050765313582229, - "learning_rate": 1.260149068865823e-05, - "loss": 0.6457, + "epoch": 0.8763061282123694, + "grad_norm": 3.371115043111173, + "learning_rate": 1.6608726445684715e-05, + "loss": 0.9392, "step": 6206 }, { - "epoch": 1.3015307192283498, - "grad_norm": 6.737620128872615, - "learning_rate": 1.2599304686435745e-05, - "loss": 0.9038, + "epoch": 0.8764473312623553, + "grad_norm": 4.027351241653331, + "learning_rate": 1.6607582269900707e-05, + "loss": 1.2513, "step": 6207 }, { - "epoch": 1.3017404067938771, - "grad_norm": 6.801114865686833, - "learning_rate": 1.2597118550994384e-05, - "loss": 0.7783, + "epoch": 0.8765885343123412, + "grad_norm": 3.2201505817912834, + "learning_rate": 1.6606437940559342e-05, + "loss": 0.8706, "step": 6208 }, { - "epoch": 1.3019500943594045, - "grad_norm": 6.88166367174607, - "learning_rate": 1.2594932282446197e-05, - "loss": 0.9352, + "epoch": 0.8767297373623271, + "grad_norm": 3.624801513001558, + "learning_rate": 1.6605293457687212e-05, + "loss": 0.9893, "step": 6209 }, { - "epoch": 1.302159781924932, - "grad_norm": 6.8814549051690586, - "learning_rate": 1.2592745880903224e-05, - "loss": 0.7024, + "epoch": 0.876870940412313, + "grad_norm": 3.491949512786377, + "learning_rate": 1.6604148821310912e-05, + "loss": 1.0759, "step": 6210 }, { - "epoch": 1.3023694694904593, - "grad_norm": 5.9316108255074305, - "learning_rate": 1.2590559346477533e-05, - "loss": 0.6481, + "epoch": 0.8770121434622988, + "grad_norm": 4.721126249596484, + "learning_rate": 1.6603004031457043e-05, + "loss": 1.3192, "step": 6211 }, { - "epoch": 1.3025791570559866, - "grad_norm": 7.619384966572234, - "learning_rate": 1.258837267928118e-05, - "loss": 0.7787, + "epoch": 0.8771533465122847, + "grad_norm": 3.0614910327264315, + "learning_rate": 1.6601859088152215e-05, + "loss": 0.7274, "step": 6212 }, { - "epoch": 1.302788844621514, - "grad_norm": 7.902218059628242, - "learning_rate": 1.2586185879426239e-05, - "loss": 0.9446, + "epoch": 0.8772945495622706, + "grad_norm": 3.958111122229993, + "learning_rate": 1.6600713991423036e-05, + "loss": 0.8517, "step": 6213 }, { - "epoch": 1.3029985321870412, - "grad_norm": 8.260981574887053, - "learning_rate": 1.2583998947024783e-05, - "loss": 0.939, + "epoch": 0.8774357526122564, + "grad_norm": 3.2474422359233115, + "learning_rate": 1.6599568741296112e-05, + "loss": 1.0641, "step": 6214 }, { - "epoch": 1.3032082197525687, - "grad_norm": 8.948175006028508, - "learning_rate": 1.2581811882188901e-05, - "loss": 1.0613, + "epoch": 0.8775769556622423, + "grad_norm": 3.4350127734173834, + "learning_rate": 1.659842333779806e-05, + "loss": 1.0228, "step": 6215 }, { - "epoch": 1.303417907318096, - "grad_norm": 7.731358091462994, - "learning_rate": 1.2579624685030682e-05, - "loss": 0.8221, + "epoch": 0.8777181587122281, + "grad_norm": 4.329636253840294, + "learning_rate": 1.6597277780955502e-05, + "loss": 1.2182, "step": 6216 }, { - "epoch": 1.3036275948836233, - "grad_norm": 7.9562591511478225, - "learning_rate": 1.2577437355662225e-05, - "loss": 0.8381, + "epoch": 0.877859361762214, + "grad_norm": 2.8523648955721166, + "learning_rate": 1.6596132070795054e-05, + "loss": 0.7745, "step": 6217 }, { - "epoch": 1.3038372824491509, - "grad_norm": 7.931766029245781, - "learning_rate": 1.2575249894195634e-05, - "loss": 0.9988, + "epoch": 0.8780005648121999, + "grad_norm": 4.1008291301997595, + "learning_rate": 1.6594986207343343e-05, + "loss": 1.1768, "step": 6218 }, { - "epoch": 1.3040469700146782, - "grad_norm": 6.916426259421991, - "learning_rate": 1.2573062300743018e-05, - "loss": 0.7585, + "epoch": 0.8781417678621858, + "grad_norm": 3.443255216507718, + "learning_rate": 1.6593840190627007e-05, + "loss": 0.9041, "step": 6219 }, { - "epoch": 1.3042566575802055, - "grad_norm": 7.897788449144269, - "learning_rate": 1.2570874575416494e-05, - "loss": 0.8848, + "epoch": 0.8782829709121717, + "grad_norm": 3.1302332857071873, + "learning_rate": 1.6592694020672667e-05, + "loss": 0.7826, "step": 6220 }, { - "epoch": 1.3044663451457328, - "grad_norm": 5.580257956304535, - "learning_rate": 1.256868671832819e-05, - "loss": 0.5303, + "epoch": 0.8784241739621576, + "grad_norm": 3.582102981258048, + "learning_rate": 1.659154769750697e-05, + "loss": 1.1242, "step": 6221 }, { - "epoch": 1.30467603271126, - "grad_norm": 6.703353118238242, - "learning_rate": 1.2566498729590237e-05, - "loss": 0.7187, + "epoch": 0.8785653770121434, + "grad_norm": 4.042865594327094, + "learning_rate": 1.659040122115655e-05, + "loss": 1.3694, "step": 6222 }, { - "epoch": 1.3048857202767876, - "grad_norm": 5.478876745082544, - "learning_rate": 1.2564310609314774e-05, - "loss": 0.5515, + "epoch": 0.8787065800621293, + "grad_norm": 3.5673801196002475, + "learning_rate": 1.658925459164805e-05, + "loss": 1.0865, "step": 6223 }, { - "epoch": 1.305095407842315, - "grad_norm": 5.963225542703108, - "learning_rate": 1.2562122357613944e-05, - "loss": 0.6011, + "epoch": 0.8788477831121152, + "grad_norm": 5.1780488700183005, + "learning_rate": 1.658810780900812e-05, + "loss": 1.0968, "step": 6224 }, { - "epoch": 1.3053050954078422, - "grad_norm": 7.443856457198042, - "learning_rate": 1.2559933974599899e-05, - "loss": 0.8735, + "epoch": 0.8789889861621011, + "grad_norm": 3.594870583517368, + "learning_rate": 1.6586960873263412e-05, + "loss": 0.9476, "step": 6225 }, { - "epoch": 1.3055147829733698, - "grad_norm": 6.518360011590578, - "learning_rate": 1.25577454603848e-05, - "loss": 0.7934, + "epoch": 0.879130189212087, + "grad_norm": 3.179824230257961, + "learning_rate": 1.6585813784440575e-05, + "loss": 0.7881, "step": 6226 }, { - "epoch": 1.305724470538897, - "grad_norm": 6.859765635274767, - "learning_rate": 1.2555556815080805e-05, - "loss": 0.7529, + "epoch": 0.8792713922620728, + "grad_norm": 3.797779309762246, + "learning_rate": 1.658466654256627e-05, + "loss": 0.9513, "step": 6227 }, { - "epoch": 1.3059341581044244, - "grad_norm": 7.174672001320122, - "learning_rate": 1.2553368038800096e-05, - "loss": 0.8242, + "epoch": 0.8794125953120587, + "grad_norm": 3.3224478700011626, + "learning_rate": 1.6583519147667157e-05, + "loss": 0.9803, "step": 6228 }, { - "epoch": 1.306143845669952, - "grad_norm": 7.334078835896211, - "learning_rate": 1.2551179131654845e-05, - "loss": 0.925, + "epoch": 0.8795537983620446, + "grad_norm": 3.4719079045151906, + "learning_rate": 1.6582371599769908e-05, + "loss": 1.0535, "step": 6229 }, { - "epoch": 1.3063535332354792, - "grad_norm": 8.84104936623884, - "learning_rate": 1.2548990093757239e-05, - "loss": 1.0665, + "epoch": 0.8796950014120305, + "grad_norm": 3.4991666084095825, + "learning_rate": 1.658122389890118e-05, + "loss": 1.1369, "step": 6230 }, { - "epoch": 1.3065632208010065, - "grad_norm": 6.683764237453939, - "learning_rate": 1.2546800925219467e-05, - "loss": 0.8067, + "epoch": 0.8798362044620164, + "grad_norm": 3.724741789072935, + "learning_rate": 1.658007604508765e-05, + "loss": 1.0079, "step": 6231 }, { - "epoch": 1.3067729083665338, - "grad_norm": 7.561359685548278, - "learning_rate": 1.2544611626153736e-05, - "loss": 0.8613, + "epoch": 0.8799774075120023, + "grad_norm": 4.516147467819665, + "learning_rate": 1.6578928038355998e-05, + "loss": 1.0425, "step": 6232 }, { - "epoch": 1.3069825959320611, - "grad_norm": 6.3516183897025185, - "learning_rate": 1.2542422196672237e-05, - "loss": 0.6975, + "epoch": 0.8801186105619881, + "grad_norm": 3.4418366505744973, + "learning_rate": 1.65777798787329e-05, + "loss": 1.0312, "step": 6233 }, { - "epoch": 1.3071922834975886, - "grad_norm": 8.256583682673003, - "learning_rate": 1.25402326368872e-05, - "loss": 1.1666, + "epoch": 0.880259813611974, + "grad_norm": 3.4156719793020582, + "learning_rate": 1.6576631566245037e-05, + "loss": 1.0794, "step": 6234 }, { - "epoch": 1.307401971063116, - "grad_norm": 7.320410852219375, - "learning_rate": 1.253804294691083e-05, - "loss": 0.8327, + "epoch": 0.8804010166619599, + "grad_norm": 3.336970565056934, + "learning_rate": 1.6575483100919094e-05, + "loss": 0.8873, "step": 6235 }, { - "epoch": 1.3076116586286433, - "grad_norm": 6.919852334485194, - "learning_rate": 1.2535853126855355e-05, - "loss": 0.8599, + "epoch": 0.8805422197119458, + "grad_norm": 3.139629455628113, + "learning_rate": 1.6574334482781768e-05, + "loss": 0.8668, "step": 6236 }, { - "epoch": 1.3078213461941708, - "grad_norm": 6.233577757217376, - "learning_rate": 1.2533663176833012e-05, - "loss": 0.6749, + "epoch": 0.8806834227619317, + "grad_norm": 3.6281350729788513, + "learning_rate": 1.6573185711859748e-05, + "loss": 1.1227, "step": 6237 }, { - "epoch": 1.308031033759698, - "grad_norm": 6.912195929497092, - "learning_rate": 1.2531473096956038e-05, - "loss": 0.7492, + "epoch": 0.8808246258119176, + "grad_norm": 3.537772902473077, + "learning_rate": 1.6572036788179728e-05, + "loss": 1.006, "step": 6238 }, { - "epoch": 1.3082407213252254, - "grad_norm": 6.7561583098560325, - "learning_rate": 1.2529282887336673e-05, - "loss": 0.815, + "epoch": 0.8809658288619034, + "grad_norm": 3.74578119799036, + "learning_rate": 1.657088771176841e-05, + "loss": 1.1073, "step": 6239 }, { - "epoch": 1.3084504088907527, - "grad_norm": 6.528387055380406, - "learning_rate": 1.2527092548087177e-05, - "loss": 0.649, + "epoch": 0.8811070319118893, + "grad_norm": 3.524889044419798, + "learning_rate": 1.65697384826525e-05, + "loss": 1.0663, "step": 6240 }, { - "epoch": 1.30866009645628, - "grad_norm": 7.171033651995904, - "learning_rate": 1.2524902079319802e-05, - "loss": 0.9021, + "epoch": 0.8812482349618752, + "grad_norm": 3.0092418835995725, + "learning_rate": 1.6568589100858706e-05, + "loss": 1.0693, "step": 6241 }, { - "epoch": 1.3088697840218075, - "grad_norm": 9.72475951462515, - "learning_rate": 1.2522711481146818e-05, - "loss": 1.1956, + "epoch": 0.8813894380118611, + "grad_norm": 2.9137986035655126, + "learning_rate": 1.6567439566413737e-05, + "loss": 0.9155, "step": 6242 }, { - "epoch": 1.3090794715873348, - "grad_norm": 7.2600096596647505, - "learning_rate": 1.2520520753680497e-05, - "loss": 0.8849, + "epoch": 0.881530641061847, + "grad_norm": 3.4154734288751807, + "learning_rate": 1.6566289879344314e-05, + "loss": 0.9957, "step": 6243 }, { - "epoch": 1.3092891591528621, - "grad_norm": 5.913500284944334, - "learning_rate": 1.2518329897033112e-05, - "loss": 0.6277, + "epoch": 0.8816718441118329, + "grad_norm": 3.2052559608773095, + "learning_rate": 1.6565140039677142e-05, + "loss": 0.8239, "step": 6244 }, { - "epoch": 1.3094988467183897, - "grad_norm": 8.174090455288136, - "learning_rate": 1.2516138911316954e-05, - "loss": 0.8386, + "epoch": 0.8818130471618187, + "grad_norm": 3.8599966111681385, + "learning_rate": 1.6563990047438956e-05, + "loss": 1.1778, "step": 6245 }, { - "epoch": 1.309708534283917, - "grad_norm": 6.4104434727183826, - "learning_rate": 1.251394779664431e-05, - "loss": 0.7419, + "epoch": 0.8819542502118046, + "grad_norm": 4.088854771231255, + "learning_rate": 1.6562839902656476e-05, + "loss": 1.1967, "step": 6246 }, { - "epoch": 1.3099182218494443, - "grad_norm": 6.827930737515556, - "learning_rate": 1.2511756553127486e-05, - "loss": 0.8364, + "epoch": 0.8820954532617905, + "grad_norm": 4.1941405937695135, + "learning_rate": 1.656168960535643e-05, + "loss": 1.169, "step": 6247 }, { - "epoch": 1.3101279094149718, - "grad_norm": 6.023726820027417, - "learning_rate": 1.250956518087878e-05, - "loss": 0.8679, + "epoch": 0.8822366563117763, + "grad_norm": 3.2051947384646247, + "learning_rate": 1.656053915556555e-05, + "loss": 0.9834, "step": 6248 }, { - "epoch": 1.3103375969804991, - "grad_norm": 7.290356646120866, - "learning_rate": 1.2507373680010506e-05, - "loss": 0.8436, + "epoch": 0.8823778593617622, + "grad_norm": 3.030973388127168, + "learning_rate": 1.6559388553310574e-05, + "loss": 0.7882, "step": 6249 }, { - "epoch": 1.3105472845460264, - "grad_norm": 7.045952828052618, - "learning_rate": 1.250518205063498e-05, - "loss": 0.8245, + "epoch": 0.882519062411748, + "grad_norm": 4.068506592870806, + "learning_rate": 1.6558237798618243e-05, + "loss": 1.1806, "step": 6250 }, { - "epoch": 1.3107569721115537, - "grad_norm": 7.929375944688133, - "learning_rate": 1.250299029286453e-05, - "loss": 0.875, + "epoch": 0.8826602654617339, + "grad_norm": 3.6023619537525855, + "learning_rate": 1.6557086891515295e-05, + "loss": 1.1076, "step": 6251 }, { - "epoch": 1.310966659677081, - "grad_norm": 6.2361467197973655, - "learning_rate": 1.2500798406811487e-05, - "loss": 0.6945, + "epoch": 0.8828014685117198, + "grad_norm": 3.0954901244041415, + "learning_rate": 1.655593583202848e-05, + "loss": 0.9574, "step": 6252 }, { - "epoch": 1.3111763472426086, - "grad_norm": 7.88603689770897, - "learning_rate": 1.2498606392588188e-05, - "loss": 0.9239, + "epoch": 0.8829426715617057, + "grad_norm": 2.72457559893778, + "learning_rate": 1.6554784620184546e-05, + "loss": 0.766, "step": 6253 }, { - "epoch": 1.3113860348081359, - "grad_norm": 6.805703368438138, - "learning_rate": 1.2496414250306978e-05, - "loss": 0.8467, + "epoch": 0.8830838746116916, + "grad_norm": 3.9988340871038295, + "learning_rate": 1.6553633256010254e-05, + "loss": 1.2421, "step": 6254 }, { - "epoch": 1.3115957223736632, - "grad_norm": 7.2557971101967, - "learning_rate": 1.2494221980080208e-05, - "loss": 0.7178, + "epoch": 0.8832250776616775, + "grad_norm": 2.8220170000937888, + "learning_rate": 1.655248173953235e-05, + "loss": 0.8059, "step": 6255 }, { - "epoch": 1.3118054099391907, - "grad_norm": 6.126031817438328, - "learning_rate": 1.2492029582020234e-05, - "loss": 0.5402, + "epoch": 0.8833662807116633, + "grad_norm": 3.7668186768315275, + "learning_rate": 1.6551330070777603e-05, + "loss": 1.1064, "step": 6256 }, { - "epoch": 1.312015097504718, - "grad_norm": 9.042391314927801, - "learning_rate": 1.2489837056239425e-05, - "loss": 0.9687, + "epoch": 0.8835074837616492, + "grad_norm": 3.664217130695584, + "learning_rate": 1.6550178249772773e-05, + "loss": 1.0728, "step": 6257 }, { - "epoch": 1.3122247850702453, - "grad_norm": 7.70388123918556, - "learning_rate": 1.2487644402850145e-05, - "loss": 0.755, + "epoch": 0.8836486868116351, + "grad_norm": 3.6203477488565583, + "learning_rate": 1.6549026276544627e-05, + "loss": 1.0805, "step": 6258 }, { - "epoch": 1.3124344726357726, - "grad_norm": 7.957026053713945, - "learning_rate": 1.2485451621964777e-05, - "loss": 0.8996, + "epoch": 0.883789889861621, + "grad_norm": 3.152262362399964, + "learning_rate": 1.654787415111994e-05, + "loss": 0.9683, "step": 6259 }, { - "epoch": 1.3126441602013, - "grad_norm": 5.407750929804625, - "learning_rate": 1.24832587136957e-05, - "loss": 0.6771, + "epoch": 0.8839310929116069, + "grad_norm": 3.9068956326584763, + "learning_rate": 1.6546721873525488e-05, + "loss": 1.0462, "step": 6260 }, { - "epoch": 1.3128538477668275, - "grad_norm": 8.659960773380002, - "learning_rate": 1.248106567815531e-05, - "loss": 0.807, + "epoch": 0.8840722959615928, + "grad_norm": 3.7651160621958146, + "learning_rate": 1.6545569443788047e-05, + "loss": 1.0376, "step": 6261 }, { - "epoch": 1.3130635353323548, - "grad_norm": 10.173007790676033, - "learning_rate": 1.2478872515455998e-05, - "loss": 1.0889, + "epoch": 0.8842134990115786, + "grad_norm": 3.633450733541613, + "learning_rate": 1.65444168619344e-05, + "loss": 0.9889, "step": 6262 }, { - "epoch": 1.313273222897882, - "grad_norm": 9.373569317172365, - "learning_rate": 1.2476679225710173e-05, - "loss": 1.1735, + "epoch": 0.8843547020615645, + "grad_norm": 3.126164312983101, + "learning_rate": 1.6543264127991326e-05, + "loss": 0.9411, "step": 6263 }, { - "epoch": 1.3134829104634096, - "grad_norm": 8.332991228407897, - "learning_rate": 1.2474485809030241e-05, - "loss": 1.0026, + "epoch": 0.8844959051115504, + "grad_norm": 3.230417965774497, + "learning_rate": 1.6542111241985623e-05, + "loss": 0.7864, "step": 6264 }, { - "epoch": 1.313692598028937, - "grad_norm": 7.650601959912915, - "learning_rate": 1.2472292265528619e-05, - "loss": 0.8604, + "epoch": 0.8846371081615363, + "grad_norm": 3.2550130678261016, + "learning_rate": 1.6540958203944078e-05, + "loss": 0.9913, "step": 6265 }, { - "epoch": 1.3139022855944642, - "grad_norm": 7.591348670661808, - "learning_rate": 1.2470098595317731e-05, - "loss": 0.872, + "epoch": 0.8847783112115222, + "grad_norm": 3.429588042849031, + "learning_rate": 1.6539805013893493e-05, + "loss": 0.9504, "step": 6266 }, { - "epoch": 1.3141119731599917, - "grad_norm": 7.620015234230331, - "learning_rate": 1.2467904798510007e-05, - "loss": 0.9644, + "epoch": 0.8849195142615081, + "grad_norm": 3.4760504327165593, + "learning_rate": 1.6538651671860663e-05, + "loss": 0.9265, "step": 6267 }, { - "epoch": 1.314321660725519, - "grad_norm": 6.799257880734605, - "learning_rate": 1.2465710875217878e-05, - "loss": 0.8346, + "epoch": 0.8850607173114939, + "grad_norm": 4.009079652428015, + "learning_rate": 1.653749817787239e-05, + "loss": 1.1381, "step": 6268 }, { - "epoch": 1.3145313482910463, - "grad_norm": 6.465629735286245, - "learning_rate": 1.2463516825553795e-05, - "loss": 0.7088, + "epoch": 0.8852019203614798, + "grad_norm": 3.5665924349938902, + "learning_rate": 1.653634453195548e-05, + "loss": 1.0588, "step": 6269 }, { - "epoch": 1.3147410358565736, - "grad_norm": 8.147248487687248, - "learning_rate": 1.2461322649630198e-05, - "loss": 0.8596, + "epoch": 0.8853431234114657, + "grad_norm": 3.757490811110777, + "learning_rate": 1.653519073413675e-05, + "loss": 1.1374, "step": 6270 }, { - "epoch": 1.314950723422101, - "grad_norm": 6.740167444057646, - "learning_rate": 1.2459128347559545e-05, - "loss": 0.7263, + "epoch": 0.8854843264614516, + "grad_norm": 3.230801478874337, + "learning_rate": 1.653403678444301e-05, + "loss": 0.9248, "step": 6271 }, { - "epoch": 1.3151604109876285, - "grad_norm": 7.608792824182061, - "learning_rate": 1.2456933919454304e-05, - "loss": 0.9543, + "epoch": 0.8856255295114375, + "grad_norm": 3.7232276018239854, + "learning_rate": 1.6532882682901076e-05, + "loss": 1.14, "step": 6272 }, { - "epoch": 1.3153700985531558, - "grad_norm": 9.134980162498588, - "learning_rate": 1.2454739365426933e-05, - "loss": 1.2386, + "epoch": 0.8857667325614234, + "grad_norm": 3.7306297520162564, + "learning_rate": 1.6531728429537766e-05, + "loss": 0.8855, "step": 6273 }, { - "epoch": 1.315579786118683, - "grad_norm": 8.115495131305103, - "learning_rate": 1.2452544685589916e-05, - "loss": 0.9329, + "epoch": 0.8859079356114092, + "grad_norm": 3.080527210910394, + "learning_rate": 1.6530574024379915e-05, + "loss": 0.7426, "step": 6274 }, { - "epoch": 1.3157894736842106, - "grad_norm": 7.003016527845287, - "learning_rate": 1.245034988005573e-05, - "loss": 0.8459, + "epoch": 0.8860491386613951, + "grad_norm": 3.7346279813889955, + "learning_rate": 1.652941946745434e-05, + "loss": 1.0093, "step": 6275 }, { - "epoch": 1.315999161249738, - "grad_norm": 6.16954346933709, - "learning_rate": 1.2448154948936862e-05, - "loss": 0.8558, + "epoch": 0.886190341711381, + "grad_norm": 3.5884784765022526, + "learning_rate": 1.6528264758787876e-05, + "loss": 1.09, "step": 6276 }, { - "epoch": 1.3162088488152652, - "grad_norm": 6.982659948598512, - "learning_rate": 1.2445959892345805e-05, - "loss": 0.8158, + "epoch": 0.8863315447613669, + "grad_norm": 3.635761130255006, + "learning_rate": 1.652710989840736e-05, + "loss": 1.2248, "step": 6277 }, { - "epoch": 1.3164185363807925, - "grad_norm": 6.710069696713367, - "learning_rate": 1.2443764710395063e-05, - "loss": 0.8759, + "epoch": 0.8864727478113528, + "grad_norm": 2.686935252804271, + "learning_rate": 1.652595488633963e-05, + "loss": 0.8449, "step": 6278 }, { - "epoch": 1.31662822394632, - "grad_norm": 7.61940215903079, - "learning_rate": 1.244156940319714e-05, - "loss": 0.7999, + "epoch": 0.8866139508613387, + "grad_norm": 3.2603137955830066, + "learning_rate": 1.6524799722611524e-05, + "loss": 0.9368, "step": 6279 }, { - "epoch": 1.3168379115118474, - "grad_norm": 7.783529227160778, - "learning_rate": 1.2439373970864552e-05, - "loss": 1.0536, + "epoch": 0.8867551539113245, + "grad_norm": 3.9035804986769294, + "learning_rate": 1.6523644407249893e-05, + "loss": 1.1666, "step": 6280 }, { - "epoch": 1.3170475990773747, - "grad_norm": 7.7734975778951405, - "learning_rate": 1.2437178413509816e-05, - "loss": 0.8884, + "epoch": 0.8868963569613104, + "grad_norm": 3.063690458925986, + "learning_rate": 1.652248894028158e-05, + "loss": 0.8948, "step": 6281 }, { - "epoch": 1.317257286642902, - "grad_norm": 5.945537235183072, - "learning_rate": 1.2434982731245462e-05, - "loss": 0.6851, + "epoch": 0.8870375600112962, + "grad_norm": 7.6355713880990015, + "learning_rate": 1.652133332173344e-05, + "loss": 1.0238, "step": 6282 }, { - "epoch": 1.3174669742084295, - "grad_norm": 6.721230408887563, - "learning_rate": 1.2432786924184016e-05, - "loss": 0.8461, + "epoch": 0.8871787630612821, + "grad_norm": 3.2125999286457834, + "learning_rate": 1.6520177551632333e-05, + "loss": 0.9862, "step": 6283 }, { - "epoch": 1.3176766617739568, - "grad_norm": 6.90454831103914, - "learning_rate": 1.2430590992438023e-05, - "loss": 0.7176, + "epoch": 0.887319966111268, + "grad_norm": 3.8834751827599714, + "learning_rate": 1.6519021630005115e-05, + "loss": 1.2326, "step": 6284 }, { - "epoch": 1.3178863493394841, - "grad_norm": 7.320784860157427, - "learning_rate": 1.242839493612002e-05, - "loss": 0.7848, + "epoch": 0.8874611691612538, + "grad_norm": 2.8396215264957454, + "learning_rate": 1.651786555687865e-05, + "loss": 0.7942, "step": 6285 }, { - "epoch": 1.3180960369050116, - "grad_norm": 8.33224399177018, - "learning_rate": 1.2426198755342574e-05, - "loss": 0.9776, + "epoch": 0.8876023722112397, + "grad_norm": 2.8171848274427944, + "learning_rate": 1.6516709332279806e-05, + "loss": 0.6937, "step": 6286 }, { - "epoch": 1.318305724470539, - "grad_norm": 6.085017141732459, - "learning_rate": 1.2424002450218229e-05, - "loss": 0.8089, + "epoch": 0.8877435752612256, + "grad_norm": 3.5458306517057836, + "learning_rate": 1.651555295623545e-05, + "loss": 1.0868, "step": 6287 }, { - "epoch": 1.3185154120360663, - "grad_norm": 6.730832934143617, - "learning_rate": 1.2421806020859552e-05, - "loss": 0.8583, + "epoch": 0.8878847783112115, + "grad_norm": 4.041983846009704, + "learning_rate": 1.6514396428772457e-05, + "loss": 1.0189, "step": 6288 }, { - "epoch": 1.3187250996015936, - "grad_norm": 8.112140020359531, - "learning_rate": 1.241960946737912e-05, - "loss": 0.7246, + "epoch": 0.8880259813611974, + "grad_norm": 3.6233959175061727, + "learning_rate": 1.6513239749917702e-05, + "loss": 1.0256, "step": 6289 }, { - "epoch": 1.3189347871671209, - "grad_norm": 7.18437059551874, - "learning_rate": 1.2417412789889506e-05, - "loss": 0.9671, + "epoch": 0.8881671844111833, + "grad_norm": 3.995777922733096, + "learning_rate": 1.6512082919698072e-05, + "loss": 1.0383, "step": 6290 }, { - "epoch": 1.3191444747326484, - "grad_norm": 9.315549163660037, - "learning_rate": 1.2415215988503291e-05, - "loss": 1.2177, + "epoch": 0.8883083874611691, + "grad_norm": 3.046837862698834, + "learning_rate": 1.6510925938140444e-05, + "loss": 0.9265, "step": 6291 }, { - "epoch": 1.3193541622981757, - "grad_norm": 9.26430064011248, - "learning_rate": 1.241301906333307e-05, - "loss": 1.2192, + "epoch": 0.888449590511155, + "grad_norm": 4.440050379900637, + "learning_rate": 1.650976880527171e-05, + "loss": 1.0376, "step": 6292 }, { - "epoch": 1.319563849863703, - "grad_norm": 5.868347478708639, - "learning_rate": 1.2410822014491438e-05, - "loss": 0.6913, + "epoch": 0.8885907935611409, + "grad_norm": 2.9934852108745553, + "learning_rate": 1.6508611521118762e-05, + "loss": 0.9242, "step": 6293 }, { - "epoch": 1.3197735374292305, - "grad_norm": 6.40220282808585, - "learning_rate": 1.2408624842090994e-05, - "loss": 0.5633, + "epoch": 0.8887319966111268, + "grad_norm": 3.6436000456338444, + "learning_rate": 1.650745408570849e-05, + "loss": 1.1386, "step": 6294 }, { - "epoch": 1.3199832249947578, - "grad_norm": 7.273805561853192, - "learning_rate": 1.2406427546244349e-05, - "loss": 0.9107, + "epoch": 0.8888731996611127, + "grad_norm": 3.431257868082384, + "learning_rate": 1.6506296499067798e-05, + "loss": 1.2067, "step": 6295 }, { - "epoch": 1.3201929125602851, - "grad_norm": 7.1618487126270525, - "learning_rate": 1.2404230127064118e-05, - "loss": 0.9079, + "epoch": 0.8890144027110986, + "grad_norm": 4.428262472203916, + "learning_rate": 1.6505138761223586e-05, + "loss": 1.2847, "step": 6296 }, { - "epoch": 1.3204026001258127, - "grad_norm": 5.863546169625546, - "learning_rate": 1.2402032584662925e-05, - "loss": 0.7765, + "epoch": 0.8891556057610844, + "grad_norm": 3.241582776240133, + "learning_rate": 1.6503980872202757e-05, + "loss": 0.8599, "step": 6297 }, { - "epoch": 1.32061228769134, - "grad_norm": 7.93214834814956, - "learning_rate": 1.2399834919153396e-05, - "loss": 0.7991, + "epoch": 0.8892968088110703, + "grad_norm": 3.4401444635380707, + "learning_rate": 1.650282283203222e-05, + "loss": 1.1032, "step": 6298 }, { - "epoch": 1.3208219752568673, - "grad_norm": 5.172872739625734, - "learning_rate": 1.2397637130648163e-05, - "loss": 0.5715, + "epoch": 0.8894380118610562, + "grad_norm": 4.50436054943897, + "learning_rate": 1.650166464073889e-05, + "loss": 1.2196, "step": 6299 }, { - "epoch": 1.3210316628223946, - "grad_norm": 6.96435165217951, - "learning_rate": 1.239543921925987e-05, - "loss": 0.9448, + "epoch": 0.8895792149110421, + "grad_norm": 3.7664110732326077, + "learning_rate": 1.6500506298349682e-05, + "loss": 1.1037, "step": 6300 }, { - "epoch": 1.321241350387922, - "grad_norm": 7.346502662725581, - "learning_rate": 1.2393241185101162e-05, - "loss": 0.9498, + "epoch": 0.889720417961028, + "grad_norm": 3.1933587658407454, + "learning_rate": 1.6499347804891515e-05, + "loss": 1.0194, "step": 6301 }, { - "epoch": 1.3214510379534494, - "grad_norm": 7.082736286632278, - "learning_rate": 1.2391043028284693e-05, - "loss": 0.8144, + "epoch": 0.8898616210110138, + "grad_norm": 3.279234947720154, + "learning_rate": 1.649818916039131e-05, + "loss": 1.145, "step": 6302 }, { - "epoch": 1.3216607255189767, - "grad_norm": 7.377703071815039, - "learning_rate": 1.238884474892312e-05, - "loss": 1.0032, + "epoch": 0.8900028240609997, + "grad_norm": 3.9515885490145, + "learning_rate": 1.6497030364876e-05, + "loss": 1.1405, "step": 6303 }, { - "epoch": 1.321870413084504, - "grad_norm": 8.853614857485917, - "learning_rate": 1.2386646347129111e-05, - "loss": 1.0391, + "epoch": 0.8901440271109856, + "grad_norm": 3.659629283958127, + "learning_rate": 1.6495871418372503e-05, + "loss": 0.9084, "step": 6304 }, { - "epoch": 1.3220801006500316, - "grad_norm": 8.22072462014297, - "learning_rate": 1.238444782301534e-05, - "loss": 0.8437, + "epoch": 0.8902852301609715, + "grad_norm": 3.6700228729188553, + "learning_rate": 1.6494712320907766e-05, + "loss": 1.0685, "step": 6305 }, { - "epoch": 1.3222897882155589, - "grad_norm": 6.871814652013581, - "learning_rate": 1.2382249176694478e-05, - "loss": 0.9364, + "epoch": 0.8904264332109574, + "grad_norm": 3.1427972040192818, + "learning_rate": 1.6493553072508716e-05, + "loss": 0.894, "step": 6306 }, { - "epoch": 1.3224994757810862, - "grad_norm": 5.9843673399423025, - "learning_rate": 1.2380050408279218e-05, - "loss": 0.7063, + "epoch": 0.8905676362609433, + "grad_norm": 2.854101334893658, + "learning_rate": 1.6492393673202297e-05, + "loss": 0.7566, "step": 6307 }, { - "epoch": 1.3227091633466135, - "grad_norm": 7.544942755081471, - "learning_rate": 1.2377851517882244e-05, - "loss": 1.0531, + "epoch": 0.8907088393109291, + "grad_norm": 3.1010356633771394, + "learning_rate": 1.6491234123015454e-05, + "loss": 0.8636, "step": 6308 }, { - "epoch": 1.3229188509121408, - "grad_norm": 6.516591888122146, - "learning_rate": 1.2375652505616258e-05, - "loss": 0.7674, + "epoch": 0.890850042360915, + "grad_norm": 3.4964583465196473, + "learning_rate": 1.6490074421975137e-05, + "loss": 1.0554, "step": 6309 }, { - "epoch": 1.3231285384776683, - "grad_norm": 5.882246292901434, - "learning_rate": 1.2373453371593958e-05, - "loss": 0.5633, + "epoch": 0.8909912454109009, + "grad_norm": 3.9622664320674446, + "learning_rate": 1.6488914570108287e-05, + "loss": 1.0025, "step": 6310 }, { - "epoch": 1.3233382260431956, - "grad_norm": 8.486226793480947, - "learning_rate": 1.237125411592806e-05, - "loss": 0.9849, + "epoch": 0.8911324484608868, + "grad_norm": 3.7087121384663795, + "learning_rate": 1.6487754567441868e-05, + "loss": 1.0529, "step": 6311 }, { - "epoch": 1.323547913608723, - "grad_norm": 6.134628721869582, - "learning_rate": 1.236905473873127e-05, - "loss": 0.7074, + "epoch": 0.8912736515108727, + "grad_norm": 3.5587988751041504, + "learning_rate": 1.6486594414002836e-05, + "loss": 1.097, "step": 6312 }, { - "epoch": 1.3237576011742505, - "grad_norm": 7.628740572680764, - "learning_rate": 1.236685524011632e-05, - "loss": 0.8129, + "epoch": 0.8914148545608586, + "grad_norm": 3.5219557298608124, + "learning_rate": 1.6485434109818146e-05, + "loss": 1.0799, "step": 6313 }, { - "epoch": 1.3239672887397778, - "grad_norm": 7.077167196447652, - "learning_rate": 1.236465562019593e-05, - "loss": 0.7938, + "epoch": 0.8915560576108444, + "grad_norm": 3.701205319253776, + "learning_rate": 1.6484273654914772e-05, + "loss": 1.0741, "step": 6314 }, { - "epoch": 1.324176976305305, - "grad_norm": 6.952295180426609, - "learning_rate": 1.2362455879082844e-05, - "loss": 0.888, + "epoch": 0.8916972606608303, + "grad_norm": 2.9816190895284223, + "learning_rate": 1.6483113049319676e-05, + "loss": 0.8864, "step": 6315 }, { - "epoch": 1.3243866638708326, - "grad_norm": 6.521558790019405, - "learning_rate": 1.2360256016889793e-05, - "loss": 0.7781, + "epoch": 0.8918384637108161, + "grad_norm": 4.082276936410888, + "learning_rate": 1.6481952293059835e-05, + "loss": 1.3575, "step": 6316 }, { - "epoch": 1.32459635143636, - "grad_norm": 7.056200840472986, - "learning_rate": 1.2358056033729527e-05, - "loss": 0.7345, + "epoch": 0.891979666760802, + "grad_norm": 3.437347889876703, + "learning_rate": 1.6480791386162224e-05, + "loss": 1.2365, "step": 6317 }, { - "epoch": 1.3248060390018872, - "grad_norm": 6.851196852108395, - "learning_rate": 1.23558559297148e-05, - "loss": 0.6554, + "epoch": 0.8921208698107879, + "grad_norm": 3.576584225009612, + "learning_rate": 1.6479630328653814e-05, + "loss": 0.8913, "step": 6318 }, { - "epoch": 1.3250157265674145, - "grad_norm": 9.267435237156828, - "learning_rate": 1.2353655704958373e-05, - "loss": 1.1985, + "epoch": 0.8922620728607737, + "grad_norm": 2.7439594880680938, + "learning_rate": 1.64784691205616e-05, + "loss": 0.781, "step": 6319 }, { - "epoch": 1.3252254141329418, - "grad_norm": 6.879265880065429, - "learning_rate": 1.2351455359573007e-05, - "loss": 0.6786, + "epoch": 0.8924032759107596, + "grad_norm": 4.433358041759848, + "learning_rate": 1.6477307761912555e-05, + "loss": 1.1811, "step": 6320 }, { - "epoch": 1.3254351016984693, - "grad_norm": 7.7026664890493866, - "learning_rate": 1.2349254893671475e-05, - "loss": 0.9577, + "epoch": 0.8925444789607455, + "grad_norm": 4.13092979689268, + "learning_rate": 1.647614625273368e-05, + "loss": 1.1199, "step": 6321 }, { - "epoch": 1.3256447892639966, - "grad_norm": 7.602072685651666, - "learning_rate": 1.2347054307366555e-05, - "loss": 0.863, + "epoch": 0.8926856820107314, + "grad_norm": 4.016729430615833, + "learning_rate": 1.6474984593051965e-05, + "loss": 1.0831, "step": 6322 }, { - "epoch": 1.325854476829524, - "grad_norm": 7.014685597640618, - "learning_rate": 1.2344853600771032e-05, - "loss": 0.661, + "epoch": 0.8928268850607173, + "grad_norm": 3.346769759328968, + "learning_rate": 1.6473822782894398e-05, + "loss": 0.9427, "step": 6323 }, { - "epoch": 1.3260641643950515, - "grad_norm": 5.680576666929746, - "learning_rate": 1.2342652773997694e-05, - "loss": 0.6455, + "epoch": 0.8929680881107032, + "grad_norm": 3.4067678797248937, + "learning_rate": 1.6472660822287987e-05, + "loss": 0.7522, "step": 6324 }, { - "epoch": 1.3262738519605788, - "grad_norm": 6.1321473145104965, - "learning_rate": 1.234045182715934e-05, - "loss": 0.6388, + "epoch": 0.893109291160689, + "grad_norm": 2.715021915462966, + "learning_rate": 1.6471498711259733e-05, + "loss": 0.7023, "step": 6325 }, { - "epoch": 1.326483539526106, - "grad_norm": 7.224141368985771, - "learning_rate": 1.2338250760368768e-05, - "loss": 0.8501, + "epoch": 0.8932504942106749, + "grad_norm": 2.8418434408515703, + "learning_rate": 1.647033644983665e-05, + "loss": 0.827, "step": 6326 }, { - "epoch": 1.3266932270916334, - "grad_norm": 8.731481236114174, - "learning_rate": 1.2336049573738791e-05, - "loss": 1.0735, + "epoch": 0.8933916972606608, + "grad_norm": 2.7713515769696717, + "learning_rate": 1.6469174038045735e-05, + "loss": 0.7298, "step": 6327 }, { - "epoch": 1.3269029146571607, - "grad_norm": 7.569071014910265, - "learning_rate": 1.2333848267382223e-05, - "loss": 0.8619, + "epoch": 0.8935329003106467, + "grad_norm": 3.151798026246386, + "learning_rate": 1.6468011475914015e-05, + "loss": 1.0759, "step": 6328 }, { - "epoch": 1.3271126022226882, - "grad_norm": 8.22828417293366, - "learning_rate": 1.2331646841411879e-05, - "loss": 0.9784, + "epoch": 0.8936741033606326, + "grad_norm": 3.3023395772130857, + "learning_rate": 1.6466848763468496e-05, + "loss": 0.8557, "step": 6329 }, { - "epoch": 1.3273222897882155, - "grad_norm": 8.502076770287367, - "learning_rate": 1.2329445295940593e-05, - "loss": 0.6743, + "epoch": 0.8938153064106185, + "grad_norm": 3.9831663290601513, + "learning_rate": 1.6465685900736204e-05, + "loss": 0.9985, "step": 6330 }, { - "epoch": 1.3275319773537428, - "grad_norm": 6.770356632730843, - "learning_rate": 1.2327243631081196e-05, - "loss": 0.7733, + "epoch": 0.8939565094606043, + "grad_norm": 4.528716139246703, + "learning_rate": 1.646452288774417e-05, + "loss": 0.9884, "step": 6331 }, { - "epoch": 1.3277416649192704, - "grad_norm": 8.349799986331119, - "learning_rate": 1.2325041846946523e-05, - "loss": 1.0666, + "epoch": 0.8940977125105902, + "grad_norm": 3.730476661333404, + "learning_rate": 1.6463359724519413e-05, + "loss": 0.9398, "step": 6332 }, { - "epoch": 1.3279513524847977, - "grad_norm": 7.585850026640918, - "learning_rate": 1.2322839943649427e-05, - "loss": 0.8339, + "epoch": 0.8942389155605761, + "grad_norm": 3.510438103015362, + "learning_rate": 1.6462196411088968e-05, + "loss": 0.8778, "step": 6333 }, { - "epoch": 1.328161040050325, - "grad_norm": 7.225764883975388, - "learning_rate": 1.2320637921302756e-05, - "loss": 0.701, + "epoch": 0.894380118610562, + "grad_norm": 4.173387206457739, + "learning_rate": 1.646103294747987e-05, + "loss": 1.1937, "step": 6334 }, { - "epoch": 1.3283707276158525, - "grad_norm": 8.291193251846932, - "learning_rate": 1.2318435780019363e-05, - "loss": 1.0607, + "epoch": 0.8945213216605479, + "grad_norm": 3.429067510848518, + "learning_rate": 1.6459869333719157e-05, + "loss": 0.9763, "step": 6335 }, { - "epoch": 1.3285804151813798, - "grad_norm": 6.879350407442858, - "learning_rate": 1.2316233519912117e-05, - "loss": 0.7429, + "epoch": 0.8946625247105338, + "grad_norm": 3.429202642859671, + "learning_rate": 1.6458705569833866e-05, + "loss": 1.0889, "step": 6336 }, { - "epoch": 1.3287901027469071, - "grad_norm": 7.037351227203288, - "learning_rate": 1.2314031141093886e-05, - "loss": 0.7068, + "epoch": 0.8948037277605196, + "grad_norm": 3.7146330251530095, + "learning_rate": 1.645754165585105e-05, + "loss": 1.0187, "step": 6337 }, { - "epoch": 1.3289997903124344, - "grad_norm": 8.644403595893284, - "learning_rate": 1.2311828643677543e-05, - "loss": 1.0035, + "epoch": 0.8949449308105055, + "grad_norm": 2.9399779957588024, + "learning_rate": 1.6456377591797754e-05, + "loss": 0.8477, "step": 6338 }, { - "epoch": 1.3292094778779617, - "grad_norm": 5.734301025286374, - "learning_rate": 1.2309626027775973e-05, - "loss": 0.5971, + "epoch": 0.8950861338604914, + "grad_norm": 4.1504427829263975, + "learning_rate": 1.645521337770103e-05, + "loss": 0.9921, "step": 6339 }, { - "epoch": 1.3294191654434893, - "grad_norm": 8.881082823094001, - "learning_rate": 1.2307423293502066e-05, - "loss": 0.972, + "epoch": 0.8952273369104773, + "grad_norm": 4.229539108225094, + "learning_rate": 1.645404901358794e-05, + "loss": 1.1716, "step": 6340 }, { - "epoch": 1.3296288530090166, - "grad_norm": 7.080786453067711, - "learning_rate": 1.2305220440968708e-05, - "loss": 0.83, + "epoch": 0.8953685399604632, + "grad_norm": 3.5251081612203694, + "learning_rate": 1.645288449948553e-05, + "loss": 1.0646, "step": 6341 }, { - "epoch": 1.3298385405745439, - "grad_norm": 7.059360822935252, - "learning_rate": 1.2303017470288812e-05, - "loss": 0.7991, + "epoch": 0.895509743010449, + "grad_norm": 3.4672418999283563, + "learning_rate": 1.645171983542088e-05, + "loss": 0.9936, "step": 6342 }, { - "epoch": 1.3300482281400714, - "grad_norm": 6.174910090915003, - "learning_rate": 1.230081438157527e-05, - "loss": 0.6918, + "epoch": 0.8956509460604349, + "grad_norm": 3.6989111517391886, + "learning_rate": 1.645055502142104e-05, + "loss": 1.0971, "step": 6343 }, { - "epoch": 1.3302579157055987, - "grad_norm": 6.684230746079871, - "learning_rate": 1.2298611174941002e-05, - "loss": 0.7191, + "epoch": 0.8957921491104208, + "grad_norm": 3.0487158756534347, + "learning_rate": 1.644939005751309e-05, + "loss": 0.8226, "step": 6344 }, { - "epoch": 1.330467603271126, - "grad_norm": 8.175134555093182, - "learning_rate": 1.2296407850498925e-05, - "loss": 0.938, + "epoch": 0.8959333521604067, + "grad_norm": 3.706532851992003, + "learning_rate": 1.64482249437241e-05, + "loss": 1.0046, "step": 6345 }, { - "epoch": 1.3306772908366533, - "grad_norm": 6.587376230369217, - "learning_rate": 1.2294204408361962e-05, - "loss": 0.8823, + "epoch": 0.8960745552103926, + "grad_norm": 3.2382411927351065, + "learning_rate": 1.644705968008115e-05, + "loss": 0.9275, "step": 6346 }, { - "epoch": 1.3308869784021806, - "grad_norm": 6.933278242642745, - "learning_rate": 1.2292000848643044e-05, - "loss": 0.9722, + "epoch": 0.8962157582603785, + "grad_norm": 3.718491852024694, + "learning_rate": 1.6445894266611313e-05, + "loss": 1.1884, "step": 6347 }, { - "epoch": 1.3310966659677081, - "grad_norm": 7.88074255954808, - "learning_rate": 1.2289797171455109e-05, - "loss": 0.8897, + "epoch": 0.8963569613103644, + "grad_norm": 3.5715238342309634, + "learning_rate": 1.644472870334168e-05, + "loss": 0.8239, "step": 6348 }, { - "epoch": 1.3313063535332355, - "grad_norm": 6.633974776496631, - "learning_rate": 1.2287593376911096e-05, - "loss": 0.8409, + "epoch": 0.8964981643603502, + "grad_norm": 3.214774016627596, + "learning_rate": 1.644356299029933e-05, + "loss": 0.8592, "step": 6349 }, { - "epoch": 1.3315160410987628, - "grad_norm": 7.255480027582813, - "learning_rate": 1.2285389465123956e-05, - "loss": 0.9073, + "epoch": 0.896639367410336, + "grad_norm": 3.0772126412715934, + "learning_rate": 1.6442397127511366e-05, + "loss": 0.874, "step": 6350 }, { - "epoch": 1.3317257286642903, - "grad_norm": 7.101523480423554, - "learning_rate": 1.2283185436206642e-05, - "loss": 0.5874, + "epoch": 0.8967805704603219, + "grad_norm": 3.494614123521099, + "learning_rate": 1.644123111500487e-05, + "loss": 1.0229, "step": 6351 }, { - "epoch": 1.3319354162298176, - "grad_norm": 7.549834765619955, - "learning_rate": 1.2280981290272112e-05, - "loss": 1.0618, + "epoch": 0.8969217735103078, + "grad_norm": 3.6543780808568127, + "learning_rate": 1.644006495280695e-05, + "loss": 0.9777, "step": 6352 }, { - "epoch": 1.332145103795345, - "grad_norm": 7.029536159181028, - "learning_rate": 1.2278777027433334e-05, - "loss": 0.8477, + "epoch": 0.8970629765602937, + "grad_norm": 3.275269209723654, + "learning_rate": 1.6438898640944695e-05, + "loss": 0.9526, "step": 6353 }, { - "epoch": 1.3323547913608724, - "grad_norm": 7.517146697267649, - "learning_rate": 1.2276572647803284e-05, - "loss": 0.7355, + "epoch": 0.8972041796102795, + "grad_norm": 4.240065361708412, + "learning_rate": 1.6437732179445222e-05, + "loss": 1.3762, "step": 6354 }, { - "epoch": 1.3325644789263997, - "grad_norm": 6.562714521557585, - "learning_rate": 1.2274368151494937e-05, - "loss": 0.8249, + "epoch": 0.8973453826602654, + "grad_norm": 3.7080799987620074, + "learning_rate": 1.6436565568335627e-05, + "loss": 1.0319, "step": 6355 }, { - "epoch": 1.332774166491927, - "grad_norm": 7.206542570712602, - "learning_rate": 1.2272163538621278e-05, - "loss": 0.8267, + "epoch": 0.8974865857102513, + "grad_norm": 3.6766847484700036, + "learning_rate": 1.6435398807643035e-05, + "loss": 0.7831, "step": 6356 }, { - "epoch": 1.3329838540574543, - "grad_norm": 6.2312876764065255, - "learning_rate": 1.2269958809295296e-05, - "loss": 0.6921, + "epoch": 0.8976277887602372, + "grad_norm": 3.348770560170033, + "learning_rate": 1.643423189739455e-05, + "loss": 0.9921, "step": 6357 }, { - "epoch": 1.3331935416229816, - "grad_norm": 6.137754752903873, - "learning_rate": 1.226775396362999e-05, - "loss": 0.798, + "epoch": 0.8977689918102231, + "grad_norm": 3.0691067507615624, + "learning_rate": 1.6433064837617294e-05, + "loss": 0.8643, "step": 6358 }, { - "epoch": 1.3334032291885092, - "grad_norm": 7.2518022117251135, - "learning_rate": 1.2265549001738358e-05, - "loss": 0.812, + "epoch": 0.897910194860209, + "grad_norm": 3.447078341860875, + "learning_rate": 1.6431897628338388e-05, + "loss": 0.8723, "step": 6359 }, { - "epoch": 1.3336129167540365, - "grad_norm": 5.488058748011243, - "learning_rate": 1.2263343923733408e-05, - "loss": 0.5067, + "epoch": 0.8980513979101948, + "grad_norm": 4.739890138777377, + "learning_rate": 1.6430730269584963e-05, + "loss": 1.0103, "step": 6360 }, { - "epoch": 1.3338226043195638, - "grad_norm": 7.215295401337767, - "learning_rate": 1.226113872972816e-05, - "loss": 0.7912, + "epoch": 0.8981926009601807, + "grad_norm": 3.2959118190204126, + "learning_rate": 1.6429562761384142e-05, + "loss": 1.035, "step": 6361 }, { - "epoch": 1.3340322918850913, - "grad_norm": 6.296936005428166, - "learning_rate": 1.2258933419835628e-05, - "loss": 0.6197, + "epoch": 0.8983338040101666, + "grad_norm": 3.5041806149249295, + "learning_rate": 1.642839510376306e-05, + "loss": 0.9645, "step": 6362 }, { - "epoch": 1.3342419794506186, - "grad_norm": 7.106974057993462, - "learning_rate": 1.2256727994168843e-05, - "loss": 0.7379, + "epoch": 0.8984750070601525, + "grad_norm": 3.7831602354830043, + "learning_rate": 1.642722729674885e-05, + "loss": 1.1624, "step": 6363 }, { - "epoch": 1.334451667016146, - "grad_norm": 5.747317717355062, - "learning_rate": 1.2254522452840828e-05, - "loss": 0.6364, + "epoch": 0.8986162101101384, + "grad_norm": 5.190905920885287, + "learning_rate": 1.6426059340368653e-05, + "loss": 0.9601, "step": 6364 }, { - "epoch": 1.3346613545816732, - "grad_norm": 8.347439591305442, - "learning_rate": 1.2252316795964636e-05, - "loss": 0.8809, + "epoch": 0.8987574131601243, + "grad_norm": 3.5973357315364494, + "learning_rate": 1.642489123464962e-05, + "loss": 1.1092, "step": 6365 }, { - "epoch": 1.3348710421472005, - "grad_norm": 7.409370748224249, - "learning_rate": 1.2250111023653295e-05, - "loss": 0.6623, + "epoch": 0.8988986162101101, + "grad_norm": 3.37458317385293, + "learning_rate": 1.6423722979618883e-05, + "loss": 1.0815, "step": 6366 }, { - "epoch": 1.335080729712728, - "grad_norm": 7.391833095726885, - "learning_rate": 1.2247905136019865e-05, - "loss": 0.7512, + "epoch": 0.899039819260096, + "grad_norm": 3.930104267676092, + "learning_rate": 1.6422554575303594e-05, + "loss": 1.1814, "step": 6367 }, { - "epoch": 1.3352904172782554, - "grad_norm": 6.626935521070651, - "learning_rate": 1.2245699133177395e-05, - "loss": 0.6602, + "epoch": 0.8991810223100819, + "grad_norm": 3.149866409520708, + "learning_rate": 1.6421386021730915e-05, + "loss": 0.9405, "step": 6368 }, { - "epoch": 1.3355001048437827, - "grad_norm": 7.3282605346253025, - "learning_rate": 1.2243493015238954e-05, - "loss": 0.8294, + "epoch": 0.8993222253600678, + "grad_norm": 3.4876782820594414, + "learning_rate": 1.6420217318928e-05, + "loss": 1.039, "step": 6369 }, { - "epoch": 1.3357097924093102, - "grad_norm": 6.415595360205275, - "learning_rate": 1.2241286782317601e-05, - "loss": 0.6436, + "epoch": 0.8994634284100537, + "grad_norm": 3.103993832696666, + "learning_rate": 1.6419048466922004e-05, + "loss": 0.8383, "step": 6370 }, { - "epoch": 1.3359194799748375, - "grad_norm": 7.417513561947137, - "learning_rate": 1.2239080434526414e-05, - "loss": 0.7973, + "epoch": 0.8996046314600395, + "grad_norm": 3.3437203932875077, + "learning_rate": 1.6417879465740094e-05, + "loss": 0.9043, "step": 6371 }, { - "epoch": 1.3361291675403648, - "grad_norm": 6.453143399047254, - "learning_rate": 1.2236873971978472e-05, - "loss": 0.7547, + "epoch": 0.8997458345100254, + "grad_norm": 3.272905173825727, + "learning_rate": 1.6416710315409437e-05, + "loss": 0.9486, "step": 6372 }, { - "epoch": 1.3363388551058923, - "grad_norm": 8.321888873040013, - "learning_rate": 1.2234667394786859e-05, - "loss": 0.7975, + "epoch": 0.8998870375600113, + "grad_norm": 3.6542381565208717, + "learning_rate": 1.6415541015957207e-05, + "loss": 1.0517, "step": 6373 }, { - "epoch": 1.3365485426714196, - "grad_norm": 6.716772455735117, - "learning_rate": 1.2232460703064664e-05, - "loss": 0.9022, + "epoch": 0.9000282406099972, + "grad_norm": 3.885777518290856, + "learning_rate": 1.641437156741057e-05, + "loss": 1.2471, "step": 6374 }, { - "epoch": 1.336758230236947, - "grad_norm": 6.724140798235005, - "learning_rate": 1.223025389692499e-05, - "loss": 0.7211, + "epoch": 0.9001694436599831, + "grad_norm": 4.535231305909152, + "learning_rate": 1.641320196979671e-05, + "loss": 1.2171, "step": 6375 }, { - "epoch": 1.3369679178024743, - "grad_norm": 7.869258431892303, - "learning_rate": 1.2228046976480932e-05, - "loss": 0.9175, + "epoch": 0.900310646709969, + "grad_norm": 2.824397457729353, + "learning_rate": 1.6412032223142806e-05, + "loss": 0.8448, "step": 6376 }, { - "epoch": 1.3371776053680016, - "grad_norm": 7.924518384950294, - "learning_rate": 1.2225839941845603e-05, - "loss": 0.7832, + "epoch": 0.9004518497599548, + "grad_norm": 3.541347898156463, + "learning_rate": 1.641086232747604e-05, + "loss": 1.1564, "step": 6377 }, { - "epoch": 1.337387292933529, - "grad_norm": 8.510591336006046, - "learning_rate": 1.2223632793132116e-05, - "loss": 0.7884, + "epoch": 0.9005930528099407, + "grad_norm": 3.0358873033964446, + "learning_rate": 1.6409692282823604e-05, + "loss": 0.9569, "step": 6378 }, { - "epoch": 1.3375969804990564, - "grad_norm": 9.622231681010305, - "learning_rate": 1.2221425530453595e-05, - "loss": 1.0785, + "epoch": 0.9007342558599266, + "grad_norm": 4.063892402208464, + "learning_rate": 1.6408522089212685e-05, + "loss": 1.1203, "step": 6379 }, { - "epoch": 1.3378066680645837, - "grad_norm": 7.931903944174604, - "learning_rate": 1.221921815392316e-05, - "loss": 0.7796, + "epoch": 0.9008754589099125, + "grad_norm": 3.2849444068540845, + "learning_rate": 1.6407351746670484e-05, + "loss": 0.9515, "step": 6380 }, { - "epoch": 1.3380163556301112, - "grad_norm": 6.775823015907663, - "learning_rate": 1.2217010663653945e-05, - "loss": 0.6253, + "epoch": 0.9010166619598984, + "grad_norm": 3.397551892032349, + "learning_rate": 1.640618125522419e-05, + "loss": 0.9084, "step": 6381 }, { - "epoch": 1.3382260431956385, - "grad_norm": 7.0558142876389605, - "learning_rate": 1.2214803059759088e-05, - "loss": 0.918, + "epoch": 0.9011578650098843, + "grad_norm": 4.052224756407713, + "learning_rate": 1.6405010614901017e-05, + "loss": 1.1574, "step": 6382 }, { - "epoch": 1.3384357307611658, - "grad_norm": 7.522407203709215, - "learning_rate": 1.2212595342351734e-05, - "loss": 0.9256, + "epoch": 0.9012990680598701, + "grad_norm": 3.600648772363036, + "learning_rate": 1.640383982572816e-05, + "loss": 1.1406, "step": 6383 }, { - "epoch": 1.3386454183266931, - "grad_norm": 7.931084352932363, - "learning_rate": 1.2210387511545032e-05, - "loss": 0.8428, + "epoch": 0.901440271109856, + "grad_norm": 2.8889817916854654, + "learning_rate": 1.6402668887732833e-05, + "loss": 0.8786, "step": 6384 }, { - "epoch": 1.3388551058922205, - "grad_norm": 6.371398574382828, - "learning_rate": 1.2208179567452134e-05, - "loss": 0.6863, + "epoch": 0.9015814741598418, + "grad_norm": 2.993633294464633, + "learning_rate": 1.6401497800942246e-05, + "loss": 0.7332, "step": 6385 }, { - "epoch": 1.339064793457748, - "grad_norm": 9.453617554040981, - "learning_rate": 1.2205971510186208e-05, - "loss": 1.0095, + "epoch": 0.9017226772098277, + "grad_norm": 3.7582617790608843, + "learning_rate": 1.6400326565383614e-05, + "loss": 1.0162, "step": 6386 }, { - "epoch": 1.3392744810232753, - "grad_norm": 7.132838109713118, - "learning_rate": 1.2203763339860408e-05, - "loss": 0.8155, + "epoch": 0.9018638802598136, + "grad_norm": 3.896201774926858, + "learning_rate": 1.6399155181084156e-05, + "loss": 1.2324, "step": 6387 }, { - "epoch": 1.3394841685888026, - "grad_norm": 6.8325518687272275, - "learning_rate": 1.2201555056587922e-05, - "loss": 0.7387, + "epoch": 0.9020050833097994, + "grad_norm": 4.690828935486476, + "learning_rate": 1.6397983648071093e-05, + "loss": 1.3361, "step": 6388 }, { - "epoch": 1.3396938561543301, - "grad_norm": 6.710857920457937, - "learning_rate": 1.2199346660481919e-05, - "loss": 0.7284, + "epoch": 0.9021462863597853, + "grad_norm": 3.524546553263179, + "learning_rate": 1.639681196637166e-05, + "loss": 0.9704, "step": 6389 }, { - "epoch": 1.3399035437198574, - "grad_norm": 7.065039756417791, - "learning_rate": 1.2197138151655584e-05, - "loss": 0.7537, + "epoch": 0.9022874894097712, + "grad_norm": 3.0796935274743493, + "learning_rate": 1.6395640136013073e-05, + "loss": 0.8122, "step": 6390 }, { - "epoch": 1.3401132312853847, - "grad_norm": 7.1288391755488965, - "learning_rate": 1.2194929530222107e-05, - "loss": 0.791, + "epoch": 0.9024286924597571, + "grad_norm": 3.901693627711233, + "learning_rate": 1.6394468157022574e-05, + "loss": 1.0836, "step": 6391 }, { - "epoch": 1.3403229188509123, - "grad_norm": 6.374731983833911, - "learning_rate": 1.2192720796294688e-05, - "loss": 0.6228, + "epoch": 0.902569895509743, + "grad_norm": 3.0025643110490106, + "learning_rate": 1.6393296029427395e-05, + "loss": 0.9572, "step": 6392 }, { - "epoch": 1.3405326064164396, - "grad_norm": 8.434942651550807, - "learning_rate": 1.219051194998652e-05, - "loss": 0.808, + "epoch": 0.9027110985597289, + "grad_norm": 2.7189192621917915, + "learning_rate": 1.6392123753254777e-05, + "loss": 0.718, "step": 6393 }, { - "epoch": 1.3407422939819669, - "grad_norm": 5.737027653113215, - "learning_rate": 1.2188302991410821e-05, - "loss": 0.5594, + "epoch": 0.9028523016097147, + "grad_norm": 3.1977006042337, + "learning_rate": 1.6390951328531966e-05, + "loss": 0.9446, "step": 6394 }, { - "epoch": 1.3409519815474942, - "grad_norm": 6.47801744183265, - "learning_rate": 1.2186093920680794e-05, - "loss": 0.5943, + "epoch": 0.9029935046597006, + "grad_norm": 3.8478735722850828, + "learning_rate": 1.6389778755286204e-05, + "loss": 1.0463, "step": 6395 }, { - "epoch": 1.3411616691130215, - "grad_norm": 6.630583631122121, - "learning_rate": 1.2183884737909664e-05, - "loss": 0.689, + "epoch": 0.9031347077096865, + "grad_norm": 3.5958454306218717, + "learning_rate": 1.6388606033544745e-05, + "loss": 1.2908, "step": 6396 }, { - "epoch": 1.341371356678549, - "grad_norm": 7.937045736014091, - "learning_rate": 1.2181675443210654e-05, - "loss": 0.768, + "epoch": 0.9032759107596724, + "grad_norm": 3.609194486480678, + "learning_rate": 1.638743316333484e-05, + "loss": 0.9803, "step": 6397 }, { - "epoch": 1.3415810442440763, - "grad_norm": 9.476028501241416, - "learning_rate": 1.2179466036696994e-05, - "loss": 1.0816, + "epoch": 0.9034171138096583, + "grad_norm": 2.951844735983867, + "learning_rate": 1.6386260144683744e-05, + "loss": 0.8944, "step": 6398 }, { - "epoch": 1.3417907318096036, - "grad_norm": 8.727766426721434, - "learning_rate": 1.2177256518481915e-05, - "loss": 1.1069, + "epoch": 0.9035583168596442, + "grad_norm": 3.462846387715468, + "learning_rate": 1.6385086977618724e-05, + "loss": 0.9576, "step": 6399 }, { - "epoch": 1.3420004193751311, - "grad_norm": 9.04448713877718, - "learning_rate": 1.2175046888678668e-05, - "loss": 0.9713, + "epoch": 0.90369951990963, + "grad_norm": 3.3113407718054737, + "learning_rate": 1.638391366216704e-05, + "loss": 0.909, "step": 6400 }, { - "epoch": 1.3422101069406585, - "grad_norm": 6.3406632649384305, - "learning_rate": 1.2172837147400493e-05, - "loss": 0.8137, + "epoch": 0.9038407229596159, + "grad_norm": 5.592687748915697, + "learning_rate": 1.638274019835596e-05, + "loss": 0.8976, "step": 6401 }, { - "epoch": 1.3424197945061858, - "grad_norm": 6.503425996822555, - "learning_rate": 1.217062729476065e-05, - "loss": 0.5634, + "epoch": 0.9039819260096018, + "grad_norm": 3.292297921098729, + "learning_rate": 1.6381566586212752e-05, + "loss": 0.8168, "step": 6402 }, { - "epoch": 1.342629482071713, - "grad_norm": 7.485520643998392, - "learning_rate": 1.2168417330872386e-05, - "loss": 0.6971, + "epoch": 0.9041231290595877, + "grad_norm": 2.995104054977495, + "learning_rate": 1.6380392825764693e-05, + "loss": 0.9421, "step": 6403 }, { - "epoch": 1.3428391696372406, - "grad_norm": 6.900948390990102, - "learning_rate": 1.2166207255848978e-05, - "loss": 0.8474, + "epoch": 0.9042643321095736, + "grad_norm": 3.967572988229277, + "learning_rate": 1.637921891703906e-05, + "loss": 1.336, "step": 6404 }, { - "epoch": 1.343048857202768, - "grad_norm": 8.168064706358221, - "learning_rate": 1.216399706980369e-05, - "loss": 0.9946, + "epoch": 0.9044055351595595, + "grad_norm": 3.3793536427564352, + "learning_rate": 1.6378044860063135e-05, + "loss": 0.9428, "step": 6405 }, { - "epoch": 1.3432585447682952, - "grad_norm": 6.172145807535987, - "learning_rate": 1.21617867728498e-05, - "loss": 0.6357, + "epoch": 0.9045467382095453, + "grad_norm": 3.4864630281757933, + "learning_rate": 1.63768706548642e-05, + "loss": 1.0383, "step": 6406 }, { - "epoch": 1.3434682323338225, - "grad_norm": 6.728455847487092, - "learning_rate": 1.2159576365100584e-05, - "loss": 0.6908, + "epoch": 0.9046879412595312, + "grad_norm": 3.269324864408698, + "learning_rate": 1.637569630146955e-05, + "loss": 0.9502, "step": 6407 }, { - "epoch": 1.34367791989935, - "grad_norm": 6.9947057828290164, - "learning_rate": 1.2157365846669337e-05, - "loss": 0.8733, + "epoch": 0.9048291443095171, + "grad_norm": 3.932156779301153, + "learning_rate": 1.6374521799906468e-05, + "loss": 1.1213, "step": 6408 }, { - "epoch": 1.3438876074648773, - "grad_norm": 8.27785496575513, - "learning_rate": 1.215515521766935e-05, - "loss": 0.9759, + "epoch": 0.904970347359503, + "grad_norm": 3.488737161696404, + "learning_rate": 1.6373347150202252e-05, + "loss": 1.0736, "step": 6409 }, { - "epoch": 1.3440972950304046, - "grad_norm": 6.430511938133592, - "learning_rate": 1.215294447821392e-05, - "loss": 0.6646, + "epoch": 0.9051115504094889, + "grad_norm": 3.45691299305735, + "learning_rate": 1.63721723523842e-05, + "loss": 1.323, "step": 6410 }, { - "epoch": 1.3443069825959322, - "grad_norm": 5.14303280616504, - "learning_rate": 1.215073362841635e-05, - "loss": 0.5087, + "epoch": 0.9052527534594748, + "grad_norm": 4.123122204528794, + "learning_rate": 1.6370997406479617e-05, + "loss": 1.088, "step": 6411 }, { - "epoch": 1.3445166701614595, - "grad_norm": 6.680342091727118, - "learning_rate": 1.214852266838995e-05, - "loss": 0.7957, + "epoch": 0.9053939565094606, + "grad_norm": 2.932696925657219, + "learning_rate": 1.6369822312515805e-05, + "loss": 0.9449, "step": 6412 }, { - "epoch": 1.3447263577269868, - "grad_norm": 8.794716128425481, - "learning_rate": 1.2146311598248037e-05, - "loss": 0.8562, + "epoch": 0.9055351595594465, + "grad_norm": 3.5924038027489833, + "learning_rate": 1.6368647070520073e-05, + "loss": 0.9957, "step": 6413 }, { - "epoch": 1.344936045292514, - "grad_norm": 6.686944028909398, - "learning_rate": 1.2144100418103933e-05, - "loss": 0.6927, + "epoch": 0.9056763626094324, + "grad_norm": 3.0452533661273677, + "learning_rate": 1.6367471680519734e-05, + "loss": 0.7521, "step": 6414 }, { - "epoch": 1.3451457328580414, - "grad_norm": 7.705282594170068, - "learning_rate": 1.2141889128070965e-05, - "loss": 0.9205, + "epoch": 0.9058175656594183, + "grad_norm": 3.430979167473585, + "learning_rate": 1.63662961425421e-05, + "loss": 0.9996, "step": 6415 }, { - "epoch": 1.345355420423569, - "grad_norm": 7.9855756472785, - "learning_rate": 1.2139677728262462e-05, - "loss": 0.8751, + "epoch": 0.9059587687094042, + "grad_norm": 3.2118397964166343, + "learning_rate": 1.6365120456614498e-05, + "loss": 0.8012, "step": 6416 }, { - "epoch": 1.3455651079890962, - "grad_norm": 6.169298783562788, - "learning_rate": 1.2137466218791767e-05, - "loss": 0.6533, + "epoch": 0.90609997175939, + "grad_norm": 3.4687346856841765, + "learning_rate": 1.6363944622764242e-05, + "loss": 0.9832, "step": 6417 }, { - "epoch": 1.3457747955546235, - "grad_norm": 7.866182801275056, - "learning_rate": 1.2135254599772219e-05, - "loss": 0.9997, + "epoch": 0.9062411748093759, + "grad_norm": 3.1850841632678533, + "learning_rate": 1.6362768641018662e-05, + "loss": 0.8534, "step": 6418 }, { - "epoch": 1.345984483120151, - "grad_norm": 7.112278664963097, - "learning_rate": 1.213304287131717e-05, - "loss": 0.7735, + "epoch": 0.9063823778593617, + "grad_norm": 3.8570785053852004, + "learning_rate": 1.6361592511405087e-05, + "loss": 1.1803, "step": 6419 }, { - "epoch": 1.3461941706856784, - "grad_norm": 8.535440582934076, - "learning_rate": 1.2130831033539974e-05, - "loss": 0.8853, + "epoch": 0.9065235809093476, + "grad_norm": 3.798911937195144, + "learning_rate": 1.636041623395085e-05, + "loss": 1.1614, "step": 6420 }, { - "epoch": 1.3464038582512057, - "grad_norm": 6.795340600781241, - "learning_rate": 1.2128619086553994e-05, - "loss": 0.804, + "epoch": 0.9066647839593335, + "grad_norm": 3.2804090491860083, + "learning_rate": 1.6359239808683284e-05, + "loss": 0.9358, "step": 6421 }, { - "epoch": 1.3466135458167332, - "grad_norm": 8.144412468197274, - "learning_rate": 1.2126407030472588e-05, - "loss": 0.9034, + "epoch": 0.9068059870093194, + "grad_norm": 3.242869213817217, + "learning_rate": 1.6358063235629733e-05, + "loss": 0.958, "step": 6422 }, { - "epoch": 1.3468232333822605, - "grad_norm": 6.560874991517755, - "learning_rate": 1.2124194865409141e-05, - "loss": 0.8112, + "epoch": 0.9069471900593052, + "grad_norm": 3.610011364677211, + "learning_rate": 1.635688651481754e-05, + "loss": 0.9925, "step": 6423 }, { - "epoch": 1.3470329209477878, - "grad_norm": 7.879052556797238, - "learning_rate": 1.212198259147702e-05, - "loss": 0.8889, + "epoch": 0.9070883931092911, + "grad_norm": 3.5019880820239147, + "learning_rate": 1.6355709646274048e-05, + "loss": 0.937, "step": 6424 }, { - "epoch": 1.3472426085133151, - "grad_norm": 7.184043146537791, - "learning_rate": 1.211977020878961e-05, - "loss": 0.7173, + "epoch": 0.907229596159277, + "grad_norm": 3.374026007544711, + "learning_rate": 1.6354532630026608e-05, + "loss": 1.0366, "step": 6425 }, { - "epoch": 1.3474522960788424, - "grad_norm": 6.831071393072331, - "learning_rate": 1.21175577174603e-05, - "loss": 0.7138, + "epoch": 0.9073707992092629, + "grad_norm": 3.454867641995901, + "learning_rate": 1.6353355466102575e-05, + "loss": 1.0522, "step": 6426 }, { - "epoch": 1.34766198364437, - "grad_norm": 7.102510532461891, - "learning_rate": 1.2115345117602487e-05, - "loss": 0.8524, + "epoch": 0.9075120022592488, + "grad_norm": 3.074664317801325, + "learning_rate": 1.63521781545293e-05, + "loss": 0.7329, "step": 6427 }, { - "epoch": 1.3478716712098973, - "grad_norm": 7.455383826442386, - "learning_rate": 1.2113132409329561e-05, - "loss": 0.941, + "epoch": 0.9076532053092347, + "grad_norm": 3.68905918591236, + "learning_rate": 1.6351000695334157e-05, + "loss": 1.1899, "step": 6428 }, { - "epoch": 1.3480813587754246, - "grad_norm": 8.64800744206449, - "learning_rate": 1.2110919592754942e-05, - "loss": 0.9091, + "epoch": 0.9077944083592205, + "grad_norm": 3.240610260937898, + "learning_rate": 1.6349823088544494e-05, + "loss": 1.0243, "step": 6429 }, { - "epoch": 1.348291046340952, - "grad_norm": 7.821744815761876, - "learning_rate": 1.2108706667992027e-05, - "loss": 0.8368, + "epoch": 0.9079356114092064, + "grad_norm": 3.782862579173547, + "learning_rate": 1.6348645334187686e-05, + "loss": 1.1277, "step": 6430 }, { - "epoch": 1.3485007339064794, - "grad_norm": 7.7635782432908, - "learning_rate": 1.2106493635154241e-05, - "loss": 1.0088, + "epoch": 0.9080768144591923, + "grad_norm": 3.611453837528714, + "learning_rate": 1.6347467432291103e-05, + "loss": 1.1065, "step": 6431 }, { - "epoch": 1.3487104214720067, - "grad_norm": 6.625058904731215, - "learning_rate": 1.2104280494355e-05, - "loss": 0.6071, + "epoch": 0.9082180175091782, + "grad_norm": 3.3992537941464525, + "learning_rate": 1.6346289382882117e-05, + "loss": 1.0409, "step": 6432 }, { - "epoch": 1.348920109037534, - "grad_norm": 5.698745091138195, - "learning_rate": 1.2102067245707736e-05, - "loss": 0.6146, + "epoch": 0.9083592205591641, + "grad_norm": 3.941759431897247, + "learning_rate": 1.634511118598811e-05, + "loss": 1.0057, "step": 6433 }, { - "epoch": 1.3491297966030613, - "grad_norm": 7.124762109969999, - "learning_rate": 1.209985388932588e-05, - "loss": 0.838, + "epoch": 0.90850042360915, + "grad_norm": 3.9713725755904443, + "learning_rate": 1.6343932841636455e-05, + "loss": 1.4145, "step": 6434 }, { - "epoch": 1.3493394841685888, - "grad_norm": 7.005309830326782, - "learning_rate": 1.209764042532287e-05, - "loss": 0.7887, + "epoch": 0.9086416266591358, + "grad_norm": 3.225655718159319, + "learning_rate": 1.634275434985454e-05, + "loss": 0.8825, "step": 6435 }, { - "epoch": 1.3495491717341161, - "grad_norm": 6.906327197292887, - "learning_rate": 1.209542685381215e-05, - "loss": 0.6673, + "epoch": 0.9087828297091217, + "grad_norm": 2.953734803908207, + "learning_rate": 1.6341575710669758e-05, + "loss": 0.9377, "step": 6436 }, { - "epoch": 1.3497588592996435, - "grad_norm": 6.542585245536598, - "learning_rate": 1.209321317490717e-05, - "loss": 0.7678, + "epoch": 0.9089240327591076, + "grad_norm": 2.7080328796005197, + "learning_rate": 1.6340396924109492e-05, + "loss": 0.8174, "step": 6437 }, { - "epoch": 1.349968546865171, - "grad_norm": 5.235220682094636, - "learning_rate": 1.2090999388721382e-05, - "loss": 0.4319, + "epoch": 0.9090652358090935, + "grad_norm": 3.808070280708418, + "learning_rate": 1.633921799020114e-05, + "loss": 1.1486, "step": 6438 }, { - "epoch": 1.3501782344306983, - "grad_norm": 6.115632969242415, - "learning_rate": 1.2088785495368252e-05, - "loss": 0.7086, + "epoch": 0.9092064388590794, + "grad_norm": 3.4553732972963607, + "learning_rate": 1.6338038908972102e-05, + "loss": 1.0062, "step": 6439 }, { - "epoch": 1.3503879219962256, - "grad_norm": 6.768467139053652, - "learning_rate": 1.2086571494961239e-05, - "loss": 0.796, + "epoch": 0.9093476419090653, + "grad_norm": 3.357020716821186, + "learning_rate": 1.6336859680449773e-05, + "loss": 0.8977, "step": 6440 }, { - "epoch": 1.3505976095617531, - "grad_norm": 6.0479325799244545, - "learning_rate": 1.2084357387613821e-05, - "loss": 0.6299, + "epoch": 0.9094888449590511, + "grad_norm": 4.364238974953833, + "learning_rate": 1.6335680304661568e-05, + "loss": 1.2124, "step": 6441 }, { - "epoch": 1.3508072971272804, - "grad_norm": 7.589600298761827, - "learning_rate": 1.208214317343947e-05, - "loss": 0.7612, + "epoch": 0.909630048009037, + "grad_norm": 4.553917791555002, + "learning_rate": 1.633450078163488e-05, + "loss": 1.408, "step": 6442 }, { - "epoch": 1.3510169846928077, - "grad_norm": 7.324217942047075, - "learning_rate": 1.207992885255167e-05, - "loss": 0.8527, + "epoch": 0.9097712510590229, + "grad_norm": 3.1867507924473073, + "learning_rate": 1.6333321111397137e-05, + "loss": 0.9386, "step": 6443 }, { - "epoch": 1.351226672258335, - "grad_norm": 8.168365694404505, - "learning_rate": 1.2077714425063911e-05, - "loss": 0.9359, + "epoch": 0.9099124541090088, + "grad_norm": 2.988064502235517, + "learning_rate": 1.6332141293975742e-05, + "loss": 0.8516, "step": 6444 }, { - "epoch": 1.3514363598238623, - "grad_norm": 8.216643646648595, - "learning_rate": 1.207549989108968e-05, - "loss": 0.8615, + "epoch": 0.9100536571589947, + "grad_norm": 2.8275460070170513, + "learning_rate": 1.633096132939812e-05, + "loss": 0.7627, "step": 6445 }, { - "epoch": 1.3516460473893899, - "grad_norm": 7.387445932479297, - "learning_rate": 1.2073285250742483e-05, - "loss": 0.7637, + "epoch": 0.9101948602089805, + "grad_norm": 3.3866323421143414, + "learning_rate": 1.632978121769169e-05, + "loss": 1.1489, "step": 6446 }, { - "epoch": 1.3518557349549172, - "grad_norm": 6.752606635320653, - "learning_rate": 1.207107050413582e-05, - "loss": 0.7572, + "epoch": 0.9103360632589664, + "grad_norm": 3.7166489280905832, + "learning_rate": 1.632860095888388e-05, + "loss": 1.2273, "step": 6447 }, { - "epoch": 1.3520654225204445, - "grad_norm": 6.9863794957832575, - "learning_rate": 1.2068855651383203e-05, - "loss": 0.6041, + "epoch": 0.9104772663089523, + "grad_norm": 2.887425020153329, + "learning_rate": 1.6327420553002113e-05, + "loss": 0.8809, "step": 6448 }, { - "epoch": 1.352275110085972, - "grad_norm": 7.802703222849968, - "learning_rate": 1.2066640692598145e-05, - "loss": 0.7171, + "epoch": 0.9106184693589382, + "grad_norm": 3.7485365862294633, + "learning_rate": 1.632624000007383e-05, + "loss": 0.9296, "step": 6449 }, { - "epoch": 1.3524847976514993, - "grad_norm": 8.625470177129106, - "learning_rate": 1.2064425627894164e-05, - "loss": 1.0682, + "epoch": 0.9107596724089241, + "grad_norm": 3.09669815331168, + "learning_rate": 1.632505930012646e-05, + "loss": 0.8432, "step": 6450 }, { - "epoch": 1.3526944852170266, - "grad_norm": 7.300274909959484, - "learning_rate": 1.2062210457384791e-05, - "loss": 0.7515, + "epoch": 0.91090087545891, + "grad_norm": 3.2999396426940484, + "learning_rate": 1.632387845318744e-05, + "loss": 0.8602, "step": 6451 }, { - "epoch": 1.352904172782554, - "grad_norm": 9.046372541127317, - "learning_rate": 1.2059995181183557e-05, - "loss": 1.0834, + "epoch": 0.9110420785088958, + "grad_norm": 3.2303337219903034, + "learning_rate": 1.632269745928422e-05, + "loss": 0.9958, "step": 6452 }, { - "epoch": 1.3531138603480812, - "grad_norm": 9.38341953118875, - "learning_rate": 1.2057779799403998e-05, - "loss": 0.9231, + "epoch": 0.9111832815588816, + "grad_norm": 3.723849588378819, + "learning_rate": 1.6321516318444235e-05, + "loss": 1.0423, "step": 6453 }, { - "epoch": 1.3533235479136088, - "grad_norm": 6.331139161768647, - "learning_rate": 1.2055564312159651e-05, - "loss": 0.5789, + "epoch": 0.9113244846088675, + "grad_norm": 3.57385878663657, + "learning_rate": 1.632033503069495e-05, + "loss": 1.0368, "step": 6454 }, { - "epoch": 1.353533235479136, - "grad_norm": 9.529222757466018, - "learning_rate": 1.205334871956407e-05, - "loss": 0.821, + "epoch": 0.9114656876588534, + "grad_norm": 3.492467131366321, + "learning_rate": 1.6319153596063803e-05, + "loss": 1.0658, "step": 6455 }, { - "epoch": 1.3537429230446634, - "grad_norm": 7.744715875135254, - "learning_rate": 1.2051133021730807e-05, - "loss": 0.6338, + "epoch": 0.9116068907088393, + "grad_norm": 3.9088152109391907, + "learning_rate": 1.6317972014578252e-05, + "loss": 1.1752, "step": 6456 }, { - "epoch": 1.353952610610191, - "grad_norm": 8.629793055923283, - "learning_rate": 1.2048917218773415e-05, - "loss": 0.9461, + "epoch": 0.9117480937588252, + "grad_norm": 3.0977364498140596, + "learning_rate": 1.6316790286265764e-05, + "loss": 0.8803, "step": 6457 }, { - "epoch": 1.3541622981757182, - "grad_norm": 7.105329218055937, - "learning_rate": 1.2046701310805466e-05, - "loss": 0.7343, + "epoch": 0.911889296808811, + "grad_norm": 2.579159624840554, + "learning_rate": 1.63156084111538e-05, + "loss": 0.7622, "step": 6458 }, { - "epoch": 1.3543719857412455, - "grad_norm": 7.18579049201621, - "learning_rate": 1.2044485297940524e-05, - "loss": 0.7233, + "epoch": 0.9120304998587969, + "grad_norm": 3.164945781703793, + "learning_rate": 1.6314426389269822e-05, + "loss": 0.9472, "step": 6459 }, { - "epoch": 1.354581673306773, - "grad_norm": 7.313754544047871, - "learning_rate": 1.2042269180292166e-05, - "loss": 0.7028, + "epoch": 0.9121717029087828, + "grad_norm": 3.29705428750325, + "learning_rate": 1.6313244220641304e-05, + "loss": 1.0124, "step": 6460 }, { - "epoch": 1.3547913608723003, - "grad_norm": 7.754111303601662, - "learning_rate": 1.2040052957973967e-05, - "loss": 0.848, + "epoch": 0.9123129059587687, + "grad_norm": 2.8569896134220794, + "learning_rate": 1.631206190529571e-05, + "loss": 0.6594, "step": 6461 }, { - "epoch": 1.3550010484378276, - "grad_norm": 8.795921450170088, - "learning_rate": 1.2037836631099518e-05, - "loss": 0.865, + "epoch": 0.9124541090087546, + "grad_norm": 3.669410298196866, + "learning_rate": 1.631087944326053e-05, + "loss": 1.037, "step": 6462 }, { - "epoch": 1.355210736003355, - "grad_norm": 8.845688286286869, - "learning_rate": 1.2035620199782406e-05, - "loss": 1.0648, + "epoch": 0.9125953120587404, + "grad_norm": 3.6779842569744243, + "learning_rate": 1.6309696834563236e-05, + "loss": 1.053, "step": 6463 }, { - "epoch": 1.3554204235688823, - "grad_norm": 6.461648798388557, - "learning_rate": 1.2033403664136227e-05, - "loss": 0.7458, + "epoch": 0.9127365151087263, + "grad_norm": 3.6417646976897022, + "learning_rate": 1.630851407923131e-05, + "loss": 1.0478, "step": 6464 }, { - "epoch": 1.3556301111344098, - "grad_norm": 6.961432123646212, - "learning_rate": 1.2031187024274586e-05, - "loss": 0.9058, + "epoch": 0.9128777181587122, + "grad_norm": 3.5583779349343794, + "learning_rate": 1.630733117729224e-05, + "loss": 0.937, "step": 6465 }, { - "epoch": 1.355839798699937, - "grad_norm": 7.396042813381256, - "learning_rate": 1.2028970280311086e-05, - "loss": 0.9224, + "epoch": 0.9130189212086981, + "grad_norm": 3.395730746753288, + "learning_rate": 1.6306148128773522e-05, + "loss": 0.8451, "step": 6466 }, { - "epoch": 1.3560494862654644, - "grad_norm": 7.580249542149914, - "learning_rate": 1.2026753432359341e-05, - "loss": 0.8751, + "epoch": 0.913160124258684, + "grad_norm": 2.9769673979264826, + "learning_rate": 1.630496493370264e-05, + "loss": 0.8601, "step": 6467 }, { - "epoch": 1.356259173830992, - "grad_norm": 6.734508260976132, - "learning_rate": 1.2024536480532965e-05, - "loss": 0.8256, + "epoch": 0.9133013273086699, + "grad_norm": 4.6383332448709655, + "learning_rate": 1.6303781592107102e-05, + "loss": 0.9726, "step": 6468 }, { - "epoch": 1.3564688613965192, - "grad_norm": 6.4974724746756705, - "learning_rate": 1.2022319424945585e-05, - "loss": 0.6532, + "epoch": 0.9134425303586557, + "grad_norm": 3.4450015016292137, + "learning_rate": 1.63025981040144e-05, + "loss": 0.785, "step": 6469 }, { - "epoch": 1.3566785489620465, - "grad_norm": 6.162735406962014, - "learning_rate": 1.2020102265710826e-05, - "loss": 0.7621, + "epoch": 0.9135837334086416, + "grad_norm": 2.688717599412168, + "learning_rate": 1.6301414469452037e-05, + "loss": 0.6726, "step": 6470 }, { - "epoch": 1.3568882365275738, - "grad_norm": 6.902084218873949, - "learning_rate": 1.2017885002942323e-05, - "loss": 0.8191, + "epoch": 0.9137249364586275, + "grad_norm": 3.881157902900692, + "learning_rate": 1.6300230688447528e-05, + "loss": 1.0401, "step": 6471 }, { - "epoch": 1.3570979240931011, - "grad_norm": 7.53779500821685, - "learning_rate": 1.2015667636753714e-05, - "loss": 0.9587, + "epoch": 0.9138661395086134, + "grad_norm": 3.4931998160486564, + "learning_rate": 1.6299046761028373e-05, + "loss": 0.9349, "step": 6472 }, { - "epoch": 1.3573076116586287, - "grad_norm": 8.31024045278443, - "learning_rate": 1.2013450167258642e-05, - "loss": 1.0757, + "epoch": 0.9140073425585993, + "grad_norm": 3.729040506184297, + "learning_rate": 1.6297862687222097e-05, + "loss": 1.135, "step": 6473 }, { - "epoch": 1.357517299224156, - "grad_norm": 7.124762292842106, - "learning_rate": 1.2011232594570754e-05, - "loss": 0.9512, + "epoch": 0.9141485456085852, + "grad_norm": 2.8563495841444784, + "learning_rate": 1.629667846705621e-05, + "loss": 0.9413, "step": 6474 }, { - "epoch": 1.3577269867896833, - "grad_norm": 7.44272313042543, - "learning_rate": 1.2009014918803712e-05, - "loss": 0.7056, + "epoch": 0.914289748658571, + "grad_norm": 4.498214571334865, + "learning_rate": 1.629549410055823e-05, + "loss": 1.3015, "step": 6475 }, { - "epoch": 1.3579366743552108, - "grad_norm": 7.672889390881073, - "learning_rate": 1.2006797140071169e-05, - "loss": 0.8154, + "epoch": 0.9144309517085569, + "grad_norm": 2.9692374649824487, + "learning_rate": 1.6294309587755693e-05, + "loss": 0.7861, "step": 6476 }, { - "epoch": 1.3581463619207381, - "grad_norm": 7.028042465650182, - "learning_rate": 1.2004579258486793e-05, - "loss": 0.7657, + "epoch": 0.9145721547585428, + "grad_norm": 2.8417696624316298, + "learning_rate": 1.6293124928676112e-05, + "loss": 0.842, "step": 6477 }, { - "epoch": 1.3583560494862654, - "grad_norm": 6.489262918135886, - "learning_rate": 1.2002361274164252e-05, - "loss": 0.8441, + "epoch": 0.9147133578085287, + "grad_norm": 3.4988101096698196, + "learning_rate": 1.6291940123347033e-05, + "loss": 1.1867, "step": 6478 }, { - "epoch": 1.358565737051793, - "grad_norm": 6.340888648852408, - "learning_rate": 1.2000143187217224e-05, - "loss": 0.6964, + "epoch": 0.9148545608585146, + "grad_norm": 4.143335881245334, + "learning_rate": 1.629075517179598e-05, + "loss": 1.0987, "step": 6479 }, { - "epoch": 1.3587754246173203, - "grad_norm": 5.752937650051768, - "learning_rate": 1.1997924997759385e-05, - "loss": 0.707, + "epoch": 0.9149957639085005, + "grad_norm": 2.8784451802518185, + "learning_rate": 1.6289570074050492e-05, + "loss": 0.8046, "step": 6480 }, { - "epoch": 1.3589851121828476, - "grad_norm": 7.239678776006695, - "learning_rate": 1.1995706705904428e-05, - "loss": 0.906, + "epoch": 0.9151369669584863, + "grad_norm": 3.544265192779974, + "learning_rate": 1.6288384830138114e-05, + "loss": 1.1424, "step": 6481 }, { - "epoch": 1.3591947997483749, - "grad_norm": 7.64144017228882, - "learning_rate": 1.1993488311766041e-05, - "loss": 0.813, + "epoch": 0.9152781700084722, + "grad_norm": 3.9949656730225502, + "learning_rate": 1.628719944008639e-05, + "loss": 1.111, "step": 6482 }, { - "epoch": 1.3594044873139022, - "grad_norm": 6.54028863617211, - "learning_rate": 1.199126981545792e-05, - "loss": 0.7876, + "epoch": 0.9154193730584581, + "grad_norm": 3.62174297133545, + "learning_rate": 1.628601390392286e-05, + "loss": 1.2106, "step": 6483 }, { - "epoch": 1.3596141748794297, - "grad_norm": 7.012584194285227, - "learning_rate": 1.1989051217093766e-05, - "loss": 0.97, + "epoch": 0.915560576108444, + "grad_norm": 2.856197374657836, + "learning_rate": 1.6284828221675085e-05, + "loss": 0.8514, "step": 6484 }, { - "epoch": 1.359823862444957, - "grad_norm": 8.063412388983604, - "learning_rate": 1.198683251678729e-05, - "loss": 0.9218, + "epoch": 0.9157017791584299, + "grad_norm": 3.131734457577334, + "learning_rate": 1.6283642393370618e-05, + "loss": 0.8376, "step": 6485 }, { - "epoch": 1.3600335500104843, - "grad_norm": 8.908621547150124, - "learning_rate": 1.1984613714652198e-05, - "loss": 1.1506, + "epoch": 0.9158429822084158, + "grad_norm": 2.5955052020340856, + "learning_rate": 1.6282456419037013e-05, + "loss": 0.6893, "step": 6486 }, { - "epoch": 1.3602432375760118, - "grad_norm": 6.41822774933903, - "learning_rate": 1.1982394810802213e-05, - "loss": 0.8226, + "epoch": 0.9159841852584015, + "grad_norm": 3.6320334985753027, + "learning_rate": 1.6281270298701836e-05, + "loss": 0.9257, "step": 6487 }, { - "epoch": 1.3604529251415391, - "grad_norm": 6.989330817839758, - "learning_rate": 1.1980175805351055e-05, - "loss": 0.8343, + "epoch": 0.9161253883083874, + "grad_norm": 3.0782148265217257, + "learning_rate": 1.628008403239265e-05, + "loss": 0.983, "step": 6488 }, { - "epoch": 1.3606626127070665, - "grad_norm": 6.872273175290653, - "learning_rate": 1.1977956698412454e-05, - "loss": 0.8181, + "epoch": 0.9162665913583733, + "grad_norm": 3.466755041653721, + "learning_rate": 1.627889762013702e-05, + "loss": 0.9602, "step": 6489 }, { - "epoch": 1.3608723002725938, - "grad_norm": 6.302555376329823, - "learning_rate": 1.1975737490100139e-05, - "loss": 0.5962, + "epoch": 0.9164077944083592, + "grad_norm": 3.998983868221303, + "learning_rate": 1.6277711061962525e-05, + "loss": 1.0819, "step": 6490 }, { - "epoch": 1.361081987838121, - "grad_norm": 9.38762671515917, - "learning_rate": 1.1973518180527852e-05, - "loss": 1.0459, + "epoch": 0.9165489974583451, + "grad_norm": 3.5704280116184925, + "learning_rate": 1.627652435789673e-05, + "loss": 1.2447, "step": 6491 }, { - "epoch": 1.3612916754036486, - "grad_norm": 9.107687670358956, - "learning_rate": 1.1971298769809336e-05, - "loss": 0.9862, + "epoch": 0.9166902005083309, + "grad_norm": 3.3586997527248745, + "learning_rate": 1.6275337507967228e-05, + "loss": 1.1726, "step": 6492 }, { - "epoch": 1.361501362969176, - "grad_norm": 7.1577075495267355, - "learning_rate": 1.1969079258058338e-05, - "loss": 0.795, + "epoch": 0.9168314035583168, + "grad_norm": 3.188302054712984, + "learning_rate": 1.6274150512201586e-05, + "loss": 0.84, "step": 6493 }, { - "epoch": 1.3617110505347032, - "grad_norm": 10.223138145907203, - "learning_rate": 1.1966859645388616e-05, - "loss": 0.9621, + "epoch": 0.9169726066083027, + "grad_norm": 3.1356474658379256, + "learning_rate": 1.6272963370627398e-05, + "loss": 0.9393, "step": 6494 }, { - "epoch": 1.3619207381002307, - "grad_norm": 6.145630463673914, - "learning_rate": 1.1964639931913921e-05, - "loss": 0.6194, + "epoch": 0.9171138096582886, + "grad_norm": 2.9653126666027445, + "learning_rate": 1.627177608327225e-05, + "loss": 0.8941, "step": 6495 }, { - "epoch": 1.362130425665758, - "grad_norm": 6.453689442702025, - "learning_rate": 1.1962420117748026e-05, - "loss": 0.8461, + "epoch": 0.9172550127082745, + "grad_norm": 3.2943340292705687, + "learning_rate": 1.6270588650163737e-05, + "loss": 1.0362, "step": 6496 }, { - "epoch": 1.3623401132312853, - "grad_norm": 7.745999489324701, - "learning_rate": 1.1960200203004695e-05, - "loss": 0.9723, + "epoch": 0.9173962157582604, + "grad_norm": 3.563154733280506, + "learning_rate": 1.6269401071329447e-05, + "loss": 0.9116, "step": 6497 }, { - "epoch": 1.3625498007968129, - "grad_norm": 8.2748131648448, - "learning_rate": 1.1957980187797703e-05, - "loss": 0.9234, + "epoch": 0.9175374188082462, + "grad_norm": 3.1930088120817866, + "learning_rate": 1.626821334679699e-05, + "loss": 1.0641, "step": 6498 }, { - "epoch": 1.3627594883623402, - "grad_norm": 7.071999670757471, - "learning_rate": 1.195576007224083e-05, - "loss": 0.8486, + "epoch": 0.9176786218582321, + "grad_norm": 3.4269208237411544, + "learning_rate": 1.6267025476593957e-05, + "loss": 1.0319, "step": 6499 }, { - "epoch": 1.3629691759278675, - "grad_norm": 7.867219262641423, - "learning_rate": 1.1953539856447862e-05, - "loss": 0.9015, + "epoch": 0.917819824908218, + "grad_norm": 3.734524923394806, + "learning_rate": 1.626583746074796e-05, + "loss": 1.2924, "step": 6500 }, { - "epoch": 1.3631788634933948, - "grad_norm": 7.391838717048641, - "learning_rate": 1.1951319540532588e-05, - "loss": 0.9211, + "epoch": 0.9179610279582039, + "grad_norm": 3.1224376496902964, + "learning_rate": 1.6264649299286604e-05, + "loss": 0.9259, "step": 6501 }, { - "epoch": 1.363388551058922, - "grad_norm": 7.052136177553956, - "learning_rate": 1.1949099124608805e-05, - "loss": 0.7448, + "epoch": 0.9181022310081898, + "grad_norm": 4.071241522938372, + "learning_rate": 1.6263460992237507e-05, + "loss": 0.8515, "step": 6502 }, { - "epoch": 1.3635982386244496, - "grad_norm": 6.855026564778334, - "learning_rate": 1.1946878608790308e-05, - "loss": 0.7957, + "epoch": 0.9182434340581757, + "grad_norm": 4.059756988466037, + "learning_rate": 1.6262272539628277e-05, + "loss": 0.9501, "step": 6503 }, { - "epoch": 1.363807926189977, - "grad_norm": 7.975799689868046, - "learning_rate": 1.1944657993190908e-05, - "loss": 0.9342, + "epoch": 0.9183846371081615, + "grad_norm": 3.205173473942416, + "learning_rate": 1.6261083941486543e-05, + "loss": 1.0663, "step": 6504 }, { - "epoch": 1.3640176137555042, - "grad_norm": 6.45224396444523, - "learning_rate": 1.1942437277924413e-05, - "loss": 0.7104, + "epoch": 0.9185258401581474, + "grad_norm": 3.1996604623118126, + "learning_rate": 1.625989519783992e-05, + "loss": 0.9513, "step": 6505 }, { - "epoch": 1.3642273013210318, - "grad_norm": 7.690934506271937, - "learning_rate": 1.1940216463104637e-05, - "loss": 0.8596, + "epoch": 0.9186670432081333, + "grad_norm": 3.473012426704352, + "learning_rate": 1.6258706308716035e-05, + "loss": 0.9723, "step": 6506 }, { - "epoch": 1.364436988886559, - "grad_norm": 7.584933779773345, - "learning_rate": 1.1937995548845403e-05, - "loss": 1.0767, + "epoch": 0.9188082462581192, + "grad_norm": 3.04782440693302, + "learning_rate": 1.625751727414252e-05, + "loss": 0.9731, "step": 6507 }, { - "epoch": 1.3646466764520864, - "grad_norm": 8.272794502206157, - "learning_rate": 1.1935774535260539e-05, - "loss": 0.9301, + "epoch": 0.9189494493081051, + "grad_norm": 3.0747473294265513, + "learning_rate": 1.6256328094147003e-05, + "loss": 0.919, "step": 6508 }, { - "epoch": 1.3648563640176137, - "grad_norm": 9.093163977335657, - "learning_rate": 1.1933553422463866e-05, - "loss": 1.0307, + "epoch": 0.919090652358091, + "grad_norm": 3.3822927863583554, + "learning_rate": 1.6255138768757125e-05, + "loss": 0.9418, "step": 6509 }, { - "epoch": 1.365066051583141, - "grad_norm": 8.218454500695204, - "learning_rate": 1.1931332210569231e-05, - "loss": 0.8428, + "epoch": 0.9192318554080768, + "grad_norm": 3.5730048663200353, + "learning_rate": 1.6253949298000527e-05, + "loss": 1.1946, "step": 6510 }, { - "epoch": 1.3652757391486685, - "grad_norm": 7.800110874587736, - "learning_rate": 1.192911089969047e-05, - "loss": 0.7242, + "epoch": 0.9193730584580627, + "grad_norm": 3.3631679261710787, + "learning_rate": 1.6252759681904842e-05, + "loss": 1.0108, "step": 6511 }, { - "epoch": 1.3654854267141958, - "grad_norm": 7.149805351880659, - "learning_rate": 1.1926889489941428e-05, - "loss": 0.8924, + "epoch": 0.9195142615080486, + "grad_norm": 3.6846368827906053, + "learning_rate": 1.6251569920497725e-05, + "loss": 1.1637, "step": 6512 }, { - "epoch": 1.3656951142797231, - "grad_norm": 6.185942571839799, - "learning_rate": 1.192466798143596e-05, - "loss": 0.6214, + "epoch": 0.9196554645580345, + "grad_norm": 3.2062803068476686, + "learning_rate": 1.6250380013806825e-05, + "loss": 1.0155, "step": 6513 }, { - "epoch": 1.3659048018452506, - "grad_norm": 7.893766220554455, - "learning_rate": 1.1922446374287917e-05, - "loss": 0.8213, + "epoch": 0.9197966676080204, + "grad_norm": 3.450059360845324, + "learning_rate": 1.624918996185979e-05, + "loss": 1.0584, "step": 6514 }, { - "epoch": 1.366114489410778, - "grad_norm": 7.622345164180235, - "learning_rate": 1.1920224668611165e-05, - "loss": 0.8677, + "epoch": 0.9199378706580063, + "grad_norm": 3.1505281130072125, + "learning_rate": 1.624799976468428e-05, + "loss": 0.9195, "step": 6515 }, { - "epoch": 1.3663241769763053, - "grad_norm": 7.4316140844846155, - "learning_rate": 1.1918002864519567e-05, - "loss": 0.8886, + "epoch": 0.9200790737079921, + "grad_norm": 3.179172106833367, + "learning_rate": 1.6246809422307954e-05, + "loss": 0.9899, "step": 6516 }, { - "epoch": 1.3665338645418328, - "grad_norm": 6.8587455534025485, - "learning_rate": 1.1915780962126997e-05, - "loss": 0.7774, + "epoch": 0.920220276757978, + "grad_norm": 4.407861571885109, + "learning_rate": 1.6245618934758474e-05, + "loss": 1.1602, "step": 6517 }, { - "epoch": 1.36674355210736, - "grad_norm": 6.172199307142109, - "learning_rate": 1.1913558961547328e-05, - "loss": 0.641, + "epoch": 0.9203614798079639, + "grad_norm": 3.031374950062368, + "learning_rate": 1.6244428302063506e-05, + "loss": 0.9449, "step": 6518 }, { - "epoch": 1.3669532396728874, - "grad_norm": 8.750493772232634, - "learning_rate": 1.1911336862894446e-05, - "loss": 1.1288, + "epoch": 0.9205026828579498, + "grad_norm": 4.520511461060888, + "learning_rate": 1.6243237524250726e-05, + "loss": 1.1381, "step": 6519 }, { - "epoch": 1.3671629272384147, - "grad_norm": 6.659723599216442, - "learning_rate": 1.1909114666282233e-05, - "loss": 0.6658, + "epoch": 0.9206438859079357, + "grad_norm": 2.6540141904241437, + "learning_rate": 1.6242046601347796e-05, + "loss": 0.7752, "step": 6520 }, { - "epoch": 1.367372614803942, - "grad_norm": 8.520531440855127, - "learning_rate": 1.1906892371824583e-05, - "loss": 0.9822, + "epoch": 0.9207850889579214, + "grad_norm": 3.7924728146525797, + "learning_rate": 1.6240855533382403e-05, + "loss": 1.0883, "step": 6521 }, { - "epoch": 1.3675823023694695, - "grad_norm": 6.943030750636971, - "learning_rate": 1.1904669979635391e-05, - "loss": 0.6321, + "epoch": 0.9209262920079073, + "grad_norm": 3.4749409643240963, + "learning_rate": 1.623966432038222e-05, + "loss": 0.8391, "step": 6522 }, { - "epoch": 1.3677919899349968, - "grad_norm": 8.088101730410482, - "learning_rate": 1.1902447489828559e-05, - "loss": 0.7461, + "epoch": 0.9210674950578932, + "grad_norm": 3.090354350555065, + "learning_rate": 1.6238472962374935e-05, + "loss": 0.8217, "step": 6523 }, { - "epoch": 1.3680016775005241, - "grad_norm": 6.13136213200197, - "learning_rate": 1.1900224902517992e-05, - "loss": 0.695, + "epoch": 0.9212086981078791, + "grad_norm": 3.132292832661475, + "learning_rate": 1.6237281459388233e-05, + "loss": 0.9028, "step": 6524 }, { - "epoch": 1.3682113650660517, - "grad_norm": 7.553332933092758, - "learning_rate": 1.1898002217817606e-05, - "loss": 0.8562, + "epoch": 0.921349901157865, + "grad_norm": 3.041310840224807, + "learning_rate": 1.62360898114498e-05, + "loss": 0.8923, "step": 6525 }, { - "epoch": 1.368421052631579, - "grad_norm": 7.833618812952744, - "learning_rate": 1.1895779435841313e-05, - "loss": 0.8995, + "epoch": 0.9214911042078509, + "grad_norm": 3.7704624380968337, + "learning_rate": 1.6234898018587336e-05, + "loss": 1.0205, "step": 6526 }, { - "epoch": 1.3686307401971063, - "grad_norm": 6.599145703370401, - "learning_rate": 1.1893556556703037e-05, - "loss": 0.7193, + "epoch": 0.9216323072578367, + "grad_norm": 3.3463124645082925, + "learning_rate": 1.6233706080828536e-05, + "loss": 1.1514, "step": 6527 }, { - "epoch": 1.3688404277626336, - "grad_norm": 8.287542650753487, - "learning_rate": 1.1891333580516701e-05, - "loss": 0.9613, + "epoch": 0.9217735103078226, + "grad_norm": 3.823108935158952, + "learning_rate": 1.6232513998201094e-05, + "loss": 1.1626, "step": 6528 }, { - "epoch": 1.3690501153281611, - "grad_norm": 8.436060212836612, - "learning_rate": 1.188911050739624e-05, - "loss": 1.0079, + "epoch": 0.9219147133578085, + "grad_norm": 3.247472082851392, + "learning_rate": 1.6231321770732723e-05, + "loss": 0.9973, "step": 6529 }, { - "epoch": 1.3692598028936884, - "grad_norm": 7.204025088454156, - "learning_rate": 1.1886887337455587e-05, - "loss": 0.8264, + "epoch": 0.9220559164077944, + "grad_norm": 3.1485110413108854, + "learning_rate": 1.6230129398451124e-05, + "loss": 1.0056, "step": 6530 }, { - "epoch": 1.3694694904592157, - "grad_norm": 7.766508692077376, - "learning_rate": 1.188466407080869e-05, - "loss": 0.769, + "epoch": 0.9221971194577803, + "grad_norm": 3.613716171026468, + "learning_rate": 1.6228936881384004e-05, + "loss": 0.999, "step": 6531 }, { - "epoch": 1.369679178024743, - "grad_norm": 6.453662159813938, - "learning_rate": 1.1882440707569485e-05, - "loss": 0.6975, + "epoch": 0.9223383225077662, + "grad_norm": 3.0014247942425327, + "learning_rate": 1.6227744219559086e-05, + "loss": 0.9268, "step": 6532 }, { - "epoch": 1.3698888655902706, - "grad_norm": 9.257651786173767, - "learning_rate": 1.1880217247851934e-05, - "loss": 0.8093, + "epoch": 0.922479525557752, + "grad_norm": 3.4157491085468004, + "learning_rate": 1.622655141300408e-05, + "loss": 1.021, "step": 6533 }, { - "epoch": 1.3700985531557979, - "grad_norm": 8.260206614196193, - "learning_rate": 1.1877993691769985e-05, - "loss": 0.8786, + "epoch": 0.9226207286077379, + "grad_norm": 3.1163121875221345, + "learning_rate": 1.622535846174671e-05, + "loss": 0.9583, "step": 6534 }, { - "epoch": 1.3703082407213252, - "grad_norm": 7.913741648930507, - "learning_rate": 1.1875770039437602e-05, - "loss": 0.854, + "epoch": 0.9227619316577238, + "grad_norm": 3.2765055269526098, + "learning_rate": 1.6224165365814696e-05, + "loss": 1.0569, "step": 6535 }, { - "epoch": 1.3705179282868527, - "grad_norm": 8.100140023906857, - "learning_rate": 1.1873546290968751e-05, - "loss": 0.9358, + "epoch": 0.9229031347077097, + "grad_norm": 3.1145723217312815, + "learning_rate": 1.6222972125235766e-05, + "loss": 0.9179, "step": 6536 }, { - "epoch": 1.37072761585238, - "grad_norm": 7.1798431205280195, - "learning_rate": 1.1871322446477405e-05, - "loss": 0.7839, + "epoch": 0.9230443377576956, + "grad_norm": 3.8362118046865925, + "learning_rate": 1.6221778740037654e-05, + "loss": 1.1982, "step": 6537 }, { - "epoch": 1.3709373034179073, - "grad_norm": 8.432020634333737, - "learning_rate": 1.1869098506077535e-05, - "loss": 1.1451, + "epoch": 0.9231855408076814, + "grad_norm": 3.106405013315922, + "learning_rate": 1.6220585210248093e-05, + "loss": 0.8733, "step": 6538 }, { - "epoch": 1.3711469909834346, - "grad_norm": 6.22945718774862, - "learning_rate": 1.1866874469883127e-05, - "loss": 0.7857, + "epoch": 0.9233267438576673, + "grad_norm": 3.097846497794573, + "learning_rate": 1.6219391535894813e-05, + "loss": 0.9241, "step": 6539 }, { - "epoch": 1.371356678548962, - "grad_norm": 6.21727889179072, - "learning_rate": 1.1864650338008166e-05, - "loss": 0.6953, + "epoch": 0.9234679469076532, + "grad_norm": 2.797389330606165, + "learning_rate": 1.6218197717005562e-05, + "loss": 0.7392, "step": 6540 }, { - "epoch": 1.3715663661144895, - "grad_norm": 8.71230058440118, - "learning_rate": 1.1862426110566639e-05, - "loss": 1.1317, + "epoch": 0.9236091499576391, + "grad_norm": 3.7784967448358446, + "learning_rate": 1.6217003753608082e-05, + "loss": 1.0826, "step": 6541 }, { - "epoch": 1.3717760536800168, - "grad_norm": 6.476294272681391, - "learning_rate": 1.1860201787672543e-05, - "loss": 0.8774, + "epoch": 0.923750353007625, + "grad_norm": 3.6470869353337445, + "learning_rate": 1.6215809645730115e-05, + "loss": 1.0618, "step": 6542 }, { - "epoch": 1.371985741245544, - "grad_norm": 7.773833533123625, - "learning_rate": 1.1857977369439879e-05, - "loss": 0.7438, + "epoch": 0.9238915560576109, + "grad_norm": 3.7944147655600178, + "learning_rate": 1.621461539339942e-05, + "loss": 1.0126, "step": 6543 }, { - "epoch": 1.3721954288110716, - "grad_norm": 5.861100426941333, - "learning_rate": 1.1855752855982651e-05, - "loss": 0.6423, + "epoch": 0.9240327591075967, + "grad_norm": 3.0046921863210025, + "learning_rate": 1.621342099664375e-05, + "loss": 0.9825, "step": 6544 }, { - "epoch": 1.372405116376599, - "grad_norm": 6.574254840637303, - "learning_rate": 1.1853528247414874e-05, - "loss": 0.6343, + "epoch": 0.9241739621575826, + "grad_norm": 4.058260534922192, + "learning_rate": 1.6212226455490854e-05, + "loss": 1.0173, "step": 6545 }, { - "epoch": 1.3726148039421262, - "grad_norm": 6.893377801274218, - "learning_rate": 1.185130354385056e-05, - "loss": 0.7643, + "epoch": 0.9243151652075685, + "grad_norm": 3.682830785820891, + "learning_rate": 1.6211031769968503e-05, + "loss": 1.1978, "step": 6546 }, { - "epoch": 1.3728244915076537, - "grad_norm": 6.167877950507337, - "learning_rate": 1.1849078745403723e-05, - "loss": 0.7681, + "epoch": 0.9244563682575544, + "grad_norm": 2.85664317665372, + "learning_rate": 1.6209836940104454e-05, + "loss": 0.8729, "step": 6547 }, { - "epoch": 1.373034179073181, - "grad_norm": 6.292095878116126, - "learning_rate": 1.1846853852188398e-05, - "loss": 0.7757, + "epoch": 0.9245975713075403, + "grad_norm": 3.3040525020861815, + "learning_rate": 1.6208641965926474e-05, + "loss": 1.0727, "step": 6548 }, { - "epoch": 1.3732438666387083, - "grad_norm": 7.708002096251597, - "learning_rate": 1.184462886431861e-05, - "loss": 0.7935, + "epoch": 0.9247387743575262, + "grad_norm": 3.6136875716159746, + "learning_rate": 1.6207446847462338e-05, + "loss": 0.8909, "step": 6549 }, { - "epoch": 1.3734535542042356, - "grad_norm": 10.22684780710782, - "learning_rate": 1.184240378190839e-05, - "loss": 0.9295, + "epoch": 0.924879977407512, + "grad_norm": 2.7700756853847577, + "learning_rate": 1.6206251584739817e-05, + "loss": 0.7598, "step": 6550 }, { - "epoch": 1.373663241769763, - "grad_norm": 7.7082793971969155, - "learning_rate": 1.1840178605071784e-05, - "loss": 1.0751, + "epoch": 0.9250211804574979, + "grad_norm": 3.145393815891417, + "learning_rate": 1.6205056177786694e-05, + "loss": 0.7477, "step": 6551 }, { - "epoch": 1.3738729293352905, - "grad_norm": 7.399360127156178, - "learning_rate": 1.1837953333922833e-05, - "loss": 0.8041, + "epoch": 0.9251623835074838, + "grad_norm": 3.7099153690182023, + "learning_rate": 1.620386062663074e-05, + "loss": 1.3379, "step": 6552 }, { - "epoch": 1.3740826169008178, - "grad_norm": 6.41490753045952, - "learning_rate": 1.1835727968575582e-05, - "loss": 0.717, + "epoch": 0.9253035865574697, + "grad_norm": 2.46848462197872, + "learning_rate": 1.6202664931299747e-05, + "loss": 0.7509, "step": 6553 }, { - "epoch": 1.374292304466345, - "grad_norm": 6.447438474924967, - "learning_rate": 1.1833502509144093e-05, - "loss": 0.7296, + "epoch": 0.9254447896074556, + "grad_norm": 4.174216918755577, + "learning_rate": 1.6201469091821498e-05, + "loss": 1.3007, "step": 6554 }, { - "epoch": 1.3745019920318726, - "grad_norm": 7.69345772460767, - "learning_rate": 1.1831276955742417e-05, - "loss": 1.0048, + "epoch": 0.9255859926574413, + "grad_norm": 3.4526130704189724, + "learning_rate": 1.6200273108223784e-05, + "loss": 1.0193, "step": 6555 }, { - "epoch": 1.3747116795974, - "grad_norm": 8.525720443769169, - "learning_rate": 1.1829051308484623e-05, - "loss": 0.9567, + "epoch": 0.9257271957074272, + "grad_norm": 3.848591305024863, + "learning_rate": 1.6199076980534406e-05, + "loss": 1.2292, "step": 6556 }, { - "epoch": 1.3749213671629272, - "grad_norm": 7.873091609319561, - "learning_rate": 1.1826825567484777e-05, - "loss": 1.0782, + "epoch": 0.9258683987574131, + "grad_norm": 3.247033543524722, + "learning_rate": 1.6197880708781153e-05, + "loss": 0.8927, "step": 6557 }, { - "epoch": 1.3751310547284545, - "grad_norm": 7.660062956156774, - "learning_rate": 1.1824599732856952e-05, - "loss": 0.7907, + "epoch": 0.926009601807399, + "grad_norm": 3.465304199821099, + "learning_rate": 1.6196684292991827e-05, + "loss": 0.8489, "step": 6558 }, { - "epoch": 1.3753407422939818, - "grad_norm": 8.336168050259152, - "learning_rate": 1.1822373804715224e-05, - "loss": 0.9899, + "epoch": 0.9261508048573849, + "grad_norm": 3.2732944513517737, + "learning_rate": 1.619548773319424e-05, + "loss": 0.9409, "step": 6559 }, { - "epoch": 1.3755504298595094, - "grad_norm": 6.66942467141876, - "learning_rate": 1.182014778317368e-05, - "loss": 0.7631, + "epoch": 0.9262920079073708, + "grad_norm": 3.4228844891062935, + "learning_rate": 1.6194291029416188e-05, + "loss": 1.0694, "step": 6560 }, { - "epoch": 1.3757601174250367, - "grad_norm": 6.967669774536097, - "learning_rate": 1.1817921668346404e-05, - "loss": 1.0413, + "epoch": 0.9264332109573566, + "grad_norm": 4.350219465506222, + "learning_rate": 1.6193094181685487e-05, + "loss": 1.2617, "step": 6561 }, { - "epoch": 1.375969804990564, - "grad_norm": 6.984940057443896, - "learning_rate": 1.1815695460347491e-05, - "loss": 0.6704, + "epoch": 0.9265744140073425, + "grad_norm": 2.840311862374248, + "learning_rate": 1.6191897190029956e-05, + "loss": 0.9025, "step": 6562 }, { - "epoch": 1.3761794925560915, - "grad_norm": 6.300850976332309, - "learning_rate": 1.1813469159291036e-05, - "loss": 0.7338, + "epoch": 0.9267156170573284, + "grad_norm": 4.173153730264222, + "learning_rate": 1.6190700054477402e-05, + "loss": 0.9725, "step": 6563 }, { - "epoch": 1.3763891801216188, - "grad_norm": 9.362977625730599, - "learning_rate": 1.181124276529114e-05, - "loss": 1.1666, + "epoch": 0.9268568201073143, + "grad_norm": 3.5415823690999377, + "learning_rate": 1.6189502775055662e-05, + "loss": 1.0944, "step": 6564 }, { - "epoch": 1.3765988676871461, - "grad_norm": 7.339314243966552, - "learning_rate": 1.180901627846191e-05, - "loss": 0.9496, + "epoch": 0.9269980231573002, + "grad_norm": 3.3256080983271277, + "learning_rate": 1.6188305351792545e-05, + "loss": 1.0635, "step": 6565 }, { - "epoch": 1.3768085552526736, - "grad_norm": 6.486303707330697, - "learning_rate": 1.1806789698917464e-05, - "loss": 0.7866, + "epoch": 0.9271392262072861, + "grad_norm": 3.6999797565389034, + "learning_rate": 1.618710778471588e-05, + "loss": 1.1142, "step": 6566 }, { - "epoch": 1.377018242818201, - "grad_norm": 7.359170399846788, - "learning_rate": 1.1804563026771905e-05, - "loss": 0.8162, + "epoch": 0.9272804292572719, + "grad_norm": 3.9718094721407073, + "learning_rate": 1.6185910073853512e-05, + "loss": 1.0722, "step": 6567 }, { - "epoch": 1.3772279303837283, - "grad_norm": 9.306617448383625, - "learning_rate": 1.1802336262139368e-05, - "loss": 0.8148, + "epoch": 0.9274216323072578, + "grad_norm": 2.6431148011264454, + "learning_rate": 1.618471221923326e-05, + "loss": 0.6533, "step": 6568 }, { - "epoch": 1.3774376179492556, - "grad_norm": 6.352273582171359, - "learning_rate": 1.1800109405133967e-05, - "loss": 0.5752, + "epoch": 0.9275628353572437, + "grad_norm": 2.899684122298023, + "learning_rate": 1.6183514220882967e-05, + "loss": 0.7918, "step": 6569 }, { - "epoch": 1.3776473055147829, - "grad_norm": 8.336345222891403, - "learning_rate": 1.1797882455869836e-05, - "loss": 0.8743, + "epoch": 0.9277040384072296, + "grad_norm": 3.5686752323456026, + "learning_rate": 1.6182316078830473e-05, + "loss": 1.0149, "step": 6570 }, { - "epoch": 1.3778569930803104, - "grad_norm": 8.12712759602131, - "learning_rate": 1.1795655414461112e-05, - "loss": 1.1835, + "epoch": 0.9278452414572155, + "grad_norm": 3.4501077651045313, + "learning_rate": 1.6181117793103623e-05, + "loss": 0.9554, "step": 6571 }, { - "epoch": 1.3780666806458377, - "grad_norm": 5.809284087732241, - "learning_rate": 1.1793428281021935e-05, - "loss": 0.7845, + "epoch": 0.9279864445072014, + "grad_norm": 3.1359683910892366, + "learning_rate": 1.617991936373027e-05, + "loss": 1.0575, "step": 6572 }, { - "epoch": 1.378276368211365, - "grad_norm": 5.6606781214139685, - "learning_rate": 1.1791201055666446e-05, - "loss": 0.6728, + "epoch": 0.9281276475571872, + "grad_norm": 3.8091972286309383, + "learning_rate": 1.6178720790738253e-05, + "loss": 1.1078, "step": 6573 }, { - "epoch": 1.3784860557768925, - "grad_norm": 6.066486406165189, - "learning_rate": 1.1788973738508797e-05, - "loss": 0.6765, + "epoch": 0.9282688506071731, + "grad_norm": 3.4005356649117537, + "learning_rate": 1.6177522074155436e-05, + "loss": 0.8075, "step": 6574 }, { - "epoch": 1.3786957433424198, - "grad_norm": 5.912181407690022, - "learning_rate": 1.1786746329663144e-05, - "loss": 0.8034, + "epoch": 0.928410053657159, + "grad_norm": 3.3953090304717635, + "learning_rate": 1.6176323214009673e-05, + "loss": 0.9872, "step": 6575 }, { - "epoch": 1.3789054309079471, - "grad_norm": 7.044504600913205, - "learning_rate": 1.1784518829243635e-05, - "loss": 0.7407, + "epoch": 0.9285512567071449, + "grad_norm": 3.145929491216068, + "learning_rate": 1.617512421032883e-05, + "loss": 0.7226, "step": 6576 }, { - "epoch": 1.3791151184734745, - "grad_norm": 7.5961482495600245, - "learning_rate": 1.1782291237364449e-05, - "loss": 0.945, + "epoch": 0.9286924597571308, + "grad_norm": 3.592347105746373, + "learning_rate": 1.6173925063140763e-05, + "loss": 1.0242, "step": 6577 }, { - "epoch": 1.3793248060390018, - "grad_norm": 7.352758266258367, - "learning_rate": 1.1780063554139742e-05, - "loss": 0.8457, + "epoch": 0.9288336628071167, + "grad_norm": 5.026814788563661, + "learning_rate": 1.6172725772473343e-05, + "loss": 1.2462, "step": 6578 }, { - "epoch": 1.3795344936045293, - "grad_norm": 6.1562648419925, - "learning_rate": 1.177783577968369e-05, - "loss": 0.6212, + "epoch": 0.9289748658571025, + "grad_norm": 3.428738721048319, + "learning_rate": 1.6171526338354447e-05, + "loss": 1.0621, "step": 6579 }, { - "epoch": 1.3797441811700566, - "grad_norm": 7.851459440698408, - "learning_rate": 1.1775607914110474e-05, - "loss": 0.9907, + "epoch": 0.9291160689070884, + "grad_norm": 4.152960011796144, + "learning_rate": 1.617032676081194e-05, + "loss": 0.9564, "step": 6580 }, { - "epoch": 1.379953868735584, - "grad_norm": 7.247724791776623, - "learning_rate": 1.1773379957534273e-05, - "loss": 0.7796, + "epoch": 0.9292572719570743, + "grad_norm": 3.188149118146041, + "learning_rate": 1.6169127039873705e-05, + "loss": 0.898, "step": 6581 }, { - "epoch": 1.3801635563011114, - "grad_norm": 7.191071626854504, - "learning_rate": 1.1771151910069267e-05, - "loss": 0.8473, + "epoch": 0.9293984750070602, + "grad_norm": 3.5968951619181566, + "learning_rate": 1.616792717556762e-05, + "loss": 1.1058, "step": 6582 }, { - "epoch": 1.3803732438666387, - "grad_norm": 6.3841339014981155, - "learning_rate": 1.176892377182966e-05, - "loss": 0.8311, + "epoch": 0.9295396780570461, + "grad_norm": 2.9634276321167383, + "learning_rate": 1.6166727167921574e-05, + "loss": 0.7907, "step": 6583 }, { - "epoch": 1.380582931432166, - "grad_norm": 6.818605530534439, - "learning_rate": 1.176669554292964e-05, - "loss": 0.9068, + "epoch": 0.929680881107032, + "grad_norm": 3.3421397472548584, + "learning_rate": 1.616552701696345e-05, + "loss": 1.1259, "step": 6584 }, { - "epoch": 1.3807926189976936, - "grad_norm": 5.85568504907392, - "learning_rate": 1.176446722348341e-05, - "loss": 0.6317, + "epoch": 0.9298220841570178, + "grad_norm": 4.063807513564883, + "learning_rate": 1.6164326722721143e-05, + "loss": 0.928, "step": 6585 }, { - "epoch": 1.3810023065632209, - "grad_norm": 6.8586862871137, - "learning_rate": 1.1762238813605171e-05, - "loss": 0.7116, + "epoch": 0.9299632872070037, + "grad_norm": 2.824623220074162, + "learning_rate": 1.616312628522254e-05, + "loss": 0.887, "step": 6586 }, { - "epoch": 1.3812119941287482, - "grad_norm": 6.846916365487637, - "learning_rate": 1.1760010313409143e-05, - "loss": 0.6638, + "epoch": 0.9301044902569896, + "grad_norm": 3.7609218569945027, + "learning_rate": 1.616192570449555e-05, + "loss": 1.0199, "step": 6587 }, { - "epoch": 1.3814216816942755, - "grad_norm": 7.163554757813141, - "learning_rate": 1.1757781723009525e-05, - "loss": 0.9004, + "epoch": 0.9302456933069755, + "grad_norm": 3.094255537332547, + "learning_rate": 1.6160724980568066e-05, + "loss": 0.7786, "step": 6588 }, { - "epoch": 1.3816313692598028, - "grad_norm": 6.731100352203539, - "learning_rate": 1.175555304252055e-05, - "loss": 0.7153, + "epoch": 0.9303868963569613, + "grad_norm": 3.3790308587174, + "learning_rate": 1.6159524113467994e-05, + "loss": 0.9376, "step": 6589 }, { - "epoch": 1.3818410568253303, - "grad_norm": 6.891538944043689, - "learning_rate": 1.1753324272056436e-05, - "loss": 0.8494, + "epoch": 0.9305280994069471, + "grad_norm": 3.6992299814511838, + "learning_rate": 1.615832310322324e-05, + "loss": 0.8449, "step": 6590 }, { - "epoch": 1.3820507443908576, - "grad_norm": 8.392547245159692, - "learning_rate": 1.1751095411731414e-05, - "loss": 0.8276, + "epoch": 0.930669302456933, + "grad_norm": 3.0140259491578045, + "learning_rate": 1.6157121949861716e-05, + "loss": 0.8915, "step": 6591 }, { - "epoch": 1.382260431956385, - "grad_norm": 8.85170138099449, - "learning_rate": 1.1748866461659713e-05, - "loss": 1.0985, + "epoch": 0.9308105055069189, + "grad_norm": 4.082198101389141, + "learning_rate": 1.615592065341134e-05, + "loss": 1.0802, "step": 6592 }, { - "epoch": 1.3824701195219125, - "grad_norm": 9.415444169015652, - "learning_rate": 1.1746637421955573e-05, - "loss": 1.0191, + "epoch": 0.9309517085569048, + "grad_norm": 4.0294258299787336, + "learning_rate": 1.6154719213900026e-05, + "loss": 1.2819, "step": 6593 }, { - "epoch": 1.3826798070874398, - "grad_norm": 8.414984492939231, - "learning_rate": 1.1744408292733236e-05, - "loss": 0.9688, + "epoch": 0.9310929116068907, + "grad_norm": 3.2170476712423595, + "learning_rate": 1.6153517631355696e-05, + "loss": 1.0005, "step": 6594 }, { - "epoch": 1.382889494652967, - "grad_norm": 6.15159211441699, - "learning_rate": 1.1742179074106948e-05, - "loss": 0.8505, + "epoch": 0.9312341146568766, + "grad_norm": 3.9953870540984426, + "learning_rate": 1.615231590580627e-05, + "loss": 1.5294, "step": 6595 }, { - "epoch": 1.3830991822184944, - "grad_norm": 7.311576724479214, - "learning_rate": 1.1739949766190959e-05, - "loss": 0.7363, + "epoch": 0.9313753177068624, + "grad_norm": 3.2808214715101602, + "learning_rate": 1.6151114037279682e-05, + "loss": 0.8664, "step": 6596 }, { - "epoch": 1.3833088697840217, - "grad_norm": 7.2252640343780365, - "learning_rate": 1.173772036909953e-05, - "loss": 0.7245, + "epoch": 0.9315165207568483, + "grad_norm": 3.018473763698476, + "learning_rate": 1.6149912025803858e-05, + "loss": 0.9352, "step": 6597 }, { - "epoch": 1.3835185573495492, - "grad_norm": 6.7671096570807805, - "learning_rate": 1.1735490882946918e-05, - "loss": 0.8324, + "epoch": 0.9316577238068342, + "grad_norm": 3.2547615575746933, + "learning_rate": 1.614870987140674e-05, + "loss": 0.846, "step": 6598 }, { - "epoch": 1.3837282449150765, - "grad_norm": 6.038666799327115, - "learning_rate": 1.1733261307847386e-05, - "loss": 0.8066, + "epoch": 0.9317989268568201, + "grad_norm": 3.2381716267024725, + "learning_rate": 1.6147507574116255e-05, + "loss": 1.0794, "step": 6599 }, { - "epoch": 1.3839379324806038, - "grad_norm": 6.93936333838456, - "learning_rate": 1.1731031643915207e-05, - "loss": 0.8516, + "epoch": 0.931940129906806, + "grad_norm": 3.1232228305841723, + "learning_rate": 1.614630513396035e-05, + "loss": 0.7491, "step": 6600 }, { - "epoch": 1.3841476200461313, - "grad_norm": 9.151150395658048, - "learning_rate": 1.1728801891264653e-05, - "loss": 1.1863, + "epoch": 0.9320813329567919, + "grad_norm": 3.3246207921196107, + "learning_rate": 1.6145102550966968e-05, + "loss": 0.9123, "step": 6601 }, { - "epoch": 1.3843573076116586, - "grad_norm": 6.433808575594778, - "learning_rate": 1.1726572050010003e-05, - "loss": 0.9425, + "epoch": 0.9322225360067777, + "grad_norm": 4.079148799336749, + "learning_rate": 1.6143899825164058e-05, + "loss": 1.056, "step": 6602 }, { - "epoch": 1.384566995177186, - "grad_norm": 6.586625059049893, - "learning_rate": 1.1724342120265543e-05, - "loss": 0.7479, + "epoch": 0.9323637390567636, + "grad_norm": 3.599630589151924, + "learning_rate": 1.614269695657957e-05, + "loss": 0.9798, "step": 6603 }, { - "epoch": 1.3847766827427135, - "grad_norm": 7.870109632890547, - "learning_rate": 1.172211210214556e-05, - "loss": 0.9992, + "epoch": 0.9325049421067495, + "grad_norm": 3.077999710278226, + "learning_rate": 1.6141493945241453e-05, + "loss": 0.8897, "step": 6604 }, { - "epoch": 1.3849863703082408, - "grad_norm": 6.727053483684335, - "learning_rate": 1.1719881995764339e-05, - "loss": 0.8071, + "epoch": 0.9326461451567354, + "grad_norm": 2.741628885574526, + "learning_rate": 1.614029079117767e-05, + "loss": 0.8615, "step": 6605 }, { - "epoch": 1.385196057873768, - "grad_norm": 6.434914581165896, - "learning_rate": 1.1717651801236189e-05, - "loss": 0.7525, + "epoch": 0.9327873482067213, + "grad_norm": 3.287507221927196, + "learning_rate": 1.6139087494416184e-05, + "loss": 0.9631, "step": 6606 }, { - "epoch": 1.3854057454392954, - "grad_norm": 7.616129780227017, - "learning_rate": 1.1715421518675403e-05, - "loss": 0.9002, + "epoch": 0.9329285512567072, + "grad_norm": 2.794249827723645, + "learning_rate": 1.6137884054984957e-05, + "loss": 0.7355, "step": 6607 }, { - "epoch": 1.3856154330048227, - "grad_norm": 7.488054598928749, - "learning_rate": 1.1713191148196288e-05, - "loss": 0.8735, + "epoch": 0.933069754306693, + "grad_norm": 3.5959268876182113, + "learning_rate": 1.613668047291195e-05, + "loss": 0.9246, "step": 6608 }, { - "epoch": 1.3858251205703502, - "grad_norm": 5.949504204393372, - "learning_rate": 1.1710960689913156e-05, - "loss": 0.5876, + "epoch": 0.9332109573566789, + "grad_norm": 3.0472050206252, + "learning_rate": 1.6135476748225144e-05, + "loss": 0.8533, "step": 6609 }, { - "epoch": 1.3860348081358775, - "grad_norm": 7.394034745091445, - "learning_rate": 1.1708730143940321e-05, - "loss": 0.8589, + "epoch": 0.9333521604066648, + "grad_norm": 3.442388327176033, + "learning_rate": 1.6134272880952506e-05, + "loss": 0.8853, "step": 6610 }, { - "epoch": 1.3862444957014048, - "grad_norm": 6.002640102669201, - "learning_rate": 1.1706499510392099e-05, - "loss": 0.6967, + "epoch": 0.9334933634566507, + "grad_norm": 3.972918683569106, + "learning_rate": 1.6133068871122014e-05, + "loss": 0.9759, "step": 6611 }, { - "epoch": 1.3864541832669324, - "grad_norm": 7.2500665976079945, - "learning_rate": 1.1704268789382824e-05, - "loss": 0.7438, + "epoch": 0.9336345665066366, + "grad_norm": 3.703257759697238, + "learning_rate": 1.613186471876165e-05, + "loss": 1.1206, "step": 6612 }, { - "epoch": 1.3866638708324597, - "grad_norm": 5.8573168399815, - "learning_rate": 1.1702037981026814e-05, - "loss": 0.6853, + "epoch": 0.9337757695566224, + "grad_norm": 3.9603337825891396, + "learning_rate": 1.6130660423899402e-05, + "loss": 1.1118, "step": 6613 }, { - "epoch": 1.386873558397987, - "grad_norm": 7.3844037753007425, - "learning_rate": 1.1699807085438406e-05, - "loss": 0.8578, + "epoch": 0.9339169726066083, + "grad_norm": 3.3795723024989948, + "learning_rate": 1.612945598656325e-05, + "loss": 1.1892, "step": 6614 }, { - "epoch": 1.3870832459635143, - "grad_norm": 6.815948282668088, - "learning_rate": 1.1697576102731936e-05, - "loss": 0.7754, + "epoch": 0.9340581756565942, + "grad_norm": 3.9905483845370537, + "learning_rate": 1.6128251406781192e-05, + "loss": 1.2366, "step": 6615 }, { - "epoch": 1.3872929335290416, - "grad_norm": 6.014188177500772, - "learning_rate": 1.1695345033021748e-05, - "loss": 0.6792, + "epoch": 0.9341993787065801, + "grad_norm": 3.329887909936065, + "learning_rate": 1.6127046684581212e-05, + "loss": 0.9167, "step": 6616 }, { - "epoch": 1.3875026210945691, - "grad_norm": 6.750373722780624, - "learning_rate": 1.1693113876422184e-05, - "loss": 0.8654, + "epoch": 0.934340581756566, + "grad_norm": 2.8474628022446113, + "learning_rate": 1.6125841819991318e-05, + "loss": 0.7879, "step": 6617 }, { - "epoch": 1.3877123086600964, - "grad_norm": 7.003411895232289, - "learning_rate": 1.16908826330476e-05, - "loss": 0.8274, + "epoch": 0.9344817848065519, + "grad_norm": 3.81014116857258, + "learning_rate": 1.6124636813039502e-05, + "loss": 1.3412, "step": 6618 }, { - "epoch": 1.3879219962256237, - "grad_norm": 7.2807614294050405, - "learning_rate": 1.1688651303012347e-05, - "loss": 0.9513, + "epoch": 0.9346229878565377, + "grad_norm": 3.7165276637482454, + "learning_rate": 1.6123431663753774e-05, + "loss": 1.1202, "step": 6619 }, { - "epoch": 1.3881316837911513, - "grad_norm": 8.022940085852035, - "learning_rate": 1.1686419886430787e-05, - "loss": 0.9033, + "epoch": 0.9347641909065236, + "grad_norm": 3.287506410672592, + "learning_rate": 1.6122226372162137e-05, + "loss": 0.9609, "step": 6620 }, { - "epoch": 1.3883413713566786, - "grad_norm": 8.701788482053715, - "learning_rate": 1.1684188383417281e-05, - "loss": 0.8921, + "epoch": 0.9349053939565095, + "grad_norm": 3.221201278525491, + "learning_rate": 1.6121020938292604e-05, + "loss": 1.0229, "step": 6621 }, { - "epoch": 1.3885510589222059, - "grad_norm": 5.80839781135677, - "learning_rate": 1.16819567940862e-05, - "loss": 0.7583, + "epoch": 0.9350465970064954, + "grad_norm": 3.4827038972567044, + "learning_rate": 1.6119815362173188e-05, + "loss": 1.0889, "step": 6622 }, { - "epoch": 1.3887607464877334, - "grad_norm": 6.441235085451961, - "learning_rate": 1.1679725118551915e-05, - "loss": 0.6999, + "epoch": 0.9351878000564812, + "grad_norm": 2.98283698591927, + "learning_rate": 1.6118609643831905e-05, + "loss": 0.8248, "step": 6623 }, { - "epoch": 1.3889704340532607, - "grad_norm": 8.155314715242946, - "learning_rate": 1.1677493356928803e-05, - "loss": 1.0398, + "epoch": 0.935329003106467, + "grad_norm": 3.7315759284376133, + "learning_rate": 1.6117403783296778e-05, + "loss": 1.1653, "step": 6624 }, { - "epoch": 1.389180121618788, - "grad_norm": 7.350547983014046, - "learning_rate": 1.1675261509331253e-05, - "loss": 0.9179, + "epoch": 0.9354702061564529, + "grad_norm": 2.486813716941703, + "learning_rate": 1.611619778059583e-05, + "loss": 0.7721, "step": 6625 }, { - "epoch": 1.3893898091843153, - "grad_norm": 7.870354504337803, - "learning_rate": 1.167302957587364e-05, - "loss": 0.98, + "epoch": 0.9356114092064388, + "grad_norm": 3.9327041639059077, + "learning_rate": 1.6114991635757085e-05, + "loss": 1.1281, "step": 6626 }, { - "epoch": 1.3895994967498426, - "grad_norm": 8.192615353854334, - "learning_rate": 1.1670797556670363e-05, - "loss": 0.8264, + "epoch": 0.9357526122564247, + "grad_norm": 3.8592349273098367, + "learning_rate": 1.611378534880857e-05, + "loss": 0.9629, "step": 6627 }, { - "epoch": 1.3898091843153701, - "grad_norm": 8.517072470381054, - "learning_rate": 1.1668565451835812e-05, - "loss": 1.1305, + "epoch": 0.9358938153064106, + "grad_norm": 3.346570976486084, + "learning_rate": 1.611257891977833e-05, + "loss": 1.0868, "step": 6628 }, { - "epoch": 1.3900188718808975, - "grad_norm": 6.9074091103139255, - "learning_rate": 1.1666333261484386e-05, - "loss": 0.8471, + "epoch": 0.9360350183563965, + "grad_norm": 3.110660704947061, + "learning_rate": 1.6111372348694397e-05, + "loss": 0.8131, "step": 6629 }, { - "epoch": 1.3902285594464248, - "grad_norm": 7.474998169210219, - "learning_rate": 1.1664100985730492e-05, - "loss": 0.7854, + "epoch": 0.9361762214063823, + "grad_norm": 3.0689548252960726, + "learning_rate": 1.6110165635584807e-05, + "loss": 0.9233, "step": 6630 }, { - "epoch": 1.3904382470119523, - "grad_norm": 7.572328663667642, - "learning_rate": 1.1661868624688535e-05, - "loss": 0.8249, + "epoch": 0.9363174244563682, + "grad_norm": 3.3533527982205085, + "learning_rate": 1.6108958780477607e-05, + "loss": 1.0977, "step": 6631 }, { - "epoch": 1.3906479345774796, - "grad_norm": 7.057146820761422, - "learning_rate": 1.1659636178472928e-05, - "loss": 0.7033, + "epoch": 0.9364586275063541, + "grad_norm": 3.038257348862183, + "learning_rate": 1.6107751783400845e-05, + "loss": 0.7936, "step": 6632 }, { - "epoch": 1.390857622143007, - "grad_norm": 6.370976195399844, - "learning_rate": 1.1657403647198091e-05, - "loss": 0.6608, + "epoch": 0.93659983055634, + "grad_norm": 3.2941898820665467, + "learning_rate": 1.6106544644382567e-05, + "loss": 0.9352, "step": 6633 }, { - "epoch": 1.3910673097085342, - "grad_norm": 6.567614297864201, - "learning_rate": 1.165517103097844e-05, - "loss": 0.7177, + "epoch": 0.9367410336063259, + "grad_norm": 3.484758876676196, + "learning_rate": 1.610533736345083e-05, + "loss": 1.0089, "step": 6634 }, { - "epoch": 1.3912769972740615, - "grad_norm": 7.143718740544215, - "learning_rate": 1.1652938329928405e-05, - "loss": 0.8184, + "epoch": 0.9368822366563118, + "grad_norm": 3.4283958984201943, + "learning_rate": 1.610412994063369e-05, + "loss": 0.9341, "step": 6635 }, { - "epoch": 1.391486684839589, - "grad_norm": 7.242472116809556, - "learning_rate": 1.1650705544162413e-05, - "loss": 0.9029, + "epoch": 0.9370234397062976, + "grad_norm": 4.385211688915032, + "learning_rate": 1.6102922375959204e-05, + "loss": 1.0819, "step": 6636 }, { - "epoch": 1.3916963724051163, - "grad_norm": 6.456009190251499, - "learning_rate": 1.1648472673794898e-05, - "loss": 0.8493, + "epoch": 0.9371646427562835, + "grad_norm": 3.6863060714029827, + "learning_rate": 1.6101714669455438e-05, + "loss": 1.0554, "step": 6637 }, { - "epoch": 1.3919060599706436, - "grad_norm": 7.517263882621033, - "learning_rate": 1.16462397189403e-05, - "loss": 0.8861, + "epoch": 0.9373058458062694, + "grad_norm": 4.0089603959322035, + "learning_rate": 1.6100506821150455e-05, + "loss": 1.2873, "step": 6638 }, { - "epoch": 1.3921157475361712, - "grad_norm": 8.784274984651768, - "learning_rate": 1.1644006679713063e-05, - "loss": 1.1381, + "epoch": 0.9374470488562553, + "grad_norm": 4.1208848745432665, + "learning_rate": 1.6099298831072334e-05, + "loss": 1.1831, "step": 6639 }, { - "epoch": 1.3923254351016985, - "grad_norm": 6.796901845056859, - "learning_rate": 1.1641773556227626e-05, - "loss": 0.7094, + "epoch": 0.9375882519062412, + "grad_norm": 2.8475719310319993, + "learning_rate": 1.6098090699249144e-05, + "loss": 0.7505, "step": 6640 }, { - "epoch": 1.3925351226672258, - "grad_norm": 5.872266529179356, - "learning_rate": 1.1639540348598455e-05, - "loss": 0.448, + "epoch": 0.9377294549562271, + "grad_norm": 4.080486384741495, + "learning_rate": 1.6096882425708953e-05, + "loss": 1.2425, "step": 6641 }, { - "epoch": 1.3927448102327533, - "grad_norm": 5.757750829161933, - "learning_rate": 1.1637307056939993e-05, - "loss": 0.7383, + "epoch": 0.9378706580062129, + "grad_norm": 3.8872234066593196, + "learning_rate": 1.609567401047985e-05, + "loss": 0.8701, "step": 6642 }, { - "epoch": 1.3929544977982806, - "grad_norm": 6.881902975592492, - "learning_rate": 1.1635073681366704e-05, - "loss": 0.7825, + "epoch": 0.9380118610561988, + "grad_norm": 3.4176709692163083, + "learning_rate": 1.6094465453589915e-05, + "loss": 0.9469, "step": 6643 }, { - "epoch": 1.393164185363808, - "grad_norm": 7.445904927085417, - "learning_rate": 1.1632840221993054e-05, - "loss": 0.898, + "epoch": 0.9381530641061847, + "grad_norm": 2.9601686480136684, + "learning_rate": 1.6093256755067236e-05, + "loss": 0.8684, "step": 6644 }, { - "epoch": 1.3933738729293352, - "grad_norm": 6.971941975998979, - "learning_rate": 1.1630606678933514e-05, - "loss": 0.8775, + "epoch": 0.9382942671561706, + "grad_norm": 3.3082696632840434, + "learning_rate": 1.60920479149399e-05, + "loss": 1.0592, "step": 6645 }, { - "epoch": 1.3935835604948625, - "grad_norm": 5.5757962412670725, - "learning_rate": 1.1628373052302546e-05, - "loss": 0.5841, + "epoch": 0.9384354702061565, + "grad_norm": 2.6254444362242153, + "learning_rate": 1.6090838933236004e-05, + "loss": 0.7735, "step": 6646 }, { - "epoch": 1.39379324806039, - "grad_norm": 10.42929692519301, - "learning_rate": 1.1626139342214642e-05, - "loss": 1.3279, + "epoch": 0.9385766732561424, + "grad_norm": 3.0839950407523267, + "learning_rate": 1.608962980998364e-05, + "loss": 0.9762, "step": 6647 }, { - "epoch": 1.3940029356259174, - "grad_norm": 7.391261008883606, - "learning_rate": 1.1623905548784274e-05, - "loss": 0.7357, + "epoch": 0.9387178763061282, + "grad_norm": 3.459557276954536, + "learning_rate": 1.608842054521091e-05, + "loss": 0.9345, "step": 6648 }, { - "epoch": 1.3942126231914447, - "grad_norm": 7.126826664677415, - "learning_rate": 1.1621671672125927e-05, - "loss": 0.7001, + "epoch": 0.9388590793561141, + "grad_norm": 3.5899919801696525, + "learning_rate": 1.6087211138945917e-05, + "loss": 0.9923, "step": 6649 }, { - "epoch": 1.3944223107569722, - "grad_norm": 7.708544415835774, - "learning_rate": 1.1619437712354098e-05, - "loss": 0.7852, + "epoch": 0.9390002824061, + "grad_norm": 3.950889396040656, + "learning_rate": 1.6086001591216764e-05, + "loss": 1.1762, "step": 6650 }, { - "epoch": 1.3946319983224995, - "grad_norm": 6.520850634050474, - "learning_rate": 1.1617203669583275e-05, - "loss": 0.6675, + "epoch": 0.9391414854560859, + "grad_norm": 3.7910557716480087, + "learning_rate": 1.6084791902051563e-05, + "loss": 1.0029, "step": 6651 }, { - "epoch": 1.3948416858880268, - "grad_norm": 6.448882571330285, - "learning_rate": 1.1614969543927958e-05, - "loss": 0.5892, + "epoch": 0.9392826885060718, + "grad_norm": 3.0713211144037684, + "learning_rate": 1.6083582071478424e-05, + "loss": 0.9519, "step": 6652 }, { - "epoch": 1.3950513734535541, - "grad_norm": 8.012185991687247, - "learning_rate": 1.1612735335502655e-05, - "loss": 1.0787, + "epoch": 0.9394238915560577, + "grad_norm": 3.52584341383269, + "learning_rate": 1.6082372099525464e-05, + "loss": 1.0988, "step": 6653 }, { - "epoch": 1.3952610610190816, - "grad_norm": 8.95374544308986, - "learning_rate": 1.1610501044421865e-05, - "loss": 0.7078, + "epoch": 0.9395650946060435, + "grad_norm": 2.9522503340859534, + "learning_rate": 1.6081161986220807e-05, + "loss": 0.8544, "step": 6654 }, { - "epoch": 1.395470748584609, - "grad_norm": 6.720733172879704, - "learning_rate": 1.1608266670800106e-05, - "loss": 0.7862, + "epoch": 0.9397062976560294, + "grad_norm": 3.226681077472121, + "learning_rate": 1.6079951731592573e-05, + "loss": 0.935, "step": 6655 }, { - "epoch": 1.3956804361501363, - "grad_norm": 6.60468007911462, - "learning_rate": 1.160603221475189e-05, - "loss": 0.7212, + "epoch": 0.9398475007060153, + "grad_norm": 3.0553486203415146, + "learning_rate": 1.6078741335668882e-05, + "loss": 0.7655, "step": 6656 }, { - "epoch": 1.3958901237156636, - "grad_norm": 7.410094855264841, - "learning_rate": 1.1603797676391735e-05, - "loss": 0.8174, + "epoch": 0.9399887037560011, + "grad_norm": 3.324879468485464, + "learning_rate": 1.607753079847787e-05, + "loss": 0.8258, "step": 6657 }, { - "epoch": 1.396099811281191, - "grad_norm": 7.267933478916165, - "learning_rate": 1.1601563055834174e-05, - "loss": 0.6736, + "epoch": 0.940129906805987, + "grad_norm": 3.3381348180705266, + "learning_rate": 1.6076320120047667e-05, + "loss": 0.977, "step": 6658 }, { - "epoch": 1.3963094988467184, - "grad_norm": 6.56079821418887, - "learning_rate": 1.1599328353193722e-05, - "loss": 0.6477, + "epoch": 0.9402711098559728, + "grad_norm": 3.8040267048022436, + "learning_rate": 1.6075109300406407e-05, + "loss": 1.3205, "step": 6659 }, { - "epoch": 1.3965191864122457, - "grad_norm": 8.05796146824699, - "learning_rate": 1.159709356858492e-05, - "loss": 0.8721, + "epoch": 0.9404123129059587, + "grad_norm": 3.1607199518088858, + "learning_rate": 1.607389833958223e-05, + "loss": 1.026, "step": 6660 }, { - "epoch": 1.3967288739777732, - "grad_norm": 8.210484700802608, - "learning_rate": 1.1594858702122303e-05, - "loss": 0.8318, + "epoch": 0.9405535159559446, + "grad_norm": 3.2102565232140483, + "learning_rate": 1.6072687237603283e-05, + "loss": 0.8745, "step": 6661 }, { - "epoch": 1.3969385615433005, - "grad_norm": 6.626723191904108, - "learning_rate": 1.1592623753920414e-05, - "loss": 0.768, + "epoch": 0.9406947190059305, + "grad_norm": 3.3262210983727445, + "learning_rate": 1.6071475994497702e-05, + "loss": 0.8807, "step": 6662 }, { - "epoch": 1.3971482491088278, - "grad_norm": 5.648183128524098, - "learning_rate": 1.1590388724093788e-05, - "loss": 0.5109, + "epoch": 0.9408359220559164, + "grad_norm": 3.1618972028901493, + "learning_rate": 1.6070264610293645e-05, + "loss": 0.9394, "step": 6663 }, { - "epoch": 1.3973579366743551, - "grad_norm": 6.6720359335313155, - "learning_rate": 1.158815361275699e-05, - "loss": 0.7033, + "epoch": 0.9409771251059023, + "grad_norm": 3.512876482071222, + "learning_rate": 1.6069053085019258e-05, + "loss": 1.2797, "step": 6664 }, { - "epoch": 1.3975676242398825, - "grad_norm": 6.644300058167578, - "learning_rate": 1.158591842002456e-05, - "loss": 0.791, + "epoch": 0.9411183281558881, + "grad_norm": 3.290273807703738, + "learning_rate": 1.6067841418702702e-05, + "loss": 1.057, "step": 6665 }, { - "epoch": 1.39777731180541, - "grad_norm": 6.99716113977253, - "learning_rate": 1.1583683146011062e-05, - "loss": 0.7895, + "epoch": 0.941259531205874, + "grad_norm": 3.308185624541523, + "learning_rate": 1.6066629611372127e-05, + "loss": 0.9493, "step": 6666 }, { - "epoch": 1.3979869993709373, - "grad_norm": 8.822283805176465, - "learning_rate": 1.1581447790831055e-05, - "loss": 1.0284, + "epoch": 0.9414007342558599, + "grad_norm": 3.2422475772554624, + "learning_rate": 1.60654176630557e-05, + "loss": 0.898, "step": 6667 }, { - "epoch": 1.3981966869364646, - "grad_norm": 7.471159556788238, - "learning_rate": 1.1579212354599107e-05, - "loss": 0.7165, + "epoch": 0.9415419373058458, + "grad_norm": 3.4866492215997145, + "learning_rate": 1.6064205573781587e-05, + "loss": 1.05, "step": 6668 }, { - "epoch": 1.3984063745019921, - "grad_norm": 5.996239090689492, - "learning_rate": 1.1576976837429783e-05, - "loss": 0.7642, + "epoch": 0.9416831403558317, + "grad_norm": 2.8983844958828526, + "learning_rate": 1.606299334357796e-05, + "loss": 0.9658, "step": 6669 }, { - "epoch": 1.3986160620675194, - "grad_norm": 7.690131599098741, - "learning_rate": 1.1574741239437666e-05, - "loss": 0.8927, + "epoch": 0.9418243434058176, + "grad_norm": 3.4328595192868456, + "learning_rate": 1.6061780972472978e-05, + "loss": 1.0506, "step": 6670 }, { - "epoch": 1.3988257496330467, - "grad_norm": 7.542280628270704, - "learning_rate": 1.1572505560737329e-05, - "loss": 0.8225, + "epoch": 0.9419655464558034, + "grad_norm": 3.0889144449357104, + "learning_rate": 1.6060568460494828e-05, + "loss": 1.02, "step": 6671 }, { - "epoch": 1.3990354371985743, - "grad_norm": 7.425715383220487, - "learning_rate": 1.1570269801443352e-05, - "loss": 0.8586, + "epoch": 0.9421067495057893, + "grad_norm": 3.03361620794687, + "learning_rate": 1.6059355807671683e-05, + "loss": 0.8275, "step": 6672 }, { - "epoch": 1.3992451247641016, - "grad_norm": 7.665122626708507, - "learning_rate": 1.1568033961670324e-05, - "loss": 0.7262, + "epoch": 0.9422479525557752, + "grad_norm": 2.884333698059003, + "learning_rate": 1.605814301403173e-05, + "loss": 0.7323, "step": 6673 }, { - "epoch": 1.3994548123296289, - "grad_norm": 7.027987050218133, - "learning_rate": 1.1565798041532836e-05, - "loss": 0.9316, + "epoch": 0.9423891556057611, + "grad_norm": 3.1836482184551316, + "learning_rate": 1.6056930079603144e-05, + "loss": 0.7934, "step": 6674 }, { - "epoch": 1.3996644998951562, - "grad_norm": 6.23455783090704, - "learning_rate": 1.1563562041145484e-05, - "loss": 0.5578, + "epoch": 0.942530358655747, + "grad_norm": 3.4125152825407725, + "learning_rate": 1.6055717004414125e-05, + "loss": 0.8653, "step": 6675 }, { - "epoch": 1.3998741874606835, - "grad_norm": 5.651723177620124, - "learning_rate": 1.1561325960622866e-05, - "loss": 0.6555, + "epoch": 0.9426715617057329, + "grad_norm": 2.910643880237899, + "learning_rate": 1.6054503788492852e-05, + "loss": 0.9476, "step": 6676 }, { - "epoch": 1.400083875026211, - "grad_norm": 7.631585497905252, - "learning_rate": 1.1559089800079584e-05, - "loss": 0.9897, + "epoch": 0.9428127647557187, + "grad_norm": 3.4209158245494056, + "learning_rate": 1.6053290431867528e-05, + "loss": 0.9221, "step": 6677 }, { - "epoch": 1.4002935625917383, - "grad_norm": 6.232764541741989, - "learning_rate": 1.1556853559630246e-05, - "loss": 0.7395, + "epoch": 0.9429539678057046, + "grad_norm": 3.6658283944741523, + "learning_rate": 1.605207693456635e-05, + "loss": 1.1199, "step": 6678 }, { - "epoch": 1.4005032501572656, - "grad_norm": 6.22801725460777, - "learning_rate": 1.155461723938946e-05, - "loss": 0.6478, + "epoch": 0.9430951708556905, + "grad_norm": 3.2051746205158427, + "learning_rate": 1.605086329661752e-05, + "loss": 0.8933, "step": 6679 }, { - "epoch": 1.4007129377227931, - "grad_norm": 5.925271108309575, - "learning_rate": 1.1552380839471847e-05, - "loss": 0.5511, + "epoch": 0.9432363739056764, + "grad_norm": 3.3420356761290653, + "learning_rate": 1.6049649518049234e-05, + "loss": 1.044, "step": 6680 }, { - "epoch": 1.4009226252883205, - "grad_norm": 6.257955492364735, - "learning_rate": 1.1550144359992022e-05, - "loss": 0.4483, + "epoch": 0.9433775769556623, + "grad_norm": 3.2460219685075087, + "learning_rate": 1.6048435598889708e-05, + "loss": 0.7345, "step": 6681 }, { - "epoch": 1.4011323128538478, - "grad_norm": 6.343931791855393, - "learning_rate": 1.154790780106461e-05, - "loss": 0.6076, + "epoch": 0.9435187800056481, + "grad_norm": 3.5539531333143897, + "learning_rate": 1.6047221539167152e-05, + "loss": 1.0141, "step": 6682 }, { - "epoch": 1.401342000419375, - "grad_norm": 8.031149173356003, - "learning_rate": 1.1545671162804238e-05, - "loss": 0.9635, + "epoch": 0.943659983055634, + "grad_norm": 3.392029939472976, + "learning_rate": 1.604600733890978e-05, + "loss": 1.0401, "step": 6683 }, { - "epoch": 1.4015516879849024, - "grad_norm": 8.257850143433433, - "learning_rate": 1.1543434445325539e-05, - "loss": 0.8289, + "epoch": 0.9438011861056199, + "grad_norm": 3.6183830643802763, + "learning_rate": 1.6044792998145804e-05, + "loss": 1.0269, "step": 6684 }, { - "epoch": 1.40176137555043, - "grad_norm": 6.436673506554482, - "learning_rate": 1.154119764874315e-05, - "loss": 0.7955, + "epoch": 0.9439423891556058, + "grad_norm": 3.7388635242574146, + "learning_rate": 1.6043578516903452e-05, + "loss": 1.0434, "step": 6685 }, { - "epoch": 1.4019710631159572, - "grad_norm": 7.388303940153177, - "learning_rate": 1.1538960773171703e-05, - "loss": 0.731, + "epoch": 0.9440835922055917, + "grad_norm": 2.765974676715772, + "learning_rate": 1.6042363895210948e-05, + "loss": 0.7987, "step": 6686 }, { - "epoch": 1.4021807506814845, - "grad_norm": 7.4479717084489145, - "learning_rate": 1.1536723818725855e-05, - "loss": 0.7709, + "epoch": 0.9442247952555776, + "grad_norm": 3.228900249498772, + "learning_rate": 1.6041149133096515e-05, + "loss": 0.995, "step": 6687 }, { - "epoch": 1.402390438247012, - "grad_norm": 7.986727123830757, - "learning_rate": 1.153448678552024e-05, - "loss": 0.7147, + "epoch": 0.9443659983055634, + "grad_norm": 3.8561738013689646, + "learning_rate": 1.6039934230588384e-05, + "loss": 1.168, "step": 6688 }, { - "epoch": 1.4026001258125393, - "grad_norm": 7.98665383696985, - "learning_rate": 1.1532249673669518e-05, - "loss": 1.04, + "epoch": 0.9445072013555493, + "grad_norm": 3.5186093249951895, + "learning_rate": 1.6038719187714788e-05, + "loss": 0.9663, "step": 6689 }, { - "epoch": 1.4028098133780667, - "grad_norm": 7.9263985683917015, - "learning_rate": 1.153001248328834e-05, - "loss": 0.8171, + "epoch": 0.9446484044055352, + "grad_norm": 3.5293143488948933, + "learning_rate": 1.6037504004503967e-05, + "loss": 1.1972, "step": 6690 }, { - "epoch": 1.4030195009435942, - "grad_norm": 8.234110955107315, - "learning_rate": 1.1527775214491373e-05, - "loss": 1.0076, + "epoch": 0.944789607455521, + "grad_norm": 3.222830060959618, + "learning_rate": 1.6036288680984164e-05, + "loss": 1.0092, "step": 6691 }, { - "epoch": 1.4032291885091215, - "grad_norm": 5.728570372407337, - "learning_rate": 1.1525537867393271e-05, - "loss": 0.7229, + "epoch": 0.9449308105055069, + "grad_norm": 3.3479942661906525, + "learning_rate": 1.6035073217183613e-05, + "loss": 0.7898, "step": 6692 }, { - "epoch": 1.4034388760746488, - "grad_norm": 7.338062605341393, - "learning_rate": 1.1523300442108712e-05, - "loss": 0.8714, + "epoch": 0.9450720135554928, + "grad_norm": 2.706742446356195, + "learning_rate": 1.6033857613130574e-05, + "loss": 0.7797, "step": 6693 }, { - "epoch": 1.403648563640176, - "grad_norm": 7.772863859547496, - "learning_rate": 1.1521062938752361e-05, - "loss": 0.814, + "epoch": 0.9452132166054786, + "grad_norm": 2.967255021079711, + "learning_rate": 1.6032641868853283e-05, + "loss": 0.8365, "step": 6694 }, { - "epoch": 1.4038582512057034, - "grad_norm": 9.347894945695758, - "learning_rate": 1.1518825357438899e-05, - "loss": 0.8665, + "epoch": 0.9453544196554645, + "grad_norm": 3.0158795341226474, + "learning_rate": 1.6031425984380006e-05, + "loss": 0.9347, "step": 6695 }, { - "epoch": 1.404067938771231, - "grad_norm": 7.686234776394881, - "learning_rate": 1.1516587698283e-05, - "loss": 0.7991, + "epoch": 0.9454956227054504, + "grad_norm": 3.418218520605631, + "learning_rate": 1.6030209959738988e-05, + "loss": 0.9387, "step": 6696 }, { - "epoch": 1.4042776263367582, - "grad_norm": 7.671059535136048, - "learning_rate": 1.1514349961399354e-05, - "loss": 1.0598, + "epoch": 0.9456368257554363, + "grad_norm": 3.6190250247360907, + "learning_rate": 1.60289937949585e-05, + "loss": 1.0718, "step": 6697 }, { - "epoch": 1.4044873139022855, - "grad_norm": 7.836100909154196, - "learning_rate": 1.151211214690264e-05, - "loss": 0.8187, + "epoch": 0.9457780288054222, + "grad_norm": 3.417781106338642, + "learning_rate": 1.6027777490066798e-05, + "loss": 0.8786, "step": 6698 }, { - "epoch": 1.404697001467813, - "grad_norm": 7.632462960110242, - "learning_rate": 1.1509874254907562e-05, - "loss": 0.8339, + "epoch": 0.945919231855408, + "grad_norm": 3.226895744446707, + "learning_rate": 1.602656104509215e-05, + "loss": 0.8636, "step": 6699 }, { - "epoch": 1.4049066890333404, - "grad_norm": 8.871719374825206, - "learning_rate": 1.150763628552881e-05, - "loss": 1.1294, + "epoch": 0.9460604349053939, + "grad_norm": 3.0532366793867434, + "learning_rate": 1.6025344460062826e-05, + "loss": 0.8661, "step": 6700 }, { - "epoch": 1.4051163765988677, - "grad_norm": 6.559798980788915, - "learning_rate": 1.1505398238881078e-05, - "loss": 0.6144, + "epoch": 0.9462016379553798, + "grad_norm": 3.5896744323056304, + "learning_rate": 1.60241277350071e-05, + "loss": 0.9442, "step": 6701 }, { - "epoch": 1.405326064164395, - "grad_norm": 8.17831443941465, - "learning_rate": 1.150316011507908e-05, - "loss": 0.8922, + "epoch": 0.9463428410053657, + "grad_norm": 3.5276638642262217, + "learning_rate": 1.6022910869953245e-05, + "loss": 0.9921, "step": 6702 }, { - "epoch": 1.4055357517299223, - "grad_norm": 8.11354201627378, - "learning_rate": 1.1500921914237516e-05, - "loss": 0.9922, + "epoch": 0.9464840440553516, + "grad_norm": 3.754744285092437, + "learning_rate": 1.6021693864929548e-05, + "loss": 1.206, "step": 6703 }, { - "epoch": 1.4057454392954498, - "grad_norm": 7.116864222188996, - "learning_rate": 1.1498683636471101e-05, - "loss": 0.656, + "epoch": 0.9466252471053375, + "grad_norm": 3.199563372734988, + "learning_rate": 1.602047671996428e-05, + "loss": 0.7907, "step": 6704 }, { - "epoch": 1.4059551268609771, - "grad_norm": 7.542254159001361, - "learning_rate": 1.1496445281894552e-05, - "loss": 0.916, + "epoch": 0.9467664501553233, + "grad_norm": 3.0184576091123363, + "learning_rate": 1.6019259435085733e-05, + "loss": 0.8096, "step": 6705 }, { - "epoch": 1.4061648144265044, - "grad_norm": 4.891630131140577, - "learning_rate": 1.1494206850622589e-05, - "loss": 0.5368, + "epoch": 0.9469076532053092, + "grad_norm": 3.161366852180515, + "learning_rate": 1.6018042010322197e-05, + "loss": 1.1012, "step": 6706 }, { - "epoch": 1.406374501992032, - "grad_norm": 6.93797773384164, - "learning_rate": 1.1491968342769931e-05, - "loss": 0.7061, + "epoch": 0.9470488562552951, + "grad_norm": 3.6676417078460988, + "learning_rate": 1.6016824445701965e-05, + "loss": 1.031, "step": 6707 }, { - "epoch": 1.4065841895575593, - "grad_norm": 7.017046514112816, - "learning_rate": 1.1489729758451306e-05, - "loss": 0.7606, + "epoch": 0.947190059305281, + "grad_norm": 3.417058895814996, + "learning_rate": 1.6015606741253334e-05, + "loss": 0.9663, "step": 6708 }, { - "epoch": 1.4067938771230866, - "grad_norm": 7.278615592127141, - "learning_rate": 1.148749109778145e-05, - "loss": 0.8107, + "epoch": 0.9473312623552669, + "grad_norm": 2.9297808756704122, + "learning_rate": 1.6014388897004595e-05, + "loss": 0.8103, "step": 6709 }, { - "epoch": 1.407003564688614, - "grad_norm": 8.159660732897715, - "learning_rate": 1.1485252360875093e-05, - "loss": 0.8696, + "epoch": 0.9474724654052528, + "grad_norm": 2.8256065991246504, + "learning_rate": 1.601317091298406e-05, + "loss": 0.8442, "step": 6710 }, { - "epoch": 1.4072132522541414, - "grad_norm": 7.977671974336682, - "learning_rate": 1.1483013547846975e-05, - "loss": 1.0339, + "epoch": 0.9476136684552386, + "grad_norm": 3.035086442580244, + "learning_rate": 1.6011952789220025e-05, + "loss": 0.8863, "step": 6711 }, { - "epoch": 1.4074229398196687, - "grad_norm": 6.6734354443972315, - "learning_rate": 1.1480774658811844e-05, - "loss": 0.6853, + "epoch": 0.9477548715052245, + "grad_norm": 3.2467716081350417, + "learning_rate": 1.601073452574081e-05, + "loss": 1.1156, "step": 6712 }, { - "epoch": 1.407632627385196, - "grad_norm": 7.180669595183709, - "learning_rate": 1.1478535693884441e-05, - "loss": 0.6942, + "epoch": 0.9478960745552104, + "grad_norm": 2.9998715551611137, + "learning_rate": 1.6009516122574717e-05, + "loss": 0.7638, "step": 6713 }, { - "epoch": 1.4078423149507233, - "grad_norm": 5.820651066616864, - "learning_rate": 1.1476296653179521e-05, - "loss": 0.7162, + "epoch": 0.9480372776051963, + "grad_norm": 2.9927676190915706, + "learning_rate": 1.6008297579750063e-05, + "loss": 0.8013, "step": 6714 }, { - "epoch": 1.4080520025162508, - "grad_norm": 6.75856456320359, - "learning_rate": 1.1474057536811836e-05, - "loss": 0.7588, + "epoch": 0.9481784806551822, + "grad_norm": 3.7753727829347907, + "learning_rate": 1.600707889729517e-05, + "loss": 0.9594, "step": 6715 }, { - "epoch": 1.4082616900817782, - "grad_norm": 6.8938669947813, - "learning_rate": 1.1471818344896148e-05, - "loss": 0.7462, + "epoch": 0.9483196837051681, + "grad_norm": 3.0565816641728967, + "learning_rate": 1.6005860075238358e-05, + "loss": 1.0416, "step": 6716 }, { - "epoch": 1.4084713776473055, - "grad_norm": 6.5945010223640415, - "learning_rate": 1.1469579077547215e-05, - "loss": 0.735, + "epoch": 0.9484608867551539, + "grad_norm": 3.365895551812851, + "learning_rate": 1.600464111360795e-05, + "loss": 1.1126, "step": 6717 }, { - "epoch": 1.408681065212833, - "grad_norm": 6.18346029394847, - "learning_rate": 1.1467339734879806e-05, - "loss": 0.6428, + "epoch": 0.9486020898051398, + "grad_norm": 3.2037498624525673, + "learning_rate": 1.6003422012432275e-05, + "loss": 1.0029, "step": 6718 }, { - "epoch": 1.4088907527783603, - "grad_norm": 6.683122716945392, - "learning_rate": 1.1465100317008689e-05, - "loss": 0.7731, + "epoch": 0.9487432928551257, + "grad_norm": 2.78030582086645, + "learning_rate": 1.6002202771739666e-05, + "loss": 0.8412, "step": 6719 }, { - "epoch": 1.4091004403438876, - "grad_norm": 6.039894996750581, - "learning_rate": 1.1462860824048643e-05, - "loss": 0.5133, + "epoch": 0.9488844959051116, + "grad_norm": 3.519099739407791, + "learning_rate": 1.6000983391558457e-05, + "loss": 0.9792, "step": 6720 }, { - "epoch": 1.409310127909415, - "grad_norm": 5.965861207019783, - "learning_rate": 1.1460621256114438e-05, - "loss": 0.5742, + "epoch": 0.9490256989550975, + "grad_norm": 3.4547811176230856, + "learning_rate": 1.5999763871916987e-05, + "loss": 1.0257, "step": 6721 }, { - "epoch": 1.4095198154749422, - "grad_norm": 7.801798550247179, - "learning_rate": 1.1458381613320863e-05, - "loss": 0.8277, + "epoch": 0.9491669020050834, + "grad_norm": 3.928588709125975, + "learning_rate": 1.5998544212843597e-05, + "loss": 1.1313, "step": 6722 }, { - "epoch": 1.4097295030404697, - "grad_norm": 6.304478333269873, - "learning_rate": 1.1456141895782701e-05, - "loss": 0.6967, + "epoch": 0.9493081050550692, + "grad_norm": 3.5481236859440477, + "learning_rate": 1.5997324414366626e-05, + "loss": 1.0001, "step": 6723 }, { - "epoch": 1.409939190605997, - "grad_norm": 6.188005309802785, - "learning_rate": 1.145390210361474e-05, - "loss": 0.6774, + "epoch": 0.9494493081050551, + "grad_norm": 3.5957404600538183, + "learning_rate": 1.5996104476514426e-05, + "loss": 1.1066, "step": 6724 }, { - "epoch": 1.4101488781715243, - "grad_norm": 6.169355337765976, - "learning_rate": 1.1451662236931775e-05, - "loss": 0.7409, + "epoch": 0.9495905111550409, + "grad_norm": 3.3883070319642385, + "learning_rate": 1.5994884399315348e-05, + "loss": 1.1193, "step": 6725 }, { - "epoch": 1.4103585657370519, - "grad_norm": 7.982775636463912, - "learning_rate": 1.1449422295848606e-05, - "loss": 1.0103, + "epoch": 0.9497317142050268, + "grad_norm": 3.4567736060702896, + "learning_rate": 1.5993664182797747e-05, + "loss": 1.1202, "step": 6726 }, { - "epoch": 1.4105682533025792, - "grad_norm": 8.892824784948115, - "learning_rate": 1.1447182280480024e-05, - "loss": 1.2514, + "epoch": 0.9498729172550127, + "grad_norm": 3.6762692009232993, + "learning_rate": 1.599244382698998e-05, + "loss": 1.2075, "step": 6727 }, { - "epoch": 1.4107779408681065, - "grad_norm": 6.370409539353546, - "learning_rate": 1.1444942190940846e-05, - "loss": 0.7717, + "epoch": 0.9500141203049985, + "grad_norm": 3.675974358282678, + "learning_rate": 1.59912233319204e-05, + "loss": 1.0842, "step": 6728 }, { - "epoch": 1.410987628433634, - "grad_norm": 6.8691096710535895, - "learning_rate": 1.1442702027345873e-05, - "loss": 0.8624, + "epoch": 0.9501553233549844, + "grad_norm": 3.4022117365793787, + "learning_rate": 1.5990002697617386e-05, + "loss": 0.8944, "step": 6729 }, { - "epoch": 1.4111973159991613, - "grad_norm": 6.516750276258667, - "learning_rate": 1.1440461789809919e-05, - "loss": 0.8874, + "epoch": 0.9502965264049703, + "grad_norm": 3.33084817505258, + "learning_rate": 1.5988781924109293e-05, + "loss": 1.0385, "step": 6730 }, { - "epoch": 1.4114070035646886, - "grad_norm": 6.928663245017479, - "learning_rate": 1.1438221478447798e-05, - "loss": 0.6907, + "epoch": 0.9504377294549562, + "grad_norm": 3.1293236362281576, + "learning_rate": 1.598756101142449e-05, + "loss": 0.8546, "step": 6731 }, { - "epoch": 1.411616691130216, - "grad_norm": 8.727437240819333, - "learning_rate": 1.143598109337433e-05, - "loss": 0.958, + "epoch": 0.9505789325049421, + "grad_norm": 3.11331648312038, + "learning_rate": 1.598633995959136e-05, + "loss": 0.8249, "step": 6732 }, { - "epoch": 1.4118263786957432, - "grad_norm": 7.3211381000363716, - "learning_rate": 1.1433740634704345e-05, - "loss": 0.928, + "epoch": 0.950720135554928, + "grad_norm": 3.152349150049387, + "learning_rate": 1.5985118768638276e-05, + "loss": 0.9512, "step": 6733 }, { - "epoch": 1.4120360662612708, - "grad_norm": 7.592545888747638, - "learning_rate": 1.1431500102552665e-05, - "loss": 0.9485, + "epoch": 0.9508613386049138, + "grad_norm": 2.7945221659294814, + "learning_rate": 1.5983897438593612e-05, + "loss": 0.853, "step": 6734 }, { - "epoch": 1.412245753826798, - "grad_norm": 7.8112810049339805, - "learning_rate": 1.142925949703412e-05, - "loss": 0.867, + "epoch": 0.9510025416548997, + "grad_norm": 3.8858451242695167, + "learning_rate": 1.5982675969485756e-05, + "loss": 1.0983, "step": 6735 }, { - "epoch": 1.4124554413923254, - "grad_norm": 8.289506472042069, - "learning_rate": 1.1427018818263546e-05, - "loss": 0.923, + "epoch": 0.9511437447048856, + "grad_norm": 2.8233957316869205, + "learning_rate": 1.5981454361343097e-05, + "loss": 0.7475, "step": 6736 }, { - "epoch": 1.412665128957853, - "grad_norm": 8.916790717111908, - "learning_rate": 1.1424778066355786e-05, - "loss": 1.0108, + "epoch": 0.9512849477548715, + "grad_norm": 3.2609576157697053, + "learning_rate": 1.5980232614194023e-05, + "loss": 0.8103, "step": 6737 }, { - "epoch": 1.4128748165233802, - "grad_norm": 6.348259163613662, - "learning_rate": 1.1422537241425677e-05, - "loss": 0.7022, + "epoch": 0.9514261508048574, + "grad_norm": 3.6054255120768683, + "learning_rate": 1.597901072806692e-05, + "loss": 1.1149, "step": 6738 }, { - "epoch": 1.4130845040889075, - "grad_norm": 6.536542319791054, - "learning_rate": 1.1420296343588067e-05, - "loss": 0.7582, + "epoch": 0.9515673538548433, + "grad_norm": 3.028609404345994, + "learning_rate": 1.5977788702990192e-05, + "loss": 0.8763, "step": 6739 }, { - "epoch": 1.4132941916544348, - "grad_norm": 6.943847595090378, - "learning_rate": 1.1418055372957806e-05, - "loss": 0.6813, + "epoch": 0.9517085569048291, + "grad_norm": 3.2838051152049257, + "learning_rate": 1.5976566538992237e-05, + "loss": 0.9289, "step": 6740 }, { - "epoch": 1.4135038792199621, - "grad_norm": 5.963084966633167, - "learning_rate": 1.1415814329649751e-05, - "loss": 0.6869, + "epoch": 0.951849759954815, + "grad_norm": 3.67397767400594, + "learning_rate": 1.597534423610146e-05, + "loss": 0.8777, "step": 6741 }, { - "epoch": 1.4137135667854897, - "grad_norm": 6.319514219174101, - "learning_rate": 1.141357321377875e-05, - "loss": 0.7139, + "epoch": 0.9519909630048009, + "grad_norm": 3.7955202689089167, + "learning_rate": 1.597412179434626e-05, + "loss": 1.2206, "step": 6742 }, { - "epoch": 1.413923254351017, - "grad_norm": 7.326589281094628, - "learning_rate": 1.1411332025459674e-05, - "loss": 0.9211, + "epoch": 0.9521321660547868, + "grad_norm": 3.23797944649157, + "learning_rate": 1.5972899213755047e-05, + "loss": 0.9903, "step": 6743 }, { - "epoch": 1.4141329419165443, - "grad_norm": 5.734591328297965, - "learning_rate": 1.140909076480738e-05, - "loss": 0.6822, + "epoch": 0.9522733691047727, + "grad_norm": 3.5213991069236985, + "learning_rate": 1.5971676494356237e-05, + "loss": 0.7797, "step": 6744 }, { - "epoch": 1.4143426294820718, - "grad_norm": 8.314877954884183, - "learning_rate": 1.1406849431936746e-05, - "loss": 1.0942, + "epoch": 0.9524145721547586, + "grad_norm": 3.2323595962664284, + "learning_rate": 1.5970453636178248e-05, + "loss": 1.017, "step": 6745 }, { - "epoch": 1.414552317047599, - "grad_norm": 7.305383448928347, - "learning_rate": 1.1404608026962635e-05, - "loss": 0.7597, + "epoch": 0.9525557752047444, + "grad_norm": 3.944320190742375, + "learning_rate": 1.5969230639249492e-05, + "loss": 1.001, "step": 6746 }, { - "epoch": 1.4147620046131264, - "grad_norm": 8.686699770359398, - "learning_rate": 1.1402366549999927e-05, - "loss": 1.0398, + "epoch": 0.9526969782547303, + "grad_norm": 2.962039286834128, + "learning_rate": 1.5968007503598397e-05, + "loss": 0.8269, "step": 6747 }, { - "epoch": 1.414971692178654, - "grad_norm": 6.371654515055473, - "learning_rate": 1.1400125001163503e-05, - "loss": 0.9606, + "epoch": 0.9528381813047162, + "grad_norm": 3.08786057862195, + "learning_rate": 1.596678422925338e-05, + "loss": 0.978, "step": 6748 }, { - "epoch": 1.4151813797441812, - "grad_norm": 6.075498973301181, - "learning_rate": 1.1397883380568242e-05, - "loss": 0.6941, + "epoch": 0.9529793843547021, + "grad_norm": 2.8344784056513173, + "learning_rate": 1.596556081624288e-05, + "loss": 0.8715, "step": 6749 }, { - "epoch": 1.4153910673097085, - "grad_norm": 5.84278018462633, - "learning_rate": 1.1395641688329031e-05, - "loss": 0.6959, + "epoch": 0.953120587404688, + "grad_norm": 3.534679241935083, + "learning_rate": 1.596433726459532e-05, + "loss": 0.9334, "step": 6750 }, { - "epoch": 1.4156007548752358, - "grad_norm": 7.199695647428923, - "learning_rate": 1.1393399924560769e-05, - "loss": 0.8448, + "epoch": 0.9532617904546739, + "grad_norm": 3.5766762057609385, + "learning_rate": 1.596311357433914e-05, + "loss": 1.1952, "step": 6751 }, { - "epoch": 1.4158104424407632, - "grad_norm": 6.503234197931621, - "learning_rate": 1.1391158089378341e-05, - "loss": 0.6662, + "epoch": 0.9534029935046597, + "grad_norm": 2.867566151961349, + "learning_rate": 1.5961889745502767e-05, + "loss": 0.8382, "step": 6752 }, { - "epoch": 1.4160201300062907, - "grad_norm": 7.327085431679781, - "learning_rate": 1.1388916182896645e-05, - "loss": 0.877, + "epoch": 0.9535441965546456, + "grad_norm": 4.406039812060257, + "learning_rate": 1.596066577811466e-05, + "loss": 1.151, "step": 6753 }, { - "epoch": 1.416229817571818, - "grad_norm": 6.245139488377247, - "learning_rate": 1.138667420523059e-05, - "loss": 0.704, + "epoch": 0.9536853996046315, + "grad_norm": 3.5751234630709696, + "learning_rate": 1.5959441672203254e-05, + "loss": 0.9983, "step": 6754 }, { - "epoch": 1.4164395051373453, - "grad_norm": 6.934263139837212, - "learning_rate": 1.1384432156495078e-05, - "loss": 0.9103, + "epoch": 0.9538266026546174, + "grad_norm": 3.2278906890611005, + "learning_rate": 1.5958217427796994e-05, + "loss": 0.8101, "step": 6755 }, { - "epoch": 1.4166491927028728, - "grad_norm": 7.046466192923788, - "learning_rate": 1.1382190036805012e-05, - "loss": 0.6756, + "epoch": 0.9539678057046033, + "grad_norm": 2.9200246756230728, + "learning_rate": 1.5956993044924334e-05, + "loss": 0.8703, "step": 6756 }, { - "epoch": 1.4168588802684001, - "grad_norm": 7.032428613400029, - "learning_rate": 1.1379947846275311e-05, - "loss": 0.7064, + "epoch": 0.9541090087545891, + "grad_norm": 3.595892262961553, + "learning_rate": 1.595576852361373e-05, + "loss": 0.9129, "step": 6757 }, { - "epoch": 1.4170685678339274, - "grad_norm": 5.801448440714787, - "learning_rate": 1.1377705585020891e-05, - "loss": 0.7222, + "epoch": 0.954250211804575, + "grad_norm": 3.1449596351379996, + "learning_rate": 1.5954543863893638e-05, + "loss": 0.8221, "step": 6758 }, { - "epoch": 1.4172782553994547, - "grad_norm": 7.167286371211719, - "learning_rate": 1.1375463253156669e-05, - "loss": 0.7781, + "epoch": 0.9543914148545608, + "grad_norm": 3.4367082936662827, + "learning_rate": 1.5953319065792516e-05, + "loss": 1.1315, "step": 6759 }, { - "epoch": 1.417487942964982, - "grad_norm": 6.907400720677501, - "learning_rate": 1.1373220850797571e-05, - "loss": 0.955, + "epoch": 0.9545326179045467, + "grad_norm": 3.2051265225121366, + "learning_rate": 1.5952094129338834e-05, + "loss": 0.9898, "step": 6760 }, { - "epoch": 1.4176976305305096, - "grad_norm": 6.467355626817842, - "learning_rate": 1.137097837805852e-05, - "loss": 0.7319, + "epoch": 0.9546738209545326, + "grad_norm": 3.5080836225380705, + "learning_rate": 1.595086905456105e-05, + "loss": 1.1654, "step": 6761 }, { - "epoch": 1.4179073180960369, - "grad_norm": 7.167739619716819, - "learning_rate": 1.1368735835054452e-05, - "loss": 0.7917, + "epoch": 0.9548150240045185, + "grad_norm": 3.864677702889774, + "learning_rate": 1.594964384148764e-05, + "loss": 1.1831, "step": 6762 }, { - "epoch": 1.4181170056615642, - "grad_norm": 6.551186382007419, - "learning_rate": 1.1366493221900297e-05, - "loss": 0.7372, + "epoch": 0.9549562270545043, + "grad_norm": 3.2950318257495756, + "learning_rate": 1.594841849014708e-05, + "loss": 1.008, "step": 6763 }, { - "epoch": 1.4183266932270917, - "grad_norm": 6.579719688252546, - "learning_rate": 1.1364250538710997e-05, - "loss": 0.7559, + "epoch": 0.9550974301044902, + "grad_norm": 3.266416286832824, + "learning_rate": 1.5947193000567844e-05, + "loss": 0.9798, "step": 6764 }, { - "epoch": 1.418536380792619, - "grad_norm": 6.222482984031967, - "learning_rate": 1.1362007785601486e-05, - "loss": 0.7952, + "epoch": 0.9552386331544761, + "grad_norm": 3.307313507320486, + "learning_rate": 1.5945967372778406e-05, + "loss": 1.1058, "step": 6765 }, { - "epoch": 1.4187460683581463, - "grad_norm": 7.59299136814988, - "learning_rate": 1.1359764962686717e-05, - "loss": 0.8762, + "epoch": 0.955379836204462, + "grad_norm": 3.29561805948912, + "learning_rate": 1.5944741606807257e-05, + "loss": 1.048, "step": 6766 }, { - "epoch": 1.4189557559236738, - "grad_norm": 6.6228444780873605, - "learning_rate": 1.1357522070081635e-05, - "loss": 0.7169, + "epoch": 0.9555210392544479, + "grad_norm": 3.205673217210416, + "learning_rate": 1.594351570268288e-05, + "loss": 1.048, "step": 6767 }, { - "epoch": 1.4191654434892012, - "grad_norm": 7.751878120518925, - "learning_rate": 1.1355279107901191e-05, - "loss": 0.7775, + "epoch": 0.9556622423044338, + "grad_norm": 3.4806223230940496, + "learning_rate": 1.5942289660433766e-05, + "loss": 0.9942, "step": 6768 }, { - "epoch": 1.4193751310547285, - "grad_norm": 6.496836818213145, - "learning_rate": 1.1353036076260343e-05, - "loss": 0.5969, + "epoch": 0.9558034453544196, + "grad_norm": 3.324422296548328, + "learning_rate": 1.5941063480088406e-05, + "loss": 0.9264, "step": 6769 }, { - "epoch": 1.4195848186202558, - "grad_norm": 7.605081227279532, - "learning_rate": 1.135079297527405e-05, - "loss": 0.7277, + "epoch": 0.9559446484044055, + "grad_norm": 3.25724625024677, + "learning_rate": 1.5939837161675297e-05, + "loss": 0.916, "step": 6770 }, { - "epoch": 1.419794506185783, - "grad_norm": 8.290769996194726, - "learning_rate": 1.1348549805057271e-05, - "loss": 0.893, + "epoch": 0.9560858514543914, + "grad_norm": 3.2390138304741565, + "learning_rate": 1.5938610705222936e-05, + "loss": 0.9942, "step": 6771 }, { - "epoch": 1.4200041937513106, - "grad_norm": 5.623628443157594, - "learning_rate": 1.134630656572498e-05, - "loss": 0.6528, + "epoch": 0.9562270545043773, + "grad_norm": 3.435879977823398, + "learning_rate": 1.5937384110759824e-05, + "loss": 1.0334, "step": 6772 }, { - "epoch": 1.420213881316838, - "grad_norm": 7.391427346614643, - "learning_rate": 1.1344063257392138e-05, - "loss": 0.6381, + "epoch": 0.9563682575543632, + "grad_norm": 2.4984399307293517, + "learning_rate": 1.5936157378314473e-05, + "loss": 0.7904, "step": 6773 }, { - "epoch": 1.4204235688823652, - "grad_norm": 6.226915171518154, - "learning_rate": 1.1341819880173727e-05, - "loss": 0.7045, + "epoch": 0.956509460604349, + "grad_norm": 2.781947933495412, + "learning_rate": 1.5934930507915386e-05, + "loss": 0.7486, "step": 6774 }, { - "epoch": 1.4206332564478927, - "grad_norm": 7.79484729066865, - "learning_rate": 1.1339576434184718e-05, - "loss": 0.8221, + "epoch": 0.9566506636543349, + "grad_norm": 3.1584283741458976, + "learning_rate": 1.5933703499591082e-05, + "loss": 0.9675, "step": 6775 }, { - "epoch": 1.42084294401342, - "grad_norm": 7.988799461913488, - "learning_rate": 1.1337332919540094e-05, - "loss": 0.9877, + "epoch": 0.9567918667043208, + "grad_norm": 3.0449649968016734, + "learning_rate": 1.5932476353370068e-05, + "loss": 0.9858, "step": 6776 }, { - "epoch": 1.4210526315789473, - "grad_norm": 7.338870329231204, - "learning_rate": 1.133508933635484e-05, - "loss": 0.7249, + "epoch": 0.9569330697543067, + "grad_norm": 2.9607852389209657, + "learning_rate": 1.5931249069280866e-05, + "loss": 0.7486, "step": 6777 }, { - "epoch": 1.4212623191444747, - "grad_norm": 7.466283641881016, - "learning_rate": 1.133284568474394e-05, - "loss": 0.837, + "epoch": 0.9570742728042926, + "grad_norm": 3.6066535603275143, + "learning_rate": 1.5930021647351997e-05, + "loss": 1.2387, "step": 6778 }, { - "epoch": 1.4214720067100022, - "grad_norm": 7.317575295886132, - "learning_rate": 1.1330601964822385e-05, - "loss": 0.8228, + "epoch": 0.9572154758542785, + "grad_norm": 4.13675172208724, + "learning_rate": 1.5928794087611988e-05, + "loss": 1.1815, "step": 6779 }, { - "epoch": 1.4216816942755295, - "grad_norm": 6.930856716815753, - "learning_rate": 1.1328358176705174e-05, - "loss": 0.7348, + "epoch": 0.9573566789042643, + "grad_norm": 2.847477817893974, + "learning_rate": 1.5927566390089362e-05, + "loss": 0.8239, "step": 6780 }, { - "epoch": 1.4218913818410568, - "grad_norm": 8.411142307383761, - "learning_rate": 1.1326114320507303e-05, - "loss": 0.863, + "epoch": 0.9574978819542502, + "grad_norm": 3.360918265216355, + "learning_rate": 1.5926338554812653e-05, + "loss": 0.7735, "step": 6781 }, { - "epoch": 1.422101069406584, - "grad_norm": 6.016921555142951, - "learning_rate": 1.132387039634377e-05, - "loss": 0.6091, + "epoch": 0.9576390850042361, + "grad_norm": 3.5124256924633612, + "learning_rate": 1.5925110581810396e-05, + "loss": 1.1325, "step": 6782 }, { - "epoch": 1.4223107569721116, - "grad_norm": 6.512092902098799, - "learning_rate": 1.1321626404329585e-05, - "loss": 0.7196, + "epoch": 0.957780288054222, + "grad_norm": 3.5172408555891796, + "learning_rate": 1.592388247111113e-05, + "loss": 1.0073, "step": 6783 }, { - "epoch": 1.422520444537639, - "grad_norm": 7.065472327619744, - "learning_rate": 1.1319382344579755e-05, - "loss": 0.7822, + "epoch": 0.9579214911042079, + "grad_norm": 4.034763736468467, + "learning_rate": 1.592265422274339e-05, + "loss": 1.087, "step": 6784 }, { - "epoch": 1.4227301321031662, - "grad_norm": 8.793812604725721, - "learning_rate": 1.1317138217209291e-05, - "loss": 0.8931, + "epoch": 0.9580626941541938, + "grad_norm": 3.7955639946441897, + "learning_rate": 1.5921425836735725e-05, + "loss": 1.1642, "step": 6785 }, { - "epoch": 1.4229398196686938, - "grad_norm": 6.323260672700587, - "learning_rate": 1.1314894022333212e-05, - "loss": 0.6437, + "epoch": 0.9582038972041796, + "grad_norm": 3.1893957637619863, + "learning_rate": 1.5920197313116682e-05, + "loss": 0.8976, "step": 6786 }, { - "epoch": 1.423149507234221, - "grad_norm": 7.090874068478431, - "learning_rate": 1.131264976006653e-05, - "loss": 0.728, + "epoch": 0.9583451002541655, + "grad_norm": 3.877590000041149, + "learning_rate": 1.591896865191481e-05, + "loss": 0.9861, "step": 6787 }, { - "epoch": 1.4233591947997484, - "grad_norm": 8.311724824048913, - "learning_rate": 1.1310405430524272e-05, - "loss": 0.9696, + "epoch": 0.9584863033041514, + "grad_norm": 3.7872837437015408, + "learning_rate": 1.591773985315866e-05, + "loss": 0.9501, "step": 6788 }, { - "epoch": 1.4235688823652757, - "grad_norm": 7.316499776186425, - "learning_rate": 1.1308161033821461e-05, - "loss": 0.7765, + "epoch": 0.9586275063541373, + "grad_norm": 2.8069601379982436, + "learning_rate": 1.5916510916876794e-05, + "loss": 0.7218, "step": 6789 }, { - "epoch": 1.423778569930803, - "grad_norm": 6.93233136223138, - "learning_rate": 1.130591657007313e-05, - "loss": 0.7739, + "epoch": 0.9587687094041232, + "grad_norm": 4.518970237798156, + "learning_rate": 1.591528184309777e-05, + "loss": 1.5069, "step": 6790 }, { - "epoch": 1.4239882574963305, - "grad_norm": 8.415228827715094, - "learning_rate": 1.130367203939431e-05, - "loss": 0.9769, + "epoch": 0.9589099124541091, + "grad_norm": 3.404808765362731, + "learning_rate": 1.591405263185015e-05, + "loss": 1.1348, "step": 6791 }, { - "epoch": 1.4241979450618578, - "grad_norm": 7.746226707073446, - "learning_rate": 1.1301427441900035e-05, - "loss": 0.9966, + "epoch": 0.9590511155040949, + "grad_norm": 3.2324040040428614, + "learning_rate": 1.59128232831625e-05, + "loss": 0.9001, "step": 6792 }, { - "epoch": 1.4244076326273851, - "grad_norm": 7.013120406748664, - "learning_rate": 1.1299182777705351e-05, - "loss": 0.7732, + "epoch": 0.9591923185540807, + "grad_norm": 3.587905717034151, + "learning_rate": 1.591159379706339e-05, + "loss": 1.123, "step": 6793 }, { - "epoch": 1.4246173201929127, - "grad_norm": 6.547606610261444, - "learning_rate": 1.1296938046925289e-05, - "loss": 0.8093, + "epoch": 0.9593335216040666, + "grad_norm": 2.9311800938656, + "learning_rate": 1.5910364173581395e-05, + "loss": 0.7579, "step": 6794 }, { - "epoch": 1.42482700775844, - "grad_norm": 7.424236635872814, - "learning_rate": 1.129469324967491e-05, - "loss": 0.8441, + "epoch": 0.9594747246540525, + "grad_norm": 3.4258164922055507, + "learning_rate": 1.5909134412745087e-05, + "loss": 0.995, "step": 6795 }, { - "epoch": 1.4250366953239673, - "grad_norm": 7.354512909720858, - "learning_rate": 1.1292448386069252e-05, - "loss": 0.7541, + "epoch": 0.9596159277040384, + "grad_norm": 3.2619321189690305, + "learning_rate": 1.5907904514583047e-05, + "loss": 0.8399, "step": 6796 }, { - "epoch": 1.4252463828894948, - "grad_norm": 6.068157592023968, - "learning_rate": 1.1290203456223374e-05, - "loss": 0.6598, + "epoch": 0.9597571307540242, + "grad_norm": 3.236394450637207, + "learning_rate": 1.590667447912386e-05, + "loss": 0.9869, "step": 6797 }, { - "epoch": 1.425456070455022, - "grad_norm": 8.365753287769886, - "learning_rate": 1.1287958460252331e-05, - "loss": 0.8944, + "epoch": 0.9598983338040101, + "grad_norm": 3.177077362516382, + "learning_rate": 1.590544430639611e-05, + "loss": 0.9513, "step": 6798 }, { - "epoch": 1.4256657580205494, - "grad_norm": 6.919343432815605, - "learning_rate": 1.1285713398271185e-05, - "loss": 0.6958, + "epoch": 0.960039536853996, + "grad_norm": 4.065562860549573, + "learning_rate": 1.590421399642838e-05, + "loss": 1.275, "step": 6799 }, { - "epoch": 1.4258754455860767, - "grad_norm": 8.196646092298492, - "learning_rate": 1.1283468270394993e-05, - "loss": 1.1285, + "epoch": 0.9601807399039819, + "grad_norm": 3.2497034351080276, + "learning_rate": 1.5902983549249272e-05, + "loss": 1.1588, "step": 6800 }, { - "epoch": 1.426085133151604, - "grad_norm": 7.218896919051745, - "learning_rate": 1.128122307673883e-05, - "loss": 0.8807, + "epoch": 0.9603219429539678, + "grad_norm": 3.3055289396953778, + "learning_rate": 1.5901752964887373e-05, + "loss": 0.7902, "step": 6801 }, { - "epoch": 1.4262948207171315, - "grad_norm": 7.785695421465841, - "learning_rate": 1.1278977817417762e-05, - "loss": 0.8562, + "epoch": 0.9604631460039537, + "grad_norm": 3.4158928668264985, + "learning_rate": 1.5900522243371283e-05, + "loss": 0.9128, "step": 6802 }, { - "epoch": 1.4265045082826588, - "grad_norm": 6.778392624836859, - "learning_rate": 1.127673249254686e-05, - "loss": 0.7605, + "epoch": 0.9606043490539395, + "grad_norm": 3.4712116606356336, + "learning_rate": 1.5899291384729606e-05, + "loss": 1.2385, "step": 6803 }, { - "epoch": 1.4267141958481862, - "grad_norm": 7.101900565039615, - "learning_rate": 1.1274487102241205e-05, - "loss": 0.6591, + "epoch": 0.9607455521039254, + "grad_norm": 3.6750775894209404, + "learning_rate": 1.589806038899094e-05, + "loss": 1.0007, "step": 6804 }, { - "epoch": 1.4269238834137137, - "grad_norm": 8.561583302191645, - "learning_rate": 1.1272241646615874e-05, - "loss": 1.1022, + "epoch": 0.9608867551539113, + "grad_norm": 3.057119133281129, + "learning_rate": 1.5896829256183905e-05, + "loss": 0.8873, "step": 6805 }, { - "epoch": 1.427133570979241, - "grad_norm": 6.15805347737487, - "learning_rate": 1.1269996125785954e-05, - "loss": 0.7484, + "epoch": 0.9610279582038972, + "grad_norm": 3.136489052840755, + "learning_rate": 1.58955979863371e-05, + "loss": 0.8882, "step": 6806 }, { - "epoch": 1.4273432585447683, - "grad_norm": 5.717574163656914, - "learning_rate": 1.1267750539866531e-05, - "loss": 0.5766, + "epoch": 0.9611691612538831, + "grad_norm": 3.6939306596756376, + "learning_rate": 1.5894366579479144e-05, + "loss": 0.9684, "step": 6807 }, { - "epoch": 1.4275529461102956, - "grad_norm": 8.552316564846208, - "learning_rate": 1.126550488897269e-05, - "loss": 0.6417, + "epoch": 0.961310364303869, + "grad_norm": 2.9173679167197206, + "learning_rate": 1.5893135035638658e-05, + "loss": 0.9104, "step": 6808 }, { - "epoch": 1.427762633675823, - "grad_norm": 6.416222677272059, - "learning_rate": 1.1263259173219533e-05, - "loss": 0.9058, + "epoch": 0.9614515673538548, + "grad_norm": 2.8311280096775353, + "learning_rate": 1.5891903354844258e-05, + "loss": 0.8049, "step": 6809 }, { - "epoch": 1.4279723212413504, - "grad_norm": 7.061573179795442, - "learning_rate": 1.1261013392722149e-05, - "loss": 0.9154, + "epoch": 0.9615927704038407, + "grad_norm": 3.573897937300494, + "learning_rate": 1.5890671537124565e-05, + "loss": 0.9463, "step": 6810 }, { - "epoch": 1.4281820088068777, - "grad_norm": 7.211097942811558, - "learning_rate": 1.1258767547595641e-05, - "loss": 0.9448, + "epoch": 0.9617339734538266, + "grad_norm": 3.0231796177610146, + "learning_rate": 1.588943958250821e-05, + "loss": 0.7729, "step": 6811 }, { - "epoch": 1.428391696372405, - "grad_norm": 7.471317820971958, - "learning_rate": 1.1256521637955116e-05, - "loss": 0.9165, + "epoch": 0.9618751765038125, + "grad_norm": 3.0563598004731216, + "learning_rate": 1.5888207491023824e-05, + "loss": 0.9399, "step": 6812 }, { - "epoch": 1.4286013839379326, - "grad_norm": 6.838154164727039, - "learning_rate": 1.1254275663915674e-05, - "loss": 0.7439, + "epoch": 0.9620163795537984, + "grad_norm": 3.915954008629436, + "learning_rate": 1.588697526270004e-05, + "loss": 1.0972, "step": 6813 }, { - "epoch": 1.4288110715034599, - "grad_norm": 6.269883511501781, - "learning_rate": 1.125202962559243e-05, - "loss": 0.857, + "epoch": 0.9621575826037843, + "grad_norm": 3.571242425262027, + "learning_rate": 1.5885742897565494e-05, + "loss": 0.9581, "step": 6814 }, { - "epoch": 1.4290207590689872, - "grad_norm": 7.609776467570679, - "learning_rate": 1.12497835231005e-05, - "loss": 0.8754, + "epoch": 0.9622987856537701, + "grad_norm": 3.425426153330954, + "learning_rate": 1.5884510395648823e-05, + "loss": 1.0471, "step": 6815 }, { - "epoch": 1.4292304466345147, - "grad_norm": 6.48014643829328, - "learning_rate": 1.1247537356554994e-05, - "loss": 0.8153, + "epoch": 0.962439988703756, + "grad_norm": 2.854683644645683, + "learning_rate": 1.588327775697867e-05, + "loss": 0.792, "step": 6816 }, { - "epoch": 1.429440134200042, - "grad_norm": 7.295291520523818, - "learning_rate": 1.1245291126071035e-05, - "loss": 0.8505, + "epoch": 0.9625811917537419, + "grad_norm": 4.078296539366986, + "learning_rate": 1.5882044981583685e-05, + "loss": 1.141, "step": 6817 }, { - "epoch": 1.4296498217655693, - "grad_norm": 6.889204933847784, - "learning_rate": 1.1243044831763744e-05, - "loss": 0.7721, + "epoch": 0.9627223948037278, + "grad_norm": 3.8403016607712845, + "learning_rate": 1.5880812069492516e-05, + "loss": 0.9468, "step": 6818 }, { - "epoch": 1.4298595093310966, - "grad_norm": 8.469373231386127, - "learning_rate": 1.1240798473748252e-05, - "loss": 0.999, + "epoch": 0.9628635978537137, + "grad_norm": 3.7463268626977646, + "learning_rate": 1.5879579020733814e-05, + "loss": 1.0548, "step": 6819 }, { - "epoch": 1.430069196896624, - "grad_norm": 6.509745627852387, - "learning_rate": 1.1238552052139684e-05, - "loss": 0.745, + "epoch": 0.9630048009036996, + "grad_norm": 2.8867767484420708, + "learning_rate": 1.5878345835336232e-05, + "loss": 0.8262, "step": 6820 }, { - "epoch": 1.4302788844621515, - "grad_norm": 8.08729757098183, - "learning_rate": 1.1236305567053176e-05, - "loss": 0.8381, + "epoch": 0.9631460039536854, + "grad_norm": 3.9045870175460133, + "learning_rate": 1.587711251332843e-05, + "loss": 1.2583, "step": 6821 }, { - "epoch": 1.4304885720276788, - "grad_norm": 6.463844679219682, - "learning_rate": 1.1234059018603867e-05, - "loss": 0.7346, + "epoch": 0.9632872070036713, + "grad_norm": 3.310821321982624, + "learning_rate": 1.5875879054739075e-05, + "loss": 0.9406, "step": 6822 }, { - "epoch": 1.430698259593206, - "grad_norm": 6.506432636843981, - "learning_rate": 1.1231812406906885e-05, - "loss": 0.7153, + "epoch": 0.9634284100536572, + "grad_norm": 3.948061844915971, + "learning_rate": 1.587464545959683e-05, + "loss": 0.9872, "step": 6823 }, { - "epoch": 1.4309079471587336, - "grad_norm": 8.201294357528045, - "learning_rate": 1.122956573207739e-05, - "loss": 0.9634, + "epoch": 0.9635696131036431, + "grad_norm": 3.3474727001525486, + "learning_rate": 1.587341172793036e-05, + "loss": 1.1394, "step": 6824 }, { - "epoch": 1.431117634724261, - "grad_norm": 7.680738299593677, - "learning_rate": 1.1227318994230512e-05, - "loss": 0.9054, + "epoch": 0.963710816153629, + "grad_norm": 2.9843997917899645, + "learning_rate": 1.5872177859768336e-05, + "loss": 0.8646, "step": 6825 }, { - "epoch": 1.4313273222897882, - "grad_norm": 5.994903141195533, - "learning_rate": 1.122507219348141e-05, - "loss": 0.7382, + "epoch": 0.9638520192036149, + "grad_norm": 4.043080962413109, + "learning_rate": 1.5870943855139437e-05, + "loss": 1.2428, "step": 6826 }, { - "epoch": 1.4315370098553155, - "grad_norm": 5.707624272431289, - "learning_rate": 1.122282532994523e-05, - "loss": 0.7145, + "epoch": 0.9639932222536006, + "grad_norm": 2.927053804436132, + "learning_rate": 1.5869709714072335e-05, + "loss": 1.0471, "step": 6827 }, { - "epoch": 1.4317466974208428, - "grad_norm": 7.897009353019369, - "learning_rate": 1.1220578403737137e-05, - "loss": 0.89, + "epoch": 0.9641344253035865, + "grad_norm": 3.5492530662742485, + "learning_rate": 1.5868475436595713e-05, + "loss": 1.1749, "step": 6828 }, { - "epoch": 1.4319563849863703, - "grad_norm": 6.319534739876409, - "learning_rate": 1.1218331414972277e-05, - "loss": 0.7071, + "epoch": 0.9642756283535724, + "grad_norm": 3.102729490915012, + "learning_rate": 1.5867241022738262e-05, + "loss": 0.9967, "step": 6829 }, { - "epoch": 1.4321660725518977, - "grad_norm": 5.15359936519419, - "learning_rate": 1.1216084363765825e-05, - "loss": 0.6287, + "epoch": 0.9644168314035583, + "grad_norm": 3.516904227721121, + "learning_rate": 1.586600647252866e-05, + "loss": 1.0932, "step": 6830 }, { - "epoch": 1.432375760117425, - "grad_norm": 6.737012297382711, - "learning_rate": 1.1213837250232935e-05, - "loss": 0.7068, + "epoch": 0.9645580344535442, + "grad_norm": 3.3609921418556965, + "learning_rate": 1.5864771785995602e-05, + "loss": 1.0374, "step": 6831 }, { - "epoch": 1.4325854476829525, - "grad_norm": 7.5346853377034675, - "learning_rate": 1.1211590074488783e-05, - "loss": 0.8438, + "epoch": 0.96469923750353, + "grad_norm": 3.276700813425036, + "learning_rate": 1.586353696316778e-05, + "loss": 1.0624, "step": 6832 }, { - "epoch": 1.4327951352484798, - "grad_norm": 7.816487233258562, - "learning_rate": 1.1209342836648538e-05, - "loss": 1.0707, + "epoch": 0.9648404405535159, + "grad_norm": 3.222746563496837, + "learning_rate": 1.586230200407389e-05, + "loss": 1.0355, "step": 6833 }, { - "epoch": 1.433004822814007, - "grad_norm": 6.861830249273758, - "learning_rate": 1.1207095536827376e-05, - "loss": 0.9528, + "epoch": 0.9649816436035018, + "grad_norm": 3.5511795942251068, + "learning_rate": 1.5861066908742638e-05, + "loss": 1.0672, "step": 6834 }, { - "epoch": 1.4332145103795346, - "grad_norm": 8.068642141290168, - "learning_rate": 1.1204848175140472e-05, - "loss": 0.8905, + "epoch": 0.9651228466534877, + "grad_norm": 2.9941926665216476, + "learning_rate": 1.5859831677202718e-05, + "loss": 0.8671, "step": 6835 }, { - "epoch": 1.433424197945062, - "grad_norm": 5.713705973028899, - "learning_rate": 1.1202600751703013e-05, - "loss": 0.5972, + "epoch": 0.9652640497034736, + "grad_norm": 3.070440140597219, + "learning_rate": 1.585859630948284e-05, + "loss": 0.9123, "step": 6836 }, { - "epoch": 1.4336338855105892, - "grad_norm": 7.042698191155072, - "learning_rate": 1.1200353266630173e-05, - "loss": 0.887, + "epoch": 0.9654052527534595, + "grad_norm": 3.7192103244389427, + "learning_rate": 1.5857360805611717e-05, + "loss": 0.9213, "step": 6837 }, { - "epoch": 1.4338435730761165, - "grad_norm": 8.732268642872002, - "learning_rate": 1.1198105720037154e-05, - "loss": 1.1082, + "epoch": 0.9655464558034453, + "grad_norm": 3.523345687234521, + "learning_rate": 1.5856125165618056e-05, + "loss": 1.1936, "step": 6838 }, { - "epoch": 1.4340532606416438, - "grad_norm": 6.380987528333441, - "learning_rate": 1.1195858112039136e-05, - "loss": 0.7097, + "epoch": 0.9656876588534312, + "grad_norm": 3.4636091709232892, + "learning_rate": 1.5854889389530577e-05, + "loss": 1.2216, "step": 6839 }, { - "epoch": 1.4342629482071714, - "grad_norm": 8.448333331418867, - "learning_rate": 1.1193610442751313e-05, - "loss": 0.833, + "epoch": 0.9658288619034171, + "grad_norm": 3.5887460992891738, + "learning_rate": 1.5853653477377996e-05, + "loss": 0.9244, "step": 6840 }, { - "epoch": 1.4344726357726987, - "grad_norm": 6.381589242359744, - "learning_rate": 1.1191362712288886e-05, - "loss": 0.7075, + "epoch": 0.965970064953403, + "grad_norm": 2.721520328532032, + "learning_rate": 1.5852417429189037e-05, + "loss": 0.7991, "step": 6841 }, { - "epoch": 1.434682323338226, - "grad_norm": 7.89764556914232, - "learning_rate": 1.1189114920767054e-05, - "loss": 0.7255, + "epoch": 0.9661112680033889, + "grad_norm": 3.4393972019609196, + "learning_rate": 1.585118124499242e-05, + "loss": 0.9428, "step": 6842 }, { - "epoch": 1.4348920109037535, - "grad_norm": 8.372742998193994, - "learning_rate": 1.118686706830102e-05, - "loss": 0.8649, + "epoch": 0.9662524710533748, + "grad_norm": 3.432416544453715, + "learning_rate": 1.5849944924816883e-05, + "loss": 1.1066, "step": 6843 }, { - "epoch": 1.4351016984692808, - "grad_norm": 5.711536870467814, - "learning_rate": 1.1184619155005989e-05, - "loss": 0.694, + "epoch": 0.9663936741033606, + "grad_norm": 3.0051061265447045, + "learning_rate": 1.584870846869115e-05, + "loss": 0.8612, "step": 6844 }, { - "epoch": 1.4353113860348081, - "grad_norm": 6.1391252319342575, - "learning_rate": 1.1182371180997175e-05, - "loss": 0.8052, + "epoch": 0.9665348771533465, + "grad_norm": 3.1454586567533847, + "learning_rate": 1.584747187664396e-05, + "loss": 0.8654, "step": 6845 }, { - "epoch": 1.4355210736003354, - "grad_norm": 8.167742332903346, - "learning_rate": 1.1180123146389783e-05, - "loss": 0.7993, + "epoch": 0.9666760802033324, + "grad_norm": 3.735789297060759, + "learning_rate": 1.5846235148704047e-05, + "loss": 1.1413, "step": 6846 }, { - "epoch": 1.4357307611658627, - "grad_norm": 9.865808095658805, - "learning_rate": 1.117787505129904e-05, - "loss": 1.1074, + "epoch": 0.9668172832533183, + "grad_norm": 3.8938661289639125, + "learning_rate": 1.5844998284900155e-05, + "loss": 1.1151, "step": 6847 }, { - "epoch": 1.4359404487313903, - "grad_norm": 7.46567715244991, - "learning_rate": 1.117562689584015e-05, - "loss": 0.6687, + "epoch": 0.9669584863033042, + "grad_norm": 3.7629071977821638, + "learning_rate": 1.5843761285261027e-05, + "loss": 0.9197, "step": 6848 }, { - "epoch": 1.4361501362969176, - "grad_norm": 5.684198534797, - "learning_rate": 1.1173378680128346e-05, - "loss": 0.5612, + "epoch": 0.96709968935329, + "grad_norm": 4.374299755015566, + "learning_rate": 1.584252414981541e-05, + "loss": 1.1907, "step": 6849 }, { - "epoch": 1.4363598238624449, - "grad_norm": 8.10394183334663, - "learning_rate": 1.1171130404278849e-05, - "loss": 0.8482, + "epoch": 0.9672408924032759, + "grad_norm": 3.203809547659136, + "learning_rate": 1.5841286878592055e-05, + "loss": 0.9175, "step": 6850 }, { - "epoch": 1.4365695114279724, - "grad_norm": 6.615306366319538, - "learning_rate": 1.1168882068406887e-05, - "loss": 0.5142, + "epoch": 0.9673820954532618, + "grad_norm": 3.3445786517312053, + "learning_rate": 1.5840049471619717e-05, + "loss": 1.1313, "step": 6851 }, { - "epoch": 1.4367791989934997, - "grad_norm": 7.571027684801783, - "learning_rate": 1.1166633672627692e-05, - "loss": 0.9545, + "epoch": 0.9675232985032477, + "grad_norm": 3.391874673777384, + "learning_rate": 1.583881192892715e-05, + "loss": 1.0373, "step": 6852 }, { - "epoch": 1.436988886559027, - "grad_norm": 7.590467612895191, - "learning_rate": 1.1164385217056499e-05, - "loss": 0.9034, + "epoch": 0.9676645015532336, + "grad_norm": 3.0386219188269252, + "learning_rate": 1.5837574250543118e-05, + "loss": 0.7768, "step": 6853 }, { - "epoch": 1.4371985741245545, - "grad_norm": 6.51941949486874, - "learning_rate": 1.1162136701808543e-05, - "loss": 0.8776, + "epoch": 0.9678057046032195, + "grad_norm": 3.0068322827410707, + "learning_rate": 1.5836336436496377e-05, + "loss": 0.8668, "step": 6854 }, { - "epoch": 1.4374082616900818, - "grad_norm": 6.992731292453129, - "learning_rate": 1.1159888126999065e-05, - "loss": 0.6193, + "epoch": 0.9679469076532053, + "grad_norm": 3.4130528716695028, + "learning_rate": 1.5835098486815698e-05, + "loss": 0.8466, "step": 6855 }, { - "epoch": 1.4376179492556092, - "grad_norm": 6.27191166475011, - "learning_rate": 1.115763949274331e-05, - "loss": 0.8363, + "epoch": 0.9680881107031912, + "grad_norm": 3.150392581721561, + "learning_rate": 1.5833860401529855e-05, + "loss": 1.0577, "step": 6856 }, { - "epoch": 1.4378276368211365, - "grad_norm": 5.535020701068475, - "learning_rate": 1.1155390799156525e-05, - "loss": 0.6222, + "epoch": 0.9682293137531771, + "grad_norm": 2.7528416175145547, + "learning_rate": 1.5832622180667613e-05, + "loss": 1.024, "step": 6857 }, { - "epoch": 1.4380373243866638, - "grad_norm": 7.695522514372399, - "learning_rate": 1.115314204635395e-05, - "loss": 0.9623, + "epoch": 0.968370516803163, + "grad_norm": 3.0843098285562354, + "learning_rate": 1.5831383824257748e-05, + "loss": 0.9073, "step": 6858 }, { - "epoch": 1.4382470119521913, - "grad_norm": 7.296892077667993, - "learning_rate": 1.1150893234450855e-05, - "loss": 0.7106, + "epoch": 0.9685117198531489, + "grad_norm": 3.5290779794876537, + "learning_rate": 1.5830145332329043e-05, + "loss": 1.1477, "step": 6859 }, { - "epoch": 1.4384566995177186, - "grad_norm": 7.673352068248968, - "learning_rate": 1.114864436356248e-05, - "loss": 0.8278, + "epoch": 0.9686529229031348, + "grad_norm": 3.510242526965286, + "learning_rate": 1.582890670491028e-05, + "loss": 1.1602, "step": 6860 }, { - "epoch": 1.438666387083246, - "grad_norm": 7.040973132530379, - "learning_rate": 1.1146395433804089e-05, - "loss": 0.8479, + "epoch": 0.9687941259531205, + "grad_norm": 3.228156173843682, + "learning_rate": 1.5827667942030244e-05, + "loss": 1.1081, "step": 6861 }, { - "epoch": 1.4388760746487734, - "grad_norm": 6.607635933500828, - "learning_rate": 1.1144146445290944e-05, - "loss": 0.7321, + "epoch": 0.9689353290031064, + "grad_norm": 2.93213187446821, + "learning_rate": 1.5826429043717716e-05, + "loss": 0.9327, "step": 6862 }, { - "epoch": 1.4390857622143007, - "grad_norm": 5.495097877891542, - "learning_rate": 1.114189739813831e-05, - "loss": 0.638, + "epoch": 0.9690765320530923, + "grad_norm": 2.8671632917765932, + "learning_rate": 1.5825190010001496e-05, + "loss": 0.9143, "step": 6863 }, { - "epoch": 1.439295449779828, - "grad_norm": 7.301819846091403, - "learning_rate": 1.1139648292461453e-05, - "loss": 0.8304, + "epoch": 0.9692177351030782, + "grad_norm": 3.873518327711928, + "learning_rate": 1.5823950840910376e-05, + "loss": 1.1302, "step": 6864 }, { - "epoch": 1.4395051373453553, - "grad_norm": 7.131639686568112, - "learning_rate": 1.1137399128375648e-05, - "loss": 0.6566, + "epoch": 0.9693589381530641, + "grad_norm": 3.131185158429214, + "learning_rate": 1.5822711536473156e-05, + "loss": 0.7891, "step": 6865 }, { - "epoch": 1.4397148249108827, - "grad_norm": 7.524496340441047, - "learning_rate": 1.113514990599616e-05, - "loss": 0.8597, + "epoch": 0.96950014120305, + "grad_norm": 3.884595612913844, + "learning_rate": 1.582147209671863e-05, + "loss": 1.1184, "step": 6866 }, { - "epoch": 1.4399245124764102, - "grad_norm": 6.956022342830008, - "learning_rate": 1.1132900625438274e-05, - "loss": 0.6969, + "epoch": 0.9696413442530358, + "grad_norm": 3.327291461393861, + "learning_rate": 1.582023252167561e-05, + "loss": 1.0937, "step": 6867 }, { - "epoch": 1.4401342000419375, - "grad_norm": 7.546059746077672, - "learning_rate": 1.1130651286817265e-05, - "loss": 0.7141, + "epoch": 0.9697825473030217, + "grad_norm": 4.067796244243117, + "learning_rate": 1.5818992811372898e-05, + "loss": 1.0095, "step": 6868 }, { - "epoch": 1.4403438876074648, - "grad_norm": 9.220226344601553, - "learning_rate": 1.1128401890248415e-05, - "loss": 1.0678, + "epoch": 0.9699237503530076, + "grad_norm": 3.5734283154589366, + "learning_rate": 1.5817752965839308e-05, + "loss": 1.0098, "step": 6869 }, { - "epoch": 1.4405535751729923, - "grad_norm": 8.926742237162193, - "learning_rate": 1.1126152435847012e-05, - "loss": 0.8513, + "epoch": 0.9700649534029935, + "grad_norm": 3.584176672852647, + "learning_rate": 1.581651298510365e-05, + "loss": 0.9498, "step": 6870 }, { - "epoch": 1.4407632627385196, - "grad_norm": 6.901832700893549, - "learning_rate": 1.1123902923728342e-05, - "loss": 0.7339, + "epoch": 0.9702061564529794, + "grad_norm": 3.2518716175459983, + "learning_rate": 1.581527286919474e-05, + "loss": 1.0061, "step": 6871 }, { - "epoch": 1.440972950304047, - "grad_norm": 6.371586708776338, - "learning_rate": 1.11216533540077e-05, - "loss": 0.7701, + "epoch": 0.9703473595029652, + "grad_norm": 3.4856346375892553, + "learning_rate": 1.58140326181414e-05, + "loss": 0.7918, "step": 6872 }, { - "epoch": 1.4411826378695745, - "grad_norm": 6.795254688220955, - "learning_rate": 1.1119403726800377e-05, - "loss": 0.9154, + "epoch": 0.9704885625529511, + "grad_norm": 2.6094398776856638, + "learning_rate": 1.581279223197246e-05, + "loss": 0.6555, "step": 6873 }, { - "epoch": 1.4413923254351018, - "grad_norm": 5.90342045155571, - "learning_rate": 1.1117154042221677e-05, - "loss": 0.5962, + "epoch": 0.970629765602937, + "grad_norm": 3.3146461727426906, + "learning_rate": 1.5811551710716732e-05, + "loss": 0.8259, "step": 6874 }, { - "epoch": 1.441602013000629, - "grad_norm": 7.298245507032124, - "learning_rate": 1.1114904300386886e-05, - "loss": 0.8072, + "epoch": 0.9707709686529229, + "grad_norm": 3.4566785361733077, + "learning_rate": 1.5810311054403056e-05, + "loss": 1.0797, "step": 6875 }, { - "epoch": 1.4418117005661564, - "grad_norm": 6.481776073143847, - "learning_rate": 1.1112654501411321e-05, - "loss": 0.9831, + "epoch": 0.9709121717029088, + "grad_norm": 3.7048654585669736, + "learning_rate": 1.5809070263060256e-05, + "loss": 0.8461, "step": 6876 }, { - "epoch": 1.4420213881316837, - "grad_norm": 5.918697646547978, - "learning_rate": 1.1110404645410282e-05, - "loss": 0.4852, + "epoch": 0.9710533747528947, + "grad_norm": 2.8460653855458053, + "learning_rate": 1.5807829336717176e-05, + "loss": 0.8323, "step": 6877 }, { - "epoch": 1.4422310756972112, - "grad_norm": 6.6511943933593365, - "learning_rate": 1.1108154732499079e-05, - "loss": 0.7606, + "epoch": 0.9711945778028805, + "grad_norm": 4.093219741670033, + "learning_rate": 1.580658827540265e-05, + "loss": 1.0815, "step": 6878 }, { - "epoch": 1.4424407632627385, - "grad_norm": 7.504540261000289, - "learning_rate": 1.1105904762793023e-05, - "loss": 0.875, + "epoch": 0.9713357808528664, + "grad_norm": 3.8501741569804246, + "learning_rate": 1.580534707914552e-05, + "loss": 1.2278, "step": 6879 }, { - "epoch": 1.4426504508282658, - "grad_norm": 7.7672005691832755, - "learning_rate": 1.1103654736407432e-05, - "loss": 0.9111, + "epoch": 0.9714769839028523, + "grad_norm": 3.9819037809808386, + "learning_rate": 1.5804105747974626e-05, + "loss": 1.0844, "step": 6880 }, { - "epoch": 1.4428601383937933, - "grad_norm": 6.606686948993776, - "learning_rate": 1.1101404653457619e-05, - "loss": 0.7731, + "epoch": 0.9716181869528382, + "grad_norm": 3.9075175206641486, + "learning_rate": 1.5802864281918832e-05, + "loss": 1.0516, "step": 6881 }, { - "epoch": 1.4430698259593207, - "grad_norm": 7.963783671621207, - "learning_rate": 1.109915451405891e-05, - "loss": 0.8656, + "epoch": 0.9717593900028241, + "grad_norm": 3.5049321380720655, + "learning_rate": 1.5801622681006966e-05, + "loss": 0.9866, "step": 6882 }, { - "epoch": 1.443279513524848, - "grad_norm": 8.344906365838602, - "learning_rate": 1.1096904318326623e-05, - "loss": 0.8629, + "epoch": 0.97190059305281, + "grad_norm": 3.046121035944033, + "learning_rate": 1.5800380945267902e-05, + "loss": 0.8331, "step": 6883 }, { - "epoch": 1.4434892010903753, - "grad_norm": 8.368263836560889, - "learning_rate": 1.109465406637609e-05, - "loss": 0.9468, + "epoch": 0.9720417961027958, + "grad_norm": 3.35198584994267, + "learning_rate": 1.5799139074730487e-05, + "loss": 0.9949, "step": 6884 }, { - "epoch": 1.4436988886559026, - "grad_norm": 8.323988382216479, - "learning_rate": 1.1092403758322633e-05, - "loss": 0.7567, + "epoch": 0.9721829991527817, + "grad_norm": 3.5445071027830206, + "learning_rate": 1.5797897069423584e-05, + "loss": 0.8613, "step": 6885 }, { - "epoch": 1.44390857622143, - "grad_norm": 7.880848924046126, - "learning_rate": 1.1090153394281595e-05, - "loss": 0.8679, + "epoch": 0.9723242022027676, + "grad_norm": 3.472183312156491, + "learning_rate": 1.5796654929376057e-05, + "loss": 1.0795, "step": 6886 }, { - "epoch": 1.4441182637869574, - "grad_norm": 6.838945078947538, - "learning_rate": 1.10879029743683e-05, - "loss": 0.9316, + "epoch": 0.9724654052527535, + "grad_norm": 3.787288286908652, + "learning_rate": 1.5795412654616776e-05, + "loss": 0.8871, "step": 6887 }, { - "epoch": 1.4443279513524847, - "grad_norm": 7.1032783562023365, - "learning_rate": 1.1085652498698091e-05, - "loss": 0.8852, + "epoch": 0.9726066083027394, + "grad_norm": 2.6620783569640687, + "learning_rate": 1.5794170245174605e-05, + "loss": 0.7564, "step": 6888 }, { - "epoch": 1.4445376389180122, - "grad_norm": 7.021332674682388, - "learning_rate": 1.1083401967386311e-05, - "loss": 0.6722, + "epoch": 0.9727478113527253, + "grad_norm": 3.6164220689390367, + "learning_rate": 1.579292770107842e-05, + "loss": 0.9598, "step": 6889 }, { - "epoch": 1.4447473264835395, - "grad_norm": 8.515355877604629, - "learning_rate": 1.1081151380548299e-05, - "loss": 0.9394, + "epoch": 0.9728890144027111, + "grad_norm": 3.234976480525693, + "learning_rate": 1.5791685022357098e-05, + "loss": 1.0008, "step": 6890 }, { - "epoch": 1.4449570140490668, - "grad_norm": 6.193472587828282, - "learning_rate": 1.1078900738299402e-05, - "loss": 0.6843, + "epoch": 0.973030217452697, + "grad_norm": 3.1478943685983323, + "learning_rate": 1.5790442209039517e-05, + "loss": 0.9154, "step": 6891 }, { - "epoch": 1.4451667016145944, - "grad_norm": 5.782952204333246, - "learning_rate": 1.1076650040754975e-05, - "loss": 0.6531, + "epoch": 0.9731714205026829, + "grad_norm": 3.979593816630565, + "learning_rate": 1.5789199261154557e-05, + "loss": 0.9352, "step": 6892 }, { - "epoch": 1.4453763891801217, - "grad_norm": 8.732258653960399, - "learning_rate": 1.1074399288030365e-05, - "loss": 0.7034, + "epoch": 0.9733126235526688, + "grad_norm": 3.086726929904248, + "learning_rate": 1.578795617873111e-05, + "loss": 0.9797, "step": 6893 }, { - "epoch": 1.445586076745649, - "grad_norm": 5.386831830278344, - "learning_rate": 1.1072148480240925e-05, - "loss": 0.6337, + "epoch": 0.9734538266026547, + "grad_norm": 3.547628256933968, + "learning_rate": 1.578671296179806e-05, + "loss": 1.0601, "step": 6894 }, { - "epoch": 1.4457957643111763, - "grad_norm": 5.908290954283815, - "learning_rate": 1.1069897617502018e-05, - "loss": 0.678, + "epoch": 0.9735950296526404, + "grad_norm": 3.7934147359910755, + "learning_rate": 1.57854696103843e-05, + "loss": 1.1474, "step": 6895 }, { - "epoch": 1.4460054518767036, - "grad_norm": 7.897323336675538, - "learning_rate": 1.1067646699929004e-05, - "loss": 0.9813, + "epoch": 0.9737362327026263, + "grad_norm": 4.291327581643057, + "learning_rate": 1.5784226124518724e-05, + "loss": 1.4029, "step": 6896 }, { - "epoch": 1.4462151394422311, - "grad_norm": 7.993555765918302, - "learning_rate": 1.1065395727637245e-05, - "loss": 0.9201, + "epoch": 0.9738774357526122, + "grad_norm": 3.033833682302448, + "learning_rate": 1.578298250423023e-05, + "loss": 0.8849, "step": 6897 }, { - "epoch": 1.4464248270077584, - "grad_norm": 6.7154006922599905, - "learning_rate": 1.1063144700742103e-05, - "loss": 0.7165, + "epoch": 0.9740186388025981, + "grad_norm": 3.2378779310510093, + "learning_rate": 1.5781738749547724e-05, + "loss": 0.9698, "step": 6898 }, { - "epoch": 1.4466345145732857, - "grad_norm": 6.641355129101737, - "learning_rate": 1.1060893619358955e-05, - "loss": 0.7253, + "epoch": 0.974159841852584, + "grad_norm": 3.725550847691739, + "learning_rate": 1.5780494860500103e-05, + "loss": 1.0099, "step": 6899 }, { - "epoch": 1.4468442021388133, - "grad_norm": 7.115315108583384, - "learning_rate": 1.1058642483603167e-05, - "loss": 0.7449, + "epoch": 0.9743010449025699, + "grad_norm": 3.158443764881195, + "learning_rate": 1.5779250837116275e-05, + "loss": 0.8815, "step": 6900 }, { - "epoch": 1.4470538897043406, - "grad_norm": 7.191787097083664, - "learning_rate": 1.1056391293590117e-05, - "loss": 0.8129, + "epoch": 0.9744422479525557, + "grad_norm": 2.808354497167918, + "learning_rate": 1.577800667942516e-05, + "loss": 0.8346, "step": 6901 }, { - "epoch": 1.4472635772698679, - "grad_norm": 6.969260975628303, - "learning_rate": 1.1054140049435178e-05, - "loss": 0.8854, + "epoch": 0.9745834510025416, + "grad_norm": 3.6570803382766566, + "learning_rate": 1.577676238745566e-05, + "loss": 1.11, "step": 6902 }, { - "epoch": 1.4474732648353952, - "grad_norm": 7.015662062043676, - "learning_rate": 1.1051888751253738e-05, - "loss": 0.6776, + "epoch": 0.9747246540525275, + "grad_norm": 3.605730746823552, + "learning_rate": 1.57755179612367e-05, + "loss": 1.1869, "step": 6903 }, { - "epoch": 1.4476829524009227, - "grad_norm": 6.568530606508727, - "learning_rate": 1.104963739916117e-05, - "loss": 0.7591, + "epoch": 0.9748658571025134, + "grad_norm": 4.262153500361827, + "learning_rate": 1.5774273400797195e-05, + "loss": 1.1906, "step": 6904 }, { - "epoch": 1.44789263996645, - "grad_norm": 6.801360167705214, - "learning_rate": 1.104738599327287e-05, - "loss": 0.7625, + "epoch": 0.9750070601524993, + "grad_norm": 2.7671942956806292, + "learning_rate": 1.5773028706166072e-05, + "loss": 0.7458, "step": 6905 }, { - "epoch": 1.4481023275319773, - "grad_norm": 6.697832381429832, - "learning_rate": 1.1045134533704218e-05, - "loss": 0.609, + "epoch": 0.9751482632024852, + "grad_norm": 3.6153926077309233, + "learning_rate": 1.5771783877372252e-05, + "loss": 1.1004, "step": 6906 }, { - "epoch": 1.4483120150975046, - "grad_norm": 7.044685071598635, - "learning_rate": 1.1042883020570608e-05, - "loss": 0.7233, + "epoch": 0.975289466252471, + "grad_norm": 3.7013225797162317, + "learning_rate": 1.5770538914444672e-05, + "loss": 1.1629, "step": 6907 }, { - "epoch": 1.4485217026630322, - "grad_norm": 9.776300223920158, - "learning_rate": 1.1040631453987436e-05, - "loss": 0.8672, + "epoch": 0.9754306693024569, + "grad_norm": 3.9692878853993054, + "learning_rate": 1.576929381741226e-05, + "loss": 0.9929, "step": 6908 }, { - "epoch": 1.4487313902285595, - "grad_norm": 8.534666058673096, - "learning_rate": 1.1038379834070099e-05, - "loss": 0.9572, + "epoch": 0.9755718723524428, + "grad_norm": 3.335377330363598, + "learning_rate": 1.576804858630394e-05, + "loss": 1.0708, "step": 6909 }, { - "epoch": 1.4489410777940868, - "grad_norm": 7.29111684840825, - "learning_rate": 1.1036128160933987e-05, - "loss": 0.7511, + "epoch": 0.9757130754024287, + "grad_norm": 3.3016386486113207, + "learning_rate": 1.5766803221148676e-05, + "loss": 0.9419, "step": 6910 }, { - "epoch": 1.4491507653596143, - "grad_norm": 6.587983089197357, - "learning_rate": 1.1033876434694517e-05, - "loss": 0.7783, + "epoch": 0.9758542784524146, + "grad_norm": 3.6043610029243336, + "learning_rate": 1.576555772197539e-05, + "loss": 1.2182, "step": 6911 }, { - "epoch": 1.4493604529251416, - "grad_norm": 6.774896749683167, - "learning_rate": 1.1031624655467085e-05, - "loss": 0.9562, + "epoch": 0.9759954815024005, + "grad_norm": 2.8666036338856244, + "learning_rate": 1.576431208881303e-05, + "loss": 0.9397, "step": 6912 }, { - "epoch": 1.449570140490669, - "grad_norm": 7.952914255325333, - "learning_rate": 1.1029372823367099e-05, - "loss": 0.8752, + "epoch": 0.9761366845523863, + "grad_norm": 3.69751471671717, + "learning_rate": 1.576306632169055e-05, + "loss": 1.0234, "step": 6913 }, { - "epoch": 1.4497798280561962, - "grad_norm": 8.1949826088685, - "learning_rate": 1.1027120938509969e-05, - "loss": 1.0126, + "epoch": 0.9762778876023722, + "grad_norm": 3.0733924380982525, + "learning_rate": 1.5761820420636894e-05, + "loss": 0.8671, "step": 6914 }, { - "epoch": 1.4499895156217235, - "grad_norm": 7.138411227267545, - "learning_rate": 1.1024869001011114e-05, - "loss": 0.9027, + "epoch": 0.9764190906523581, + "grad_norm": 3.7748641828352287, + "learning_rate": 1.576057438568102e-05, + "loss": 0.9105, "step": 6915 }, { - "epoch": 1.450199203187251, - "grad_norm": 7.18447107420487, - "learning_rate": 1.1022617010985941e-05, - "loss": 0.6824, + "epoch": 0.976560293702344, + "grad_norm": 4.0278514125909535, + "learning_rate": 1.5759328216851886e-05, + "loss": 1.1893, "step": 6916 }, { - "epoch": 1.4504088907527783, - "grad_norm": 7.850307653510362, - "learning_rate": 1.1020364968549875e-05, - "loss": 0.9505, + "epoch": 0.9767014967523299, + "grad_norm": 3.7982278528909563, + "learning_rate": 1.5758081914178457e-05, + "loss": 1.223, "step": 6917 }, { - "epoch": 1.4506185783183057, - "grad_norm": 7.386083095944167, - "learning_rate": 1.1018112873818335e-05, - "loss": 0.6007, + "epoch": 0.9768426998023158, + "grad_norm": 3.34537858554109, + "learning_rate": 1.5756835477689683e-05, + "loss": 1.1261, "step": 6918 }, { - "epoch": 1.4508282658838332, - "grad_norm": 6.882912068940383, - "learning_rate": 1.1015860726906743e-05, - "loss": 0.7787, + "epoch": 0.9769839028523016, + "grad_norm": 3.246170363653838, + "learning_rate": 1.5755588907414544e-05, + "loss": 0.806, "step": 6919 }, { - "epoch": 1.4510379534493605, - "grad_norm": 8.149664522439808, - "learning_rate": 1.1013608527930526e-05, - "loss": 0.839, + "epoch": 0.9771251059022875, + "grad_norm": 3.713685600210636, + "learning_rate": 1.5754342203382003e-05, + "loss": 1.0807, "step": 6920 }, { - "epoch": 1.4512476410148878, - "grad_norm": 7.844721802081185, - "learning_rate": 1.1011356277005114e-05, - "loss": 0.9131, + "epoch": 0.9772663089522734, + "grad_norm": 3.066571006635337, + "learning_rate": 1.5753095365621033e-05, + "loss": 0.9107, "step": 6921 }, { - "epoch": 1.4514573285804153, - "grad_norm": 7.842989979261236, - "learning_rate": 1.1009103974245938e-05, - "loss": 0.7796, + "epoch": 0.9774075120022593, + "grad_norm": 3.5619322967913076, + "learning_rate": 1.575184839416061e-05, + "loss": 1.1108, "step": 6922 }, { - "epoch": 1.4516670161459426, - "grad_norm": 6.5184647608922655, - "learning_rate": 1.1006851619768435e-05, - "loss": 0.8229, + "epoch": 0.9775487150522452, + "grad_norm": 3.783484182435127, + "learning_rate": 1.5750601289029716e-05, + "loss": 0.9791, "step": 6923 }, { - "epoch": 1.45187670371147, - "grad_norm": 8.890786859810532, - "learning_rate": 1.100459921368804e-05, - "loss": 0.9806, + "epoch": 0.977689918102231, + "grad_norm": 3.347506141866573, + "learning_rate": 1.5749354050257334e-05, + "loss": 1.0116, "step": 6924 }, { - "epoch": 1.4520863912769972, - "grad_norm": 7.1443871621232855, - "learning_rate": 1.1002346756120192e-05, - "loss": 0.7822, + "epoch": 0.9778311211522169, + "grad_norm": 2.951419145491745, + "learning_rate": 1.5748106677872447e-05, + "loss": 0.8336, "step": 6925 }, { - "epoch": 1.4522960788425245, - "grad_norm": 7.322218271571962, - "learning_rate": 1.1000094247180337e-05, - "loss": 0.7353, + "epoch": 0.9779723242022028, + "grad_norm": 3.8479643720786068, + "learning_rate": 1.574685917190404e-05, + "loss": 1.001, "step": 6926 }, { - "epoch": 1.452505766408052, - "grad_norm": 8.857157899483214, - "learning_rate": 1.0997841686983914e-05, - "loss": 1.0475, + "epoch": 0.9781135272521887, + "grad_norm": 3.2761712161137986, + "learning_rate": 1.574561153238111e-05, + "loss": 1.0955, "step": 6927 }, { - "epoch": 1.4527154539735794, - "grad_norm": 6.739530633147822, - "learning_rate": 1.0995589075646376e-05, - "loss": 0.7762, + "epoch": 0.9782547303021746, + "grad_norm": 3.215192452005424, + "learning_rate": 1.5744363759332647e-05, + "loss": 1.0562, "step": 6928 }, { - "epoch": 1.4529251415391067, - "grad_norm": 6.875342787384439, - "learning_rate": 1.099333641328317e-05, - "loss": 0.847, + "epoch": 0.9783959333521604, + "grad_norm": 2.7404894657469754, + "learning_rate": 1.574311585278765e-05, + "loss": 0.7568, "step": 6929 }, { - "epoch": 1.4531348291046342, - "grad_norm": 5.843777896819617, - "learning_rate": 1.099108370000975e-05, - "loss": 0.7353, + "epoch": 0.9785371364021462, + "grad_norm": 3.031030347938334, + "learning_rate": 1.5741867812775125e-05, + "loss": 0.9893, "step": 6930 }, { - "epoch": 1.4533445166701615, - "grad_norm": 6.41658678749503, - "learning_rate": 1.098883093594157e-05, - "loss": 0.7813, + "epoch": 0.9786783394521321, + "grad_norm": 3.1543805295442846, + "learning_rate": 1.574061963932407e-05, + "loss": 0.9141, "step": 6931 }, { - "epoch": 1.4535542042356888, - "grad_norm": 6.463762765442355, - "learning_rate": 1.0986578121194093e-05, - "loss": 0.5983, + "epoch": 0.978819542502118, + "grad_norm": 2.879539667564236, + "learning_rate": 1.5739371332463496e-05, + "loss": 0.7845, "step": 6932 }, { - "epoch": 1.4537638918012161, - "grad_norm": 7.7043754225778205, - "learning_rate": 1.0984325255882772e-05, - "loss": 0.9623, + "epoch": 0.9789607455521039, + "grad_norm": 3.2888939995831104, + "learning_rate": 1.5738122892222407e-05, + "loss": 0.9535, "step": 6933 }, { - "epoch": 1.4539735793667434, - "grad_norm": 7.018612668854298, - "learning_rate": 1.0982072340123078e-05, - "loss": 0.7825, + "epoch": 0.9791019486020898, + "grad_norm": 3.3660376509967906, + "learning_rate": 1.573687431862982e-05, + "loss": 0.9463, "step": 6934 }, { - "epoch": 1.454183266932271, - "grad_norm": 7.79225597229119, - "learning_rate": 1.097981937403047e-05, - "loss": 0.9175, + "epoch": 0.9792431516520756, + "grad_norm": 3.3763077317102357, + "learning_rate": 1.5735625611714754e-05, + "loss": 0.9301, "step": 6935 }, { - "epoch": 1.4543929544977983, - "grad_norm": 7.797663450271207, - "learning_rate": 1.0977566357720423e-05, - "loss": 0.8233, + "epoch": 0.9793843547020615, + "grad_norm": 3.534849893334151, + "learning_rate": 1.5734376771506222e-05, + "loss": 1.0963, "step": 6936 }, { - "epoch": 1.4546026420633256, - "grad_norm": 8.021999765422642, - "learning_rate": 1.0975313291308402e-05, - "loss": 0.8661, + "epoch": 0.9795255577520474, + "grad_norm": 3.5510548809389353, + "learning_rate": 1.5733127798033257e-05, + "loss": 1.0692, "step": 6937 }, { - "epoch": 1.454812329628853, - "grad_norm": 8.005624943576937, - "learning_rate": 1.0973060174909888e-05, - "loss": 0.7796, + "epoch": 0.9796667608020333, + "grad_norm": 2.9643008601291796, + "learning_rate": 1.5731878691324874e-05, + "loss": 0.7428, "step": 6938 }, { - "epoch": 1.4550220171943804, - "grad_norm": 7.25699804286934, - "learning_rate": 1.0970807008640345e-05, - "loss": 0.9146, + "epoch": 0.9798079638520192, + "grad_norm": 3.239925961012417, + "learning_rate": 1.5730629451410103e-05, + "loss": 0.9767, "step": 6939 }, { - "epoch": 1.4552317047599077, - "grad_norm": 7.110779914008801, - "learning_rate": 1.0968553792615267e-05, - "loss": 0.8783, + "epoch": 0.9799491669020051, + "grad_norm": 3.223027482757864, + "learning_rate": 1.5729380078317982e-05, + "loss": 1.1101, "step": 6940 }, { - "epoch": 1.4554413923254352, - "grad_norm": 4.574432018591386, - "learning_rate": 1.0966300526950122e-05, - "loss": 0.4127, + "epoch": 0.980090369951991, + "grad_norm": 3.5910506772478366, + "learning_rate": 1.5728130572077543e-05, + "loss": 0.9898, "step": 6941 }, { - "epoch": 1.4556510798909625, - "grad_norm": 10.85671678433408, - "learning_rate": 1.09640472117604e-05, - "loss": 0.9038, + "epoch": 0.9802315730019768, + "grad_norm": 3.5382573150416254, + "learning_rate": 1.572688093271782e-05, + "loss": 1.1979, "step": 6942 }, { - "epoch": 1.4558607674564898, - "grad_norm": 7.174455694701378, - "learning_rate": 1.0961793847161586e-05, - "loss": 0.7581, + "epoch": 0.9803727760519627, + "grad_norm": 3.2634444563160283, + "learning_rate": 1.5725631160267858e-05, + "loss": 1.0517, "step": 6943 }, { - "epoch": 1.4560704550220172, - "grad_norm": 7.772469407331134, - "learning_rate": 1.095954043326917e-05, - "loss": 0.8159, + "epoch": 0.9805139791019486, + "grad_norm": 3.380523977344248, + "learning_rate": 1.57243812547567e-05, + "loss": 0.9333, "step": 6944 }, { - "epoch": 1.4562801425875445, - "grad_norm": 8.198968373274605, - "learning_rate": 1.0957286970198642e-05, - "loss": 0.929, + "epoch": 0.9806551821519345, + "grad_norm": 3.904459545111257, + "learning_rate": 1.57231312162134e-05, + "loss": 1.1011, "step": 6945 }, { - "epoch": 1.456489830153072, - "grad_norm": 5.512110921599867, - "learning_rate": 1.0955033458065499e-05, - "loss": 0.5957, + "epoch": 0.9807963852019204, + "grad_norm": 2.843037788944992, + "learning_rate": 1.5721881044666996e-05, + "loss": 0.9079, "step": 6946 }, { - "epoch": 1.4566995177185993, - "grad_norm": 7.141574330632033, - "learning_rate": 1.095277989698523e-05, - "loss": 0.5706, + "epoch": 0.9809375882519062, + "grad_norm": 3.4792988498133735, + "learning_rate": 1.572063074014655e-05, + "loss": 1.0359, "step": 6947 }, { - "epoch": 1.4569092052841266, - "grad_norm": 8.475745144124659, - "learning_rate": 1.0950526287073343e-05, - "loss": 0.7797, + "epoch": 0.9810787913018921, + "grad_norm": 2.9680471599663694, + "learning_rate": 1.5719380302681114e-05, + "loss": 0.8634, "step": 6948 }, { - "epoch": 1.4571188928496541, - "grad_norm": 8.053372807005887, - "learning_rate": 1.0948272628445331e-05, - "loss": 1.1298, + "epoch": 0.981219994351878, + "grad_norm": 3.695512963764563, + "learning_rate": 1.5718129732299753e-05, + "loss": 1.1771, "step": 6949 }, { - "epoch": 1.4573285804151814, - "grad_norm": 7.3863802655281265, - "learning_rate": 1.0946018921216705e-05, - "loss": 0.7573, + "epoch": 0.9813611974018639, + "grad_norm": 3.00318618308261, + "learning_rate": 1.5716879029031523e-05, + "loss": 0.9638, "step": 6950 }, { - "epoch": 1.4575382679807087, - "grad_norm": 6.318406022325628, - "learning_rate": 1.0943765165502968e-05, - "loss": 0.6765, + "epoch": 0.9815024004518498, + "grad_norm": 3.3874566214544086, + "learning_rate": 1.5715628192905495e-05, + "loss": 0.9435, "step": 6951 }, { - "epoch": 1.457747955546236, - "grad_norm": 8.882307898432142, - "learning_rate": 1.094151136141963e-05, - "loss": 1.0538, + "epoch": 0.9816436035018357, + "grad_norm": 3.423338292236942, + "learning_rate": 1.5714377223950734e-05, + "loss": 0.8845, "step": 6952 }, { - "epoch": 1.4579576431117633, - "grad_norm": 5.350578988501194, - "learning_rate": 1.0939257509082202e-05, - "loss": 0.6303, + "epoch": 0.9817848065518215, + "grad_norm": 2.5500230239586874, + "learning_rate": 1.571312612219632e-05, + "loss": 0.5997, "step": 6953 }, { - "epoch": 1.4581673306772909, - "grad_norm": 7.422614753146623, - "learning_rate": 1.0937003608606194e-05, - "loss": 0.8199, + "epoch": 0.9819260096018074, + "grad_norm": 4.404992993318122, + "learning_rate": 1.5711874887671318e-05, + "loss": 1.3522, "step": 6954 }, { - "epoch": 1.4583770182428182, - "grad_norm": 6.7216625049908485, - "learning_rate": 1.0934749660107131e-05, - "loss": 0.7224, + "epoch": 0.9820672126517933, + "grad_norm": 3.255053771014023, + "learning_rate": 1.571062352040481e-05, + "loss": 0.8757, "step": 6955 }, { - "epoch": 1.4585867058083455, - "grad_norm": 7.7932868476710055, - "learning_rate": 1.0932495663700522e-05, - "loss": 0.7766, + "epoch": 0.9822084157017792, + "grad_norm": 4.188498337049012, + "learning_rate": 1.5709372020425877e-05, + "loss": 1.1596, "step": 6956 }, { - "epoch": 1.458796393373873, - "grad_norm": 6.493093401288736, - "learning_rate": 1.0930241619501898e-05, - "loss": 0.6023, + "epoch": 0.9823496187517651, + "grad_norm": 3.157554487681271, + "learning_rate": 1.5708120387763607e-05, + "loss": 0.8428, "step": 6957 }, { - "epoch": 1.4590060809394003, - "grad_norm": 9.06989515130504, - "learning_rate": 1.0927987527626773e-05, - "loss": 1.0163, + "epoch": 0.982490821801751, + "grad_norm": 3.228535383690472, + "learning_rate": 1.5706868622447084e-05, + "loss": 0.9451, "step": 6958 }, { - "epoch": 1.4592157685049276, - "grad_norm": 6.748799009956378, - "learning_rate": 1.092573338819068e-05, - "loss": 0.813, + "epoch": 0.9826320248517368, + "grad_norm": 4.422321718570655, + "learning_rate": 1.5705616724505394e-05, + "loss": 1.0163, "step": 6959 }, { - "epoch": 1.4594254560704552, - "grad_norm": 7.319475751817692, - "learning_rate": 1.0923479201309144e-05, - "loss": 0.8586, + "epoch": 0.9827732279017227, + "grad_norm": 3.680373155701201, + "learning_rate": 1.570436469396764e-05, + "loss": 1.2727, "step": 6960 }, { - "epoch": 1.4596351436359825, - "grad_norm": 7.424438084687504, - "learning_rate": 1.0921224967097701e-05, - "loss": 0.8178, + "epoch": 0.9829144309517086, + "grad_norm": 3.597854083406116, + "learning_rate": 1.5703112530862912e-05, + "loss": 1.0064, "step": 6961 }, { - "epoch": 1.4598448312015098, - "grad_norm": 7.589390352914218, - "learning_rate": 1.0918970685671874e-05, - "loss": 0.9653, + "epoch": 0.9830556340016945, + "grad_norm": 2.8992529116195374, + "learning_rate": 1.5701860235220314e-05, + "loss": 0.7728, "step": 6962 }, { - "epoch": 1.460054518767037, - "grad_norm": 7.186533920004667, - "learning_rate": 1.0916716357147213e-05, - "loss": 0.7361, + "epoch": 0.9831968370516803, + "grad_norm": 3.208125986474056, + "learning_rate": 1.5700607807068946e-05, + "loss": 0.9497, "step": 6963 }, { - "epoch": 1.4602642063325644, - "grad_norm": 7.992026114395873, - "learning_rate": 1.0914461981639244e-05, - "loss": 0.8174, + "epoch": 0.9833380401016661, + "grad_norm": 3.705476981783934, + "learning_rate": 1.569935524643791e-05, + "loss": 1.2896, "step": 6964 }, { - "epoch": 1.460473893898092, - "grad_norm": 7.6054046786861385, - "learning_rate": 1.0912207559263513e-05, - "loss": 0.8008, + "epoch": 0.983479243151652, + "grad_norm": 3.469104658118669, + "learning_rate": 1.5698102553356325e-05, + "loss": 1.0155, "step": 6965 }, { - "epoch": 1.4606835814636192, - "grad_norm": 7.1621197819276565, - "learning_rate": 1.0909953090135561e-05, - "loss": 0.8023, + "epoch": 0.9836204462016379, + "grad_norm": 3.5618830968649102, + "learning_rate": 1.5696849727853297e-05, + "loss": 0.9966, "step": 6966 }, { - "epoch": 1.4608932690291465, - "grad_norm": 6.5964627963707265, - "learning_rate": 1.090769857437094e-05, - "loss": 0.7509, + "epoch": 0.9837616492516238, + "grad_norm": 3.250448878096537, + "learning_rate": 1.569559676995794e-05, + "loss": 1.0674, "step": 6967 }, { - "epoch": 1.461102956594674, - "grad_norm": 6.768847624565883, - "learning_rate": 1.0905444012085188e-05, - "loss": 0.7515, + "epoch": 0.9839028523016097, + "grad_norm": 3.6351630743531866, + "learning_rate": 1.5694343679699377e-05, + "loss": 0.932, "step": 6968 }, { - "epoch": 1.4613126441602013, - "grad_norm": 7.105126024005378, - "learning_rate": 1.0903189403393862e-05, - "loss": 0.7554, + "epoch": 0.9840440553515956, + "grad_norm": 3.7322724348988916, + "learning_rate": 1.569309045710672e-05, + "loss": 1.2539, "step": 6969 }, { - "epoch": 1.4615223317257287, - "grad_norm": 6.47221993332316, - "learning_rate": 1.090093474841251e-05, - "loss": 0.7285, + "epoch": 0.9841852584015814, + "grad_norm": 3.077452036850276, + "learning_rate": 1.5691837102209103e-05, + "loss": 0.8782, "step": 6970 }, { - "epoch": 1.461732019291256, - "grad_norm": 6.021824291062265, - "learning_rate": 1.0898680047256693e-05, - "loss": 0.646, + "epoch": 0.9843264614515673, + "grad_norm": 4.088119037012379, + "learning_rate": 1.569058361503565e-05, + "loss": 1.106, "step": 6971 }, { - "epoch": 1.4619417068567833, - "grad_norm": 8.91745955274642, - "learning_rate": 1.0896425300041961e-05, - "loss": 0.9178, + "epoch": 0.9844676645015532, + "grad_norm": 3.8713703787299076, + "learning_rate": 1.568932999561549e-05, + "loss": 1.1572, "step": 6972 }, { - "epoch": 1.4621513944223108, - "grad_norm": 8.02855279616266, - "learning_rate": 1.0894170506883882e-05, - "loss": 0.9764, + "epoch": 0.9846088675515391, + "grad_norm": 3.628531331112453, + "learning_rate": 1.5688076243977758e-05, + "loss": 1.0644, "step": 6973 }, { - "epoch": 1.462361081987838, - "grad_norm": 7.5319468766420705, - "learning_rate": 1.0891915667898009e-05, - "loss": 0.869, + "epoch": 0.984750070601525, + "grad_norm": 3.0259289312552244, + "learning_rate": 1.568682236015159e-05, + "loss": 0.9877, "step": 6974 }, { - "epoch": 1.4625707695533654, - "grad_norm": 6.851904643187208, - "learning_rate": 1.0889660783199918e-05, - "loss": 0.7802, + "epoch": 0.9848912736515109, + "grad_norm": 3.421105743113141, + "learning_rate": 1.568556834416613e-05, + "loss": 0.8944, "step": 6975 }, { - "epoch": 1.462780457118893, - "grad_norm": 7.895049987192538, - "learning_rate": 1.0887405852905165e-05, - "loss": 1.0078, + "epoch": 0.9850324767014967, + "grad_norm": 3.0690637735114557, + "learning_rate": 1.5684314196050516e-05, + "loss": 0.8954, "step": 6976 }, { - "epoch": 1.4629901446844202, - "grad_norm": 6.174017721352524, - "learning_rate": 1.088515087712932e-05, - "loss": 0.7581, + "epoch": 0.9851736797514826, + "grad_norm": 3.533321172873058, + "learning_rate": 1.5683059915833893e-05, + "loss": 1.0902, "step": 6977 }, { - "epoch": 1.4631998322499475, - "grad_norm": 6.175165017766711, - "learning_rate": 1.0882895855987961e-05, - "loss": 0.6452, + "epoch": 0.9853148828014685, + "grad_norm": 3.451738621238093, + "learning_rate": 1.568180550354541e-05, + "loss": 0.9889, "step": 6978 }, { - "epoch": 1.463409519815475, - "grad_norm": 5.647606342514414, - "learning_rate": 1.0880640789596654e-05, - "loss": 0.6178, + "epoch": 0.9854560858514544, + "grad_norm": 3.689876372140364, + "learning_rate": 1.5680550959214228e-05, + "loss": 1.1631, "step": 6979 }, { - "epoch": 1.4636192073810024, - "grad_norm": 6.195371793785506, - "learning_rate": 1.0878385678070984e-05, - "loss": 0.7581, + "epoch": 0.9855972889014403, + "grad_norm": 3.7522893547777083, + "learning_rate": 1.567929628286949e-05, + "loss": 1.0714, "step": 6980 }, { - "epoch": 1.4638288949465297, - "grad_norm": 7.686737086656785, - "learning_rate": 1.0876130521526522e-05, - "loss": 0.7766, + "epoch": 0.9857384919514262, + "grad_norm": 3.469037734312863, + "learning_rate": 1.5678041474540356e-05, + "loss": 1.1226, "step": 6981 }, { - "epoch": 1.464038582512057, - "grad_norm": 6.5541007330019445, - "learning_rate": 1.0873875320078852e-05, - "loss": 0.7089, + "epoch": 0.985879695001412, + "grad_norm": 3.2522972241197867, + "learning_rate": 1.5676786534255993e-05, + "loss": 0.7746, "step": 6982 }, { - "epoch": 1.4642482700775843, - "grad_norm": 9.367968565476625, - "learning_rate": 1.0871620073843554e-05, - "loss": 0.9481, + "epoch": 0.9860208980513979, + "grad_norm": 3.608721466873715, + "learning_rate": 1.5675531462045567e-05, + "loss": 1.1465, "step": 6983 }, { - "epoch": 1.4644579576431118, - "grad_norm": 7.6243250261817765, - "learning_rate": 1.086936478293622e-05, - "loss": 0.7618, + "epoch": 0.9861621011013838, + "grad_norm": 3.499731549687233, + "learning_rate": 1.5674276257938234e-05, + "loss": 1.0859, "step": 6984 }, { - "epoch": 1.4646676452086391, - "grad_norm": 6.676643628936052, - "learning_rate": 1.086710944747243e-05, - "loss": 0.6947, + "epoch": 0.9863033041513697, + "grad_norm": 4.813250376864065, + "learning_rate": 1.5673020921963174e-05, + "loss": 1.2355, "step": 6985 }, { - "epoch": 1.4648773327741664, - "grad_norm": 7.294701536903166, - "learning_rate": 1.0864854067567777e-05, - "loss": 0.864, + "epoch": 0.9864445072013556, + "grad_norm": 3.4343063979784643, + "learning_rate": 1.5671765454149558e-05, + "loss": 1.1618, "step": 6986 }, { - "epoch": 1.465087020339694, - "grad_norm": 5.057533797854403, - "learning_rate": 1.0862598643337854e-05, - "loss": 0.4011, + "epoch": 0.9865857102513415, + "grad_norm": 3.680731680392903, + "learning_rate": 1.5670509854526566e-05, + "loss": 1.078, "step": 6987 }, { - "epoch": 1.4652967079052213, - "grad_norm": 7.207787166278459, - "learning_rate": 1.086034317489825e-05, - "loss": 0.7775, + "epoch": 0.9867269133013273, + "grad_norm": 3.748786507501026, + "learning_rate": 1.566925412312337e-05, + "loss": 1.1682, "step": 6988 }, { - "epoch": 1.4655063954707486, - "grad_norm": 6.6724294883215345, - "learning_rate": 1.0858087662364572e-05, - "loss": 0.767, + "epoch": 0.9868681163513132, + "grad_norm": 3.377734580982437, + "learning_rate": 1.5667998259969154e-05, + "loss": 0.971, "step": 6989 }, { - "epoch": 1.4657160830362759, - "grad_norm": 7.472615112773731, - "learning_rate": 1.0855832105852413e-05, - "loss": 0.9126, + "epoch": 0.9870093194012991, + "grad_norm": 3.4895733730412752, + "learning_rate": 1.566674226509311e-05, + "loss": 1.1186, "step": 6990 }, { - "epoch": 1.4659257706018032, - "grad_norm": 7.340186983267856, - "learning_rate": 1.085357650547737e-05, - "loss": 0.6979, + "epoch": 0.987150522451285, + "grad_norm": 3.9237819825322156, + "learning_rate": 1.5665486138524425e-05, + "loss": 1.1409, "step": 6991 }, { - "epoch": 1.4661354581673307, - "grad_norm": 6.690484240622299, - "learning_rate": 1.0851320861355055e-05, - "loss": 0.7461, + "epoch": 0.9872917255012709, + "grad_norm": 3.713623998650466, + "learning_rate": 1.5664229880292286e-05, + "loss": 1.0355, "step": 6992 }, { - "epoch": 1.466345145732858, - "grad_norm": 6.980456969283783, - "learning_rate": 1.0849065173601066e-05, - "loss": 0.821, + "epoch": 0.9874329285512568, + "grad_norm": 3.607571372416664, + "learning_rate": 1.566297349042589e-05, + "loss": 1.1739, "step": 6993 }, { - "epoch": 1.4665548332983853, - "grad_norm": 5.824463113043737, - "learning_rate": 1.0846809442331014e-05, - "loss": 0.5662, + "epoch": 0.9875741316012426, + "grad_norm": 3.4152831711214544, + "learning_rate": 1.5661716968954436e-05, + "loss": 1.0416, "step": 6994 }, { - "epoch": 1.4667645208639128, - "grad_norm": 9.593140131129912, - "learning_rate": 1.0844553667660511e-05, - "loss": 0.9659, + "epoch": 0.9877153346512285, + "grad_norm": 3.0364888505542136, + "learning_rate": 1.5660460315907125e-05, + "loss": 0.8819, "step": 6995 }, { - "epoch": 1.4669742084294402, - "grad_norm": 6.736554412298324, - "learning_rate": 1.084229784970517e-05, - "loss": 0.7312, + "epoch": 0.9878565377012144, + "grad_norm": 2.8189967619070364, + "learning_rate": 1.5659203531313162e-05, + "loss": 0.6696, "step": 6996 }, { - "epoch": 1.4671838959949675, - "grad_norm": 6.496612408555153, - "learning_rate": 1.0840041988580596e-05, - "loss": 0.5665, + "epoch": 0.9879977407512002, + "grad_norm": 2.6824644103683766, + "learning_rate": 1.5657946615201755e-05, + "loss": 0.7952, "step": 6997 }, { - "epoch": 1.467393583560495, - "grad_norm": 6.989413718189927, - "learning_rate": 1.0837786084402422e-05, - "loss": 0.8089, + "epoch": 0.988138943801186, + "grad_norm": 3.9999698076649297, + "learning_rate": 1.565668956760211e-05, + "loss": 1.1374, "step": 6998 }, { - "epoch": 1.4676032711260223, - "grad_norm": 7.831308445194834, - "learning_rate": 1.0835530137286252e-05, - "loss": 0.8865, + "epoch": 0.9882801468511719, + "grad_norm": 2.815202808771742, + "learning_rate": 1.565543238854344e-05, + "loss": 0.7855, "step": 6999 }, { - "epoch": 1.4678129586915496, - "grad_norm": 8.014899865426337, - "learning_rate": 1.0833274147347716e-05, - "loss": 1.039, + "epoch": 0.9884213499011578, + "grad_norm": 3.119927566390854, + "learning_rate": 1.5654175078054965e-05, + "loss": 0.7941, "step": 7000 }, { - "epoch": 1.468022646257077, - "grad_norm": 7.435002702350244, - "learning_rate": 1.0831018114702435e-05, - "loss": 0.7409, + "epoch": 0.9885625529511437, + "grad_norm": 3.710496191695528, + "learning_rate": 1.5652917636165905e-05, + "loss": 1.0291, "step": 7001 }, { - "epoch": 1.4682323338226042, - "grad_norm": 6.257718963912202, - "learning_rate": 1.082876203946603e-05, - "loss": 0.6339, + "epoch": 0.9887037560011296, + "grad_norm": 3.5445064781669657, + "learning_rate": 1.5651660062905476e-05, + "loss": 1.0, "step": 7002 }, { - "epoch": 1.4684420213881317, - "grad_norm": 6.452257626006419, - "learning_rate": 1.0826505921754137e-05, - "loss": 0.7642, + "epoch": 0.9888449590511155, + "grad_norm": 3.0508636236193296, + "learning_rate": 1.5650402358302913e-05, + "loss": 0.7994, "step": 7003 }, { - "epoch": 1.468651708953659, - "grad_norm": 6.50086412279567, - "learning_rate": 1.0824249761682382e-05, - "loss": 0.7113, + "epoch": 0.9889861621011014, + "grad_norm": 3.0206325015955295, + "learning_rate": 1.5649144522387437e-05, + "loss": 0.9788, "step": 7004 }, { - "epoch": 1.4688613965191863, - "grad_norm": 6.584702201126405, - "learning_rate": 1.0821993559366398e-05, - "loss": 0.7537, + "epoch": 0.9891273651510872, + "grad_norm": 3.257815829686731, + "learning_rate": 1.5647886555188282e-05, + "loss": 1.1091, "step": 7005 }, { - "epoch": 1.4690710840847139, - "grad_norm": 8.099909513306217, - "learning_rate": 1.0819737314921817e-05, - "loss": 0.945, + "epoch": 0.9892685682010731, + "grad_norm": 4.0364687772674825, + "learning_rate": 1.564662845673468e-05, + "loss": 1.3046, "step": 7006 }, { - "epoch": 1.4692807716502412, - "grad_norm": 7.5890492483824055, - "learning_rate": 1.0817481028464274e-05, - "loss": 0.8279, + "epoch": 0.989409771251059, + "grad_norm": 3.430130517321909, + "learning_rate": 1.5645370227055874e-05, + "loss": 1.0219, "step": 7007 }, { - "epoch": 1.4694904592157685, - "grad_norm": 8.40989966317815, - "learning_rate": 1.0815224700109414e-05, - "loss": 0.7376, + "epoch": 0.9895509743010449, + "grad_norm": 3.5588576823990614, + "learning_rate": 1.56441118661811e-05, + "loss": 1.1569, "step": 7008 }, { - "epoch": 1.4697001467812958, - "grad_norm": 6.415005363112198, - "learning_rate": 1.0812968329972873e-05, - "loss": 0.7743, + "epoch": 0.9896921773510308, + "grad_norm": 3.1063170184303655, + "learning_rate": 1.564285337413961e-05, + "loss": 0.8242, "step": 7009 }, { - "epoch": 1.469909834346823, - "grad_norm": 7.024819439683643, - "learning_rate": 1.0810711918170296e-05, - "loss": 0.7637, + "epoch": 0.9898333804010166, + "grad_norm": 3.0517766556805617, + "learning_rate": 1.564159475096064e-05, + "loss": 0.859, "step": 7010 }, { - "epoch": 1.4701195219123506, - "grad_norm": 6.746138328466806, - "learning_rate": 1.0808455464817326e-05, - "loss": 0.7718, + "epoch": 0.9899745834510025, + "grad_norm": 3.977257597100643, + "learning_rate": 1.5640335996673446e-05, + "loss": 1.1925, "step": 7011 }, { - "epoch": 1.470329209477878, - "grad_norm": 7.058141736254412, - "learning_rate": 1.0806198970029612e-05, - "loss": 0.6958, + "epoch": 0.9901157865009884, + "grad_norm": 3.2798279653197553, + "learning_rate": 1.5639077111307276e-05, + "loss": 0.8232, "step": 7012 }, { - "epoch": 1.4705388970434052, - "grad_norm": 7.815646219691848, - "learning_rate": 1.0803942433922804e-05, - "loss": 0.8925, + "epoch": 0.9902569895509743, + "grad_norm": 3.346384994046706, + "learning_rate": 1.563781809489139e-05, + "loss": 1.0752, "step": 7013 }, { - "epoch": 1.4707485846089328, - "grad_norm": 8.719273444241164, - "learning_rate": 1.0801685856612547e-05, - "loss": 0.9824, + "epoch": 0.9903981926009602, + "grad_norm": 2.9977320693960756, + "learning_rate": 1.563655894745505e-05, + "loss": 0.8363, "step": 7014 }, { - "epoch": 1.47095827217446, - "grad_norm": 8.69743326489451, - "learning_rate": 1.0799429238214502e-05, - "loss": 0.8838, + "epoch": 0.9905393956509461, + "grad_norm": 3.255853153086719, + "learning_rate": 1.563529966902751e-05, + "loss": 1.0111, "step": 7015 }, { - "epoch": 1.4711679597399874, - "grad_norm": 8.689867937918082, - "learning_rate": 1.0797172578844324e-05, - "loss": 0.9967, + "epoch": 0.990680598700932, + "grad_norm": 3.1205128480531688, + "learning_rate": 1.5634040259638044e-05, + "loss": 0.7567, "step": 7016 }, { - "epoch": 1.471377647305515, - "grad_norm": 7.708944331930854, - "learning_rate": 1.0794915878617667e-05, - "loss": 0.7831, + "epoch": 0.9908218017509178, + "grad_norm": 3.582942074407956, + "learning_rate": 1.5632780719315912e-05, + "loss": 1.224, "step": 7017 }, { - "epoch": 1.4715873348710422, - "grad_norm": 7.684149756432796, - "learning_rate": 1.0792659137650191e-05, - "loss": 0.785, + "epoch": 0.9909630048009037, + "grad_norm": 3.556001786382917, + "learning_rate": 1.563152104809039e-05, + "loss": 0.951, "step": 7018 }, { - "epoch": 1.4717970224365695, - "grad_norm": 7.3221774509317505, - "learning_rate": 1.0790402356057563e-05, - "loss": 0.8536, + "epoch": 0.9911042078508896, + "grad_norm": 3.2147592862399317, + "learning_rate": 1.563026124599075e-05, + "loss": 0.9708, "step": 7019 }, { - "epoch": 1.4720067100020968, - "grad_norm": 7.175964656047589, - "learning_rate": 1.0788145533955439e-05, - "loss": 0.7382, + "epoch": 0.9912454109008755, + "grad_norm": 3.4783365359073595, + "learning_rate": 1.562900131304627e-05, + "loss": 0.9113, "step": 7020 }, { - "epoch": 1.4722163975676241, - "grad_norm": 7.614592141334634, - "learning_rate": 1.0785888671459494e-05, - "loss": 0.8799, + "epoch": 0.9913866139508614, + "grad_norm": 3.4651610598413387, + "learning_rate": 1.5627741249286233e-05, + "loss": 0.928, "step": 7021 }, { - "epoch": 1.4724260851331517, - "grad_norm": 6.016435102544791, - "learning_rate": 1.0783631768685389e-05, - "loss": 0.6672, + "epoch": 0.9915278170008472, + "grad_norm": 3.303366968723144, + "learning_rate": 1.5626481054739916e-05, + "loss": 1.0275, "step": 7022 }, { - "epoch": 1.472635772698679, - "grad_norm": 6.485757201309465, - "learning_rate": 1.0781374825748795e-05, - "loss": 0.6662, + "epoch": 0.9916690200508331, + "grad_norm": 4.047000915110003, + "learning_rate": 1.562522072943661e-05, + "loss": 0.9735, "step": 7023 }, { - "epoch": 1.4728454602642063, - "grad_norm": 6.632678340312162, - "learning_rate": 1.077911784276539e-05, - "loss": 0.7688, + "epoch": 0.991810223100819, + "grad_norm": 3.117661129910116, + "learning_rate": 1.5623960273405605e-05, + "loss": 0.9313, "step": 7024 }, { - "epoch": 1.4730551478297338, - "grad_norm": 5.984807281779909, - "learning_rate": 1.0776860819850846e-05, - "loss": 0.5357, + "epoch": 0.9919514261508049, + "grad_norm": 3.0924144270926632, + "learning_rate": 1.562269968667619e-05, + "loss": 0.9477, "step": 7025 }, { - "epoch": 1.473264835395261, - "grad_norm": 6.582498979221595, - "learning_rate": 1.077460375712083e-05, - "loss": 0.7566, + "epoch": 0.9920926292007908, + "grad_norm": 3.5471854211645657, + "learning_rate": 1.5621438969277667e-05, + "loss": 1.1245, "step": 7026 }, { - "epoch": 1.4734745229607884, - "grad_norm": 7.997955168965671, - "learning_rate": 1.0772346654691033e-05, - "loss": 0.8477, + "epoch": 0.9922338322507767, + "grad_norm": 3.328051526576892, + "learning_rate": 1.5620178121239325e-05, + "loss": 0.9079, "step": 7027 }, { - "epoch": 1.4736842105263157, - "grad_norm": 8.261956867547118, - "learning_rate": 1.0770089512677129e-05, - "loss": 0.7434, + "epoch": 0.9923750353007625, + "grad_norm": 3.3618530498812427, + "learning_rate": 1.5618917142590472e-05, + "loss": 1.0184, "step": 7028 }, { - "epoch": 1.4738938980918432, - "grad_norm": 8.138387642820739, - "learning_rate": 1.07678323311948e-05, - "loss": 0.8062, + "epoch": 0.9925162383507484, + "grad_norm": 2.995249132331739, + "learning_rate": 1.5617656033360414e-05, + "loss": 0.9172, "step": 7029 }, { - "epoch": 1.4741035856573705, - "grad_norm": 6.918887172985046, - "learning_rate": 1.0765575110359735e-05, - "loss": 0.6932, + "epoch": 0.9926574414007343, + "grad_norm": 3.5022546646379156, + "learning_rate": 1.5616394793578455e-05, + "loss": 0.9893, "step": 7030 }, { - "epoch": 1.4743132732228978, - "grad_norm": 9.969833676299642, - "learning_rate": 1.0763317850287618e-05, - "loss": 1.1169, + "epoch": 0.9927986444507201, + "grad_norm": 3.06374782003365, + "learning_rate": 1.5615133423273906e-05, + "loss": 0.8965, "step": 7031 }, { - "epoch": 1.4745229607884252, - "grad_norm": 8.112814820289168, - "learning_rate": 1.0761060551094135e-05, - "loss": 0.8434, + "epoch": 0.992939847500706, + "grad_norm": 3.6321936916219038, + "learning_rate": 1.5613871922476082e-05, + "loss": 1.1367, "step": 7032 }, { - "epoch": 1.4747326483539527, - "grad_norm": 6.706794037787037, - "learning_rate": 1.0758803212894981e-05, - "loss": 0.8027, + "epoch": 0.9930810505506918, + "grad_norm": 3.793161549894474, + "learning_rate": 1.5612610291214296e-05, + "loss": 1.2445, "step": 7033 }, { - "epoch": 1.47494233591948, - "grad_norm": 6.307318762916073, - "learning_rate": 1.0756545835805847e-05, - "loss": 0.6003, + "epoch": 0.9932222536006777, + "grad_norm": 3.277534278856119, + "learning_rate": 1.5611348529517872e-05, + "loss": 0.9351, "step": 7034 }, { - "epoch": 1.4751520234850073, - "grad_norm": 6.697717672924268, - "learning_rate": 1.0754288419942422e-05, - "loss": 0.8563, + "epoch": 0.9933634566506636, + "grad_norm": 3.518118534321915, + "learning_rate": 1.561008663741613e-05, + "loss": 1.0028, "step": 7035 }, { - "epoch": 1.4753617110505348, - "grad_norm": 6.226644419426775, - "learning_rate": 1.0752030965420413e-05, - "loss": 0.6525, + "epoch": 0.9935046597006495, + "grad_norm": 3.35057390033774, + "learning_rate": 1.56088246149384e-05, + "loss": 1.0941, "step": 7036 }, { - "epoch": 1.4755713986160621, - "grad_norm": 7.208126792628617, - "learning_rate": 1.0749773472355509e-05, - "loss": 0.6862, + "epoch": 0.9936458627506354, + "grad_norm": 3.380690123914075, + "learning_rate": 1.560756246211401e-05, + "loss": 0.9519, "step": 7037 }, { - "epoch": 1.4757810861815894, - "grad_norm": 8.257804750603077, - "learning_rate": 1.0747515940863416e-05, - "loss": 1.0113, + "epoch": 0.9937870658006213, + "grad_norm": 2.8764134346023327, + "learning_rate": 1.560630017897229e-05, + "loss": 0.9132, "step": 7038 }, { - "epoch": 1.4759907737471167, - "grad_norm": 8.731089854922134, - "learning_rate": 1.0745258371059832e-05, - "loss": 0.88, + "epoch": 0.9939282688506071, + "grad_norm": 3.469913487101549, + "learning_rate": 1.560503776554257e-05, + "loss": 1.2105, "step": 7039 }, { - "epoch": 1.476200461312644, - "grad_norm": 6.58570148768025, - "learning_rate": 1.074300076306047e-05, - "loss": 0.5896, + "epoch": 0.994069471900593, + "grad_norm": 3.8035032284545878, + "learning_rate": 1.5603775221854195e-05, + "loss": 1.0751, "step": 7040 }, { - "epoch": 1.4764101488781716, - "grad_norm": 6.398412668904247, - "learning_rate": 1.0740743116981023e-05, - "loss": 0.5519, + "epoch": 0.9942106749505789, + "grad_norm": 3.011659005685611, + "learning_rate": 1.5602512547936504e-05, + "loss": 0.9795, "step": 7041 }, { - "epoch": 1.4766198364436989, - "grad_norm": 6.41983844347742, - "learning_rate": 1.0738485432937211e-05, - "loss": 0.8377, + "epoch": 0.9943518780005648, + "grad_norm": 3.322639403798117, + "learning_rate": 1.560124974381884e-05, + "loss": 0.9458, "step": 7042 }, { - "epoch": 1.4768295240092262, - "grad_norm": 7.482973538452351, - "learning_rate": 1.0736227711044738e-05, - "loss": 0.779, + "epoch": 0.9944930810505507, + "grad_norm": 3.292749054251956, + "learning_rate": 1.559998680953055e-05, + "loss": 1.0769, "step": 7043 }, { - "epoch": 1.4770392115747537, - "grad_norm": 7.751294241402007, - "learning_rate": 1.0733969951419317e-05, - "loss": 0.9654, + "epoch": 0.9946342841005366, + "grad_norm": 2.983155213528207, + "learning_rate": 1.559872374510099e-05, + "loss": 0.8762, "step": 7044 }, { - "epoch": 1.477248899140281, - "grad_norm": 7.00281367606814, - "learning_rate": 1.0731712154176663e-05, - "loss": 0.6882, + "epoch": 0.9947754871505224, + "grad_norm": 4.127678051151956, + "learning_rate": 1.5597460550559508e-05, + "loss": 1.1266, "step": 7045 }, { - "epoch": 1.4774585867058083, - "grad_norm": 8.84644468361504, - "learning_rate": 1.0729454319432495e-05, - "loss": 0.8992, + "epoch": 0.9949166902005083, + "grad_norm": 3.374956110435403, + "learning_rate": 1.5596197225935458e-05, + "loss": 0.8584, "step": 7046 }, { - "epoch": 1.4776682742713358, - "grad_norm": 8.600213816495094, - "learning_rate": 1.0727196447302523e-05, - "loss": 0.8576, + "epoch": 0.9950578932504942, + "grad_norm": 3.200598872907082, + "learning_rate": 1.55949337712582e-05, + "loss": 0.9614, "step": 7047 }, { - "epoch": 1.4778779618368632, - "grad_norm": 7.332713326080814, - "learning_rate": 1.0724938537902476e-05, - "loss": 0.9805, + "epoch": 0.9951990963004801, + "grad_norm": 3.539221452036798, + "learning_rate": 1.5593670186557096e-05, + "loss": 1.0793, "step": 7048 }, { - "epoch": 1.4780876494023905, - "grad_norm": 7.780696758266012, - "learning_rate": 1.072268059134807e-05, - "loss": 0.8381, + "epoch": 0.995340299350466, + "grad_norm": 2.8747625666202863, + "learning_rate": 1.5592406471861516e-05, + "loss": 0.7875, "step": 7049 }, { - "epoch": 1.4782973369679178, - "grad_norm": 6.007873542597066, - "learning_rate": 1.0720422607755032e-05, - "loss": 0.608, + "epoch": 0.9954815024004519, + "grad_norm": 3.7785221697406897, + "learning_rate": 1.5591142627200825e-05, + "loss": 1.1175, "step": 7050 }, { - "epoch": 1.478507024533445, - "grad_norm": 7.278521117226383, - "learning_rate": 1.0718164587239084e-05, - "loss": 0.786, + "epoch": 0.9956227054504377, + "grad_norm": 3.3536102215692853, + "learning_rate": 1.5589878652604392e-05, + "loss": 1.061, "step": 7051 }, { - "epoch": 1.4787167120989726, - "grad_norm": 6.708216431656241, - "learning_rate": 1.0715906529915956e-05, - "loss": 0.8159, + "epoch": 0.9957639085004236, + "grad_norm": 3.333537881416331, + "learning_rate": 1.5588614548101593e-05, + "loss": 0.8706, "step": 7052 }, { - "epoch": 1.4789263996645, - "grad_norm": 9.06760666816895, - "learning_rate": 1.0713648435901376e-05, - "loss": 0.8638, + "epoch": 0.9959051115504095, + "grad_norm": 3.260950893171303, + "learning_rate": 1.5587350313721806e-05, + "loss": 0.9259, "step": 7053 }, { - "epoch": 1.4791360872300272, - "grad_norm": 7.160288261397438, - "learning_rate": 1.0711390305311077e-05, - "loss": 0.8218, + "epoch": 0.9960463146003954, + "grad_norm": 2.9409695260660826, + "learning_rate": 1.558608594949441e-05, + "loss": 0.9021, "step": 7054 }, { - "epoch": 1.4793457747955547, - "grad_norm": 9.012669355243682, - "learning_rate": 1.0709132138260788e-05, - "loss": 1.0622, + "epoch": 0.9961875176503813, + "grad_norm": 3.190645696272113, + "learning_rate": 1.558482145544879e-05, + "loss": 0.9771, "step": 7055 }, { - "epoch": 1.479555462361082, - "grad_norm": 8.541998536022632, - "learning_rate": 1.0706873934866251e-05, - "loss": 0.9541, + "epoch": 0.9963287207003672, + "grad_norm": 3.3954615071214866, + "learning_rate": 1.5583556831614333e-05, + "loss": 1.0387, "step": 7056 }, { - "epoch": 1.4797651499266093, - "grad_norm": 7.394470655159761, - "learning_rate": 1.0704615695243197e-05, - "loss": 0.7982, + "epoch": 0.996469923750353, + "grad_norm": 3.0652519708635606, + "learning_rate": 1.5582292078020425e-05, + "loss": 0.8763, "step": 7057 }, { - "epoch": 1.4799748374921367, - "grad_norm": 6.928453318865293, - "learning_rate": 1.0702357419507367e-05, - "loss": 0.6739, + "epoch": 0.9966111268003389, + "grad_norm": 3.935264533859818, + "learning_rate": 1.5581027194696458e-05, + "loss": 1.24, "step": 7058 }, { - "epoch": 1.480184525057664, - "grad_norm": 7.729687714889983, - "learning_rate": 1.07000991077745e-05, - "loss": 0.9483, + "epoch": 0.9967523298503248, + "grad_norm": 3.21581062488035, + "learning_rate": 1.5579762181671832e-05, + "loss": 1.0062, "step": 7059 }, { - "epoch": 1.4803942126231915, - "grad_norm": 6.751826441820275, - "learning_rate": 1.069784076016034e-05, - "loss": 0.7243, + "epoch": 0.9968935329003107, + "grad_norm": 3.362957833964704, + "learning_rate": 1.557849703897594e-05, + "loss": 0.9979, "step": 7060 }, { - "epoch": 1.4806039001887188, - "grad_norm": 7.674851410482444, - "learning_rate": 1.0695582376780632e-05, - "loss": 0.7664, + "epoch": 0.9970347359502966, + "grad_norm": 4.292140604164678, + "learning_rate": 1.557723176663819e-05, + "loss": 1.1308, "step": 7061 }, { - "epoch": 1.480813587754246, - "grad_norm": 10.9091376673557, - "learning_rate": 1.0693323957751118e-05, - "loss": 1.0597, + "epoch": 0.9971759390002825, + "grad_norm": 3.185660636420962, + "learning_rate": 1.557596636468798e-05, + "loss": 0.9128, "step": 7062 }, { - "epoch": 1.4810232753197736, - "grad_norm": 7.616185609511507, - "learning_rate": 1.0691065503187552e-05, - "loss": 0.8427, + "epoch": 0.9973171420502683, + "grad_norm": 3.591203109867533, + "learning_rate": 1.5574700833154717e-05, + "loss": 0.822, "step": 7063 }, { - "epoch": 1.481232962885301, - "grad_norm": 7.184007538353708, - "learning_rate": 1.0688807013205675e-05, - "loss": 0.7318, + "epoch": 0.9974583451002542, + "grad_norm": 3.6661527945391743, + "learning_rate": 1.557343517206782e-05, + "loss": 1.0747, "step": 7064 }, { - "epoch": 1.4814426504508282, - "grad_norm": 7.07671838193026, - "learning_rate": 1.068654848792125e-05, - "loss": 0.8175, + "epoch": 0.99759954815024, + "grad_norm": 4.363442797423987, + "learning_rate": 1.5572169381456692e-05, + "loss": 1.2384, "step": 7065 }, { - "epoch": 1.4816523380163558, - "grad_norm": 5.543034183164377, - "learning_rate": 1.0684289927450022e-05, - "loss": 0.4713, + "epoch": 0.9977407512002259, + "grad_norm": 3.1332204666480528, + "learning_rate": 1.5570903461350754e-05, + "loss": 0.9118, "step": 7066 }, { - "epoch": 1.481862025581883, - "grad_norm": 7.13368237576877, - "learning_rate": 1.0682031331907748e-05, - "loss": 0.685, + "epoch": 0.9978819542502118, + "grad_norm": 3.4591507532155643, + "learning_rate": 1.5569637411779428e-05, + "loss": 0.792, "step": 7067 }, { - "epoch": 1.4820717131474104, - "grad_norm": 7.007583046402595, - "learning_rate": 1.0679772701410185e-05, - "loss": 0.6954, + "epoch": 0.9980231573001976, + "grad_norm": 3.358025928020668, + "learning_rate": 1.5568371232772134e-05, + "loss": 1.0065, "step": 7068 }, { - "epoch": 1.4822814007129377, - "grad_norm": 6.347053411761595, - "learning_rate": 1.0677514036073096e-05, - "loss": 0.6355, + "epoch": 0.9981643603501835, + "grad_norm": 2.9706410828511425, + "learning_rate": 1.5567104924358297e-05, + "loss": 0.8425, "step": 7069 }, { - "epoch": 1.482491088278465, - "grad_norm": 5.882246874755733, - "learning_rate": 1.0675255336012232e-05, - "loss": 0.6091, + "epoch": 0.9983055634001694, + "grad_norm": 3.11709050814431, + "learning_rate": 1.5565838486567343e-05, + "loss": 0.8252, "step": 7070 }, { - "epoch": 1.4827007758439925, - "grad_norm": 8.70443673196323, - "learning_rate": 1.0672996601343363e-05, - "loss": 1.0314, + "epoch": 0.9984467664501553, + "grad_norm": 3.44957872462301, + "learning_rate": 1.5564571919428708e-05, + "loss": 0.9985, "step": 7071 }, { - "epoch": 1.4829104634095198, - "grad_norm": 7.295387126584218, - "learning_rate": 1.0670737832182251e-05, - "loss": 0.9358, + "epoch": 0.9985879695001412, + "grad_norm": 3.835627913308889, + "learning_rate": 1.5563305222971826e-05, + "loss": 1.2034, "step": 7072 }, { - "epoch": 1.4831201509750471, - "grad_norm": 8.552708804555555, - "learning_rate": 1.0668479028644661e-05, - "loss": 0.9032, + "epoch": 0.998729172550127, + "grad_norm": 3.5639893779221574, + "learning_rate": 1.5562038397226135e-05, + "loss": 1.0206, "step": 7073 }, { - "epoch": 1.4833298385405747, - "grad_norm": 6.593498558096608, - "learning_rate": 1.0666220190846361e-05, - "loss": 0.6793, + "epoch": 0.9988703756001129, + "grad_norm": 4.018968833148265, + "learning_rate": 1.556077144222107e-05, + "loss": 1.2566, "step": 7074 }, { - "epoch": 1.483539526106102, - "grad_norm": 6.793962967857767, - "learning_rate": 1.0663961318903124e-05, - "loss": 0.683, + "epoch": 0.9990115786500988, + "grad_norm": 3.4173695035190272, + "learning_rate": 1.5559504357986078e-05, + "loss": 1.0191, "step": 7075 }, { - "epoch": 1.4837492136716293, - "grad_norm": 6.9491709341116366, - "learning_rate": 1.0661702412930715e-05, - "loss": 0.8255, + "epoch": 0.9991527817000847, + "grad_norm": 3.134018704373015, + "learning_rate": 1.5558237144550608e-05, + "loss": 1.0316, "step": 7076 }, { - "epoch": 1.4839589012371566, - "grad_norm": 6.815572034374432, - "learning_rate": 1.065944347304491e-05, - "loss": 0.7957, + "epoch": 0.9992939847500706, + "grad_norm": 3.1429545745613896, + "learning_rate": 1.5556969801944105e-05, + "loss": 0.9545, "step": 7077 }, { - "epoch": 1.4841685888026839, - "grad_norm": 7.4930338013630955, - "learning_rate": 1.0657184499361484e-05, - "loss": 0.683, + "epoch": 0.9994351878000565, + "grad_norm": 3.1698403453593715, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.9229, "step": 7078 }, { - "epoch": 1.4843782763682114, - "grad_norm": 5.597508605891956, - "learning_rate": 1.0654925491996211e-05, - "loss": 0.6202, + "epoch": 0.9995763908500424, + "grad_norm": 3.063720326530578, + "learning_rate": 1.555443472933582e-05, + "loss": 0.9082, "step": 7079 }, { - "epoch": 1.4845879639337387, - "grad_norm": 6.641568660444248, - "learning_rate": 1.0652666451064873e-05, - "loss": 0.702, + "epoch": 0.9997175939000282, + "grad_norm": 3.2578613986798346, + "learning_rate": 1.5553166999392954e-05, + "loss": 1.0078, "step": 7080 }, { - "epoch": 1.484797651499266, - "grad_norm": 6.330997846313807, - "learning_rate": 1.0650407376683246e-05, - "loss": 0.8758, + "epoch": 0.9998587969500141, + "grad_norm": 2.8449006222963606, + "learning_rate": 1.5551899140396883e-05, + "loss": 0.7435, "step": 7081 }, { - "epoch": 1.4850073390647935, - "grad_norm": 7.884791234314769, - "learning_rate": 1.0648148268967112e-05, - "loss": 0.916, + "epoch": 1.0, + "grad_norm": 3.35298399538151, + "learning_rate": 1.5550631152377075e-05, + "loss": 1.0117, "step": 7082 }, { - "epoch": 1.4852170266303208, - "grad_norm": 8.354607136848015, - "learning_rate": 1.0645889128032256e-05, - "loss": 0.8773, + "epoch": 1.0001412030499859, + "grad_norm": 2.7703924359903715, + "learning_rate": 1.5549363035362995e-05, + "loss": 0.7188, "step": 7083 }, { - "epoch": 1.4854267141958482, - "grad_norm": 8.354384325179506, - "learning_rate": 1.064362995399446e-05, - "loss": 0.8201, + "epoch": 1.0002824060999718, + "grad_norm": 2.5030652566205127, + "learning_rate": 1.5548094789384113e-05, + "loss": 0.5239, "step": 7084 }, { - "epoch": 1.4856364017613757, - "grad_norm": 5.928129725964623, - "learning_rate": 1.0641370746969517e-05, - "loss": 0.7918, + "epoch": 1.0004236091499576, + "grad_norm": 2.463557242040833, + "learning_rate": 1.554682641446991e-05, + "loss": 0.5542, "step": 7085 }, { - "epoch": 1.485846089326903, - "grad_norm": 6.809599512678502, - "learning_rate": 1.0639111507073207e-05, - "loss": 0.9387, + "epoch": 1.0005648121999435, + "grad_norm": 3.125220239034055, + "learning_rate": 1.554555791064985e-05, + "loss": 0.766, "step": 7086 }, { - "epoch": 1.4860557768924303, - "grad_norm": 6.8006304783626375, - "learning_rate": 1.0636852234421327e-05, - "loss": 0.7452, + "epoch": 1.0007060152499294, + "grad_norm": 2.79247665278582, + "learning_rate": 1.554428927795342e-05, + "loss": 0.5893, "step": 7087 }, { - "epoch": 1.4862654644579576, - "grad_norm": 5.909975032555732, - "learning_rate": 1.0634592929129664e-05, - "loss": 0.6667, + "epoch": 1.0008472182999153, + "grad_norm": 2.62365243433691, + "learning_rate": 1.5543020516410104e-05, + "loss": 0.6436, "step": 7088 }, { - "epoch": 1.486475152023485, - "grad_norm": 6.788903863916933, - "learning_rate": 1.063233359131401e-05, - "loss": 0.7743, + "epoch": 1.0009884213499012, + "grad_norm": 2.970400234624762, + "learning_rate": 1.5541751626049387e-05, + "loss": 0.6496, "step": 7089 }, { - "epoch": 1.4866848395890124, - "grad_norm": 7.900361320384673, - "learning_rate": 1.0630074221090165e-05, - "loss": 0.7609, + "epoch": 1.001129624399887, + "grad_norm": 2.6191402739140432, + "learning_rate": 1.5540482606900752e-05, + "loss": 0.4953, "step": 7090 }, { - "epoch": 1.4868945271545397, - "grad_norm": 6.951496769696583, - "learning_rate": 1.0627814818573923e-05, - "loss": 0.8681, + "epoch": 1.001270827449873, + "grad_norm": 3.2537168382908503, + "learning_rate": 1.5539213458993697e-05, + "loss": 0.5547, "step": 7091 }, { - "epoch": 1.487104214720067, - "grad_norm": 6.666027230728512, - "learning_rate": 1.0625555383881085e-05, - "loss": 0.7307, + "epoch": 1.0014120304998588, + "grad_norm": 3.2061580467006197, + "learning_rate": 1.553794418235771e-05, + "loss": 0.6921, "step": 7092 }, { - "epoch": 1.4873139022855946, - "grad_norm": 7.0428805657809725, - "learning_rate": 1.0623295917127444e-05, - "loss": 0.7903, + "epoch": 1.0015532335498447, + "grad_norm": 3.3374077923941305, + "learning_rate": 1.5536674777022295e-05, + "loss": 0.7212, "step": 7093 }, { - "epoch": 1.4875235898511219, - "grad_norm": 7.692608990527666, - "learning_rate": 1.0621036418428811e-05, - "loss": 0.7342, + "epoch": 1.0016944365998306, + "grad_norm": 3.2258764650999083, + "learning_rate": 1.5535405243016945e-05, + "loss": 0.5642, "step": 7094 }, { - "epoch": 1.4877332774166492, - "grad_norm": 8.810065710230523, - "learning_rate": 1.0618776887900981e-05, - "loss": 0.995, + "epoch": 1.0018356396498165, + "grad_norm": 2.7229051676067093, + "learning_rate": 1.553413558037117e-05, + "loss": 0.5861, "step": 7095 }, { - "epoch": 1.4879429649821765, - "grad_norm": 7.943922376038015, - "learning_rate": 1.0616517325659762e-05, - "loss": 0.9918, + "epoch": 1.0019768426998024, + "grad_norm": 3.006919115153893, + "learning_rate": 1.5532865789114477e-05, + "loss": 0.6352, "step": 7096 }, { - "epoch": 1.4881526525477038, - "grad_norm": 5.885855247850953, - "learning_rate": 1.0614257731820966e-05, - "loss": 0.6516, + "epoch": 1.0021180457497882, + "grad_norm": 3.562415190628279, + "learning_rate": 1.5531595869276366e-05, + "loss": 0.6815, "step": 7097 }, { - "epoch": 1.4883623401132313, - "grad_norm": 7.575614536473457, - "learning_rate": 1.0611998106500392e-05, - "loss": 0.8066, + "epoch": 1.0022592487997741, + "grad_norm": 3.4815408536255816, + "learning_rate": 1.5530325820886366e-05, + "loss": 0.6794, "step": 7098 }, { - "epoch": 1.4885720276787586, - "grad_norm": 5.799156292745133, - "learning_rate": 1.0609738449813852e-05, - "loss": 0.7026, + "epoch": 1.00240045184976, + "grad_norm": 3.742737946473658, + "learning_rate": 1.5529055643973973e-05, + "loss": 0.758, "step": 7099 }, { - "epoch": 1.488781715244286, - "grad_norm": 7.73046256773992, - "learning_rate": 1.0607478761877162e-05, - "loss": 0.9519, + "epoch": 1.002541654899746, + "grad_norm": 2.9996750284984217, + "learning_rate": 1.5527785338568718e-05, + "loss": 0.5891, "step": 7100 }, { - "epoch": 1.4889914028098135, - "grad_norm": 6.728291396791191, - "learning_rate": 1.060521904280613e-05, - "loss": 0.7349, + "epoch": 1.0026828579497318, + "grad_norm": 2.6143613971084156, + "learning_rate": 1.552651490470012e-05, + "loss": 0.5652, "step": 7101 }, { - "epoch": 1.4892010903753408, - "grad_norm": 6.670827173872375, - "learning_rate": 1.0602959292716573e-05, - "loss": 0.7416, + "epoch": 1.0028240609997177, + "grad_norm": 3.0670215465162762, + "learning_rate": 1.55252443423977e-05, + "loss": 0.6603, "step": 7102 }, { - "epoch": 1.489410777940868, - "grad_norm": 7.850341996181215, - "learning_rate": 1.0600699511724307e-05, - "loss": 0.926, + "epoch": 1.0029652640497035, + "grad_norm": 2.9572139631907843, + "learning_rate": 1.552397365169099e-05, + "loss": 0.5554, "step": 7103 }, { - "epoch": 1.4896204655063956, - "grad_norm": 6.211664527651134, - "learning_rate": 1.059843969994515e-05, - "loss": 0.711, + "epoch": 1.0031064670996894, + "grad_norm": 2.931363176574739, + "learning_rate": 1.5522702832609516e-05, + "loss": 0.5618, "step": 7104 }, { - "epoch": 1.489830153071923, - "grad_norm": 6.784092200667709, - "learning_rate": 1.0596179857494915e-05, - "loss": 0.648, + "epoch": 1.0032476701496753, + "grad_norm": 3.437590991890577, + "learning_rate": 1.552143188518281e-05, + "loss": 0.644, "step": 7105 }, { - "epoch": 1.4900398406374502, - "grad_norm": 6.247031838771317, - "learning_rate": 1.0593919984489434e-05, - "loss": 0.6548, + "epoch": 1.0033888731996612, + "grad_norm": 3.194197917648109, + "learning_rate": 1.552016080944042e-05, + "loss": 0.609, "step": 7106 }, { - "epoch": 1.4902495282029775, - "grad_norm": 6.670129063461565, - "learning_rate": 1.0591660081044522e-05, - "loss": 0.6964, + "epoch": 1.003530076249647, + "grad_norm": 2.9300701710619785, + "learning_rate": 1.551888960541187e-05, + "loss": 0.5751, "step": 7107 }, { - "epoch": 1.4904592157685048, - "grad_norm": 9.076212510423709, - "learning_rate": 1.0589400147276003e-05, - "loss": 0.9677, + "epoch": 1.003671279299633, + "grad_norm": 3.0928977471480312, + "learning_rate": 1.5517618273126714e-05, + "loss": 0.7016, "step": 7108 }, { - "epoch": 1.4906689033340323, - "grad_norm": 7.004491665347137, - "learning_rate": 1.0587140183299707e-05, - "loss": 0.8185, + "epoch": 1.0038124823496188, + "grad_norm": 3.685278482667479, + "learning_rate": 1.5516346812614487e-05, + "loss": 0.7401, "step": 7109 }, { - "epoch": 1.4908785908995597, - "grad_norm": 5.926748577927218, - "learning_rate": 1.0584880189231456e-05, - "loss": 0.5383, + "epoch": 1.0039536853996047, + "grad_norm": 4.614679323716039, + "learning_rate": 1.5515075223904745e-05, + "loss": 0.8113, "step": 7110 }, { - "epoch": 1.491088278465087, - "grad_norm": 8.009512627556292, - "learning_rate": 1.0582620165187084e-05, - "loss": 0.7928, + "epoch": 1.0040948884495906, + "grad_norm": 3.2323602390210886, + "learning_rate": 1.551380350702704e-05, + "loss": 0.5941, "step": 7111 }, { - "epoch": 1.4912979660306145, - "grad_norm": 8.319617757536445, - "learning_rate": 1.0580360111282413e-05, - "loss": 0.817, + "epoch": 1.0042360914995765, + "grad_norm": 3.379148909601683, + "learning_rate": 1.5512531662010918e-05, + "loss": 0.752, "step": 7112 }, { - "epoch": 1.4915076535961418, - "grad_norm": 6.521398451776895, - "learning_rate": 1.0578100027633282e-05, - "loss": 0.8551, + "epoch": 1.0043772945495624, + "grad_norm": 2.870867398825745, + "learning_rate": 1.5511259688885943e-05, + "loss": 0.5199, "step": 7113 }, { - "epoch": 1.491717341161669, - "grad_norm": 6.501170337580323, - "learning_rate": 1.0575839914355524e-05, - "loss": 0.7031, + "epoch": 1.0045184975995483, + "grad_norm": 3.3080872652837097, + "learning_rate": 1.550998758768167e-05, + "loss": 0.6551, "step": 7114 }, { - "epoch": 1.4919270287271964, - "grad_norm": 7.70242112258516, - "learning_rate": 1.057357977156497e-05, - "loss": 0.849, + "epoch": 1.0046597006495341, + "grad_norm": 3.425594084549221, + "learning_rate": 1.5508715358427667e-05, + "loss": 0.7151, "step": 7115 }, { - "epoch": 1.4921367162927237, - "grad_norm": 8.273358334747023, - "learning_rate": 1.057131959937746e-05, - "loss": 0.8167, + "epoch": 1.00480090369952, + "grad_norm": 3.544107577569558, + "learning_rate": 1.5507443001153497e-05, + "loss": 0.6064, "step": 7116 }, { - "epoch": 1.4923464038582512, - "grad_norm": 6.55221399949956, - "learning_rate": 1.0569059397908828e-05, - "loss": 0.8744, + "epoch": 1.0049421067495057, + "grad_norm": 3.035923927722001, + "learning_rate": 1.5506170515888732e-05, + "loss": 0.5797, "step": 7117 }, { - "epoch": 1.4925560914237785, - "grad_norm": 6.602488510373889, - "learning_rate": 1.0566799167274913e-05, - "loss": 0.8125, + "epoch": 1.0050833097994916, + "grad_norm": 2.753522473876703, + "learning_rate": 1.550489790266294e-05, + "loss": 0.4597, "step": 7118 }, { - "epoch": 1.4927657789893058, - "grad_norm": 7.278827156180899, - "learning_rate": 1.056453890759156e-05, - "loss": 1.0564, + "epoch": 1.0052245128494774, + "grad_norm": 2.933612648461088, + "learning_rate": 1.5503625161505703e-05, + "loss": 0.5458, "step": 7119 }, { - "epoch": 1.4929754665548334, - "grad_norm": 6.533391902459943, - "learning_rate": 1.0562278618974607e-05, - "loss": 0.7626, + "epoch": 1.0053657158994633, + "grad_norm": 3.595955419597733, + "learning_rate": 1.550235229244659e-05, + "loss": 0.5885, "step": 7120 }, { - "epoch": 1.4931851541203607, - "grad_norm": 7.360576633789399, - "learning_rate": 1.0560018301539902e-05, - "loss": 0.8486, + "epoch": 1.0055069189494492, + "grad_norm": 2.9356881680492677, + "learning_rate": 1.5501079295515188e-05, + "loss": 0.4813, "step": 7121 }, { - "epoch": 1.493394841685888, - "grad_norm": 6.063777729639732, - "learning_rate": 1.0557757955403283e-05, - "loss": 0.629, + "epoch": 1.005648121999435, + "grad_norm": 2.678086026101822, + "learning_rate": 1.5499806170741073e-05, + "loss": 0.4927, "step": 7122 }, { - "epoch": 1.4936045292514155, - "grad_norm": 7.860633878440131, - "learning_rate": 1.0555497580680607e-05, - "loss": 0.9613, + "epoch": 1.005789325049421, + "grad_norm": 4.020260113678444, + "learning_rate": 1.5498532918153847e-05, + "loss": 0.7213, "step": 7123 }, { - "epoch": 1.4938142168169428, - "grad_norm": 6.607117353261462, - "learning_rate": 1.0553237177487714e-05, - "loss": 0.5875, + "epoch": 1.0059305280994069, + "grad_norm": 3.259703376243231, + "learning_rate": 1.5497259537783084e-05, + "loss": 0.5475, "step": 7124 }, { - "epoch": 1.4940239043824701, - "grad_norm": 6.18348446597019, - "learning_rate": 1.0550976745940455e-05, - "loss": 0.6631, + "epoch": 1.0060717311493927, + "grad_norm": 4.10785281086478, + "learning_rate": 1.5495986029658385e-05, + "loss": 0.7659, "step": 7125 }, { - "epoch": 1.4942335919479974, - "grad_norm": 6.493629319562998, - "learning_rate": 1.0548716286154685e-05, - "loss": 0.7772, + "epoch": 1.0062129341993786, + "grad_norm": 3.661764437442794, + "learning_rate": 1.5494712393809343e-05, + "loss": 0.7327, "step": 7126 }, { - "epoch": 1.4944432795135247, - "grad_norm": 7.247846480353934, - "learning_rate": 1.0546455798246253e-05, - "loss": 0.9058, + "epoch": 1.0063541372493645, + "grad_norm": 3.799365787502453, + "learning_rate": 1.549343863026556e-05, + "loss": 0.6346, "step": 7127 }, { - "epoch": 1.4946529670790523, - "grad_norm": 6.462098426719643, - "learning_rate": 1.054419528233101e-05, - "loss": 0.757, + "epoch": 1.0064953402993504, + "grad_norm": 3.1384282819025056, + "learning_rate": 1.5492164739056635e-05, + "loss": 0.5589, "step": 7128 }, { - "epoch": 1.4948626546445796, - "grad_norm": 7.536226537452106, - "learning_rate": 1.054193473852482e-05, - "loss": 0.753, + "epoch": 1.0066365433493363, + "grad_norm": 3.3999813184410006, + "learning_rate": 1.5490890720212176e-05, + "loss": 0.6209, "step": 7129 }, { - "epoch": 1.4950723422101069, - "grad_norm": 8.903094909867505, - "learning_rate": 1.0539674166943532e-05, - "loss": 1.1463, + "epoch": 1.0067777463993222, + "grad_norm": 3.084468791538457, + "learning_rate": 1.5489616573761784e-05, + "loss": 0.5641, "step": 7130 }, { - "epoch": 1.4952820297756344, - "grad_norm": 7.449940098681308, - "learning_rate": 1.0537413567703008e-05, - "loss": 0.8703, + "epoch": 1.006918949449308, + "grad_norm": 3.5695182673157837, + "learning_rate": 1.5488342299735077e-05, + "loss": 0.6313, "step": 7131 }, { - "epoch": 1.4954917173411617, - "grad_norm": 6.3937215255975355, - "learning_rate": 1.0535152940919105e-05, - "loss": 0.7058, + "epoch": 1.007060152499294, + "grad_norm": 3.2729248703124534, + "learning_rate": 1.548706789816166e-05, + "loss": 0.6199, "step": 7132 }, { - "epoch": 1.495701404906689, - "grad_norm": 6.5330801012804045, - "learning_rate": 1.0532892286707688e-05, - "loss": 0.6088, + "epoch": 1.0072013555492798, + "grad_norm": 4.467379926360003, + "learning_rate": 1.548579336907116e-05, + "loss": 0.7252, "step": 7133 }, { - "epoch": 1.4959110924722163, - "grad_norm": 6.741138782388606, - "learning_rate": 1.0530631605184614e-05, - "loss": 0.703, + "epoch": 1.0073425585992657, + "grad_norm": 3.214694857152753, + "learning_rate": 1.5484518712493188e-05, + "loss": 0.5526, "step": 7134 }, { - "epoch": 1.4961207800377436, - "grad_norm": 7.384332731443193, - "learning_rate": 1.0528370896465751e-05, - "loss": 0.6708, + "epoch": 1.0074837616492516, + "grad_norm": 4.136183980846995, + "learning_rate": 1.548324392845737e-05, + "loss": 0.7773, "step": 7135 }, { - "epoch": 1.4963304676032712, - "grad_norm": 6.023507675827106, - "learning_rate": 1.0526110160666964e-05, - "loss": 0.6056, + "epoch": 1.0076249646992375, + "grad_norm": 2.89759458905711, + "learning_rate": 1.5481969016993335e-05, + "loss": 0.5302, "step": 7136 }, { - "epoch": 1.4965401551687985, - "grad_norm": 7.97545497278103, - "learning_rate": 1.0523849397904118e-05, - "loss": 0.7755, + "epoch": 1.0077661677492233, + "grad_norm": 3.217114383951742, + "learning_rate": 1.5480693978130706e-05, + "loss": 0.6863, "step": 7137 }, { - "epoch": 1.4967498427343258, - "grad_norm": 6.725913918968319, - "learning_rate": 1.0521588608293082e-05, - "loss": 0.6248, + "epoch": 1.0079073707992092, + "grad_norm": 3.0974145177204973, + "learning_rate": 1.547941881189911e-05, + "loss": 0.5675, "step": 7138 }, { - "epoch": 1.4969595302998533, - "grad_norm": 7.299922568348619, - "learning_rate": 1.0519327791949725e-05, - "loss": 0.9233, + "epoch": 1.008048573849195, + "grad_norm": 3.2087558642186074, + "learning_rate": 1.5478143518328193e-05, + "loss": 0.5871, "step": 7139 }, { - "epoch": 1.4971692178653806, - "grad_norm": 7.613481060741226, - "learning_rate": 1.0517066948989919e-05, - "loss": 0.7219, + "epoch": 1.008189776899181, + "grad_norm": 3.287248272842225, + "learning_rate": 1.5476868097447586e-05, + "loss": 0.6136, "step": 7140 }, { - "epoch": 1.497378905430908, - "grad_norm": 7.706368618701546, - "learning_rate": 1.0514806079529534e-05, - "loss": 0.7726, + "epoch": 1.0083309799491669, + "grad_norm": 3.259776838327227, + "learning_rate": 1.547559254928693e-05, + "loss": 0.5697, "step": 7141 }, { - "epoch": 1.4975885929964354, - "grad_norm": 7.132751188557857, - "learning_rate": 1.0512545183684444e-05, - "loss": 0.8031, + "epoch": 1.0084721829991528, + "grad_norm": 4.102370229241689, + "learning_rate": 1.5474316873875864e-05, + "loss": 0.7838, "step": 7142 }, { - "epoch": 1.4977982805619627, - "grad_norm": 9.094150284104597, - "learning_rate": 1.0510284261570525e-05, - "loss": 1.0268, + "epoch": 1.0086133860491386, + "grad_norm": 2.8569125522854226, + "learning_rate": 1.547304107124404e-05, + "loss": 0.4813, "step": 7143 }, { - "epoch": 1.49800796812749, - "grad_norm": 5.861597995112914, - "learning_rate": 1.0508023313303655e-05, - "loss": 0.5802, + "epoch": 1.0087545890991245, + "grad_norm": 2.5652592701540424, + "learning_rate": 1.547176514142111e-05, + "loss": 0.4166, "step": 7144 }, { - "epoch": 1.4982176556930173, - "grad_norm": 4.892347243375909, - "learning_rate": 1.0505762338999704e-05, - "loss": 0.4412, + "epoch": 1.0088957921491104, + "grad_norm": 3.462431804523475, + "learning_rate": 1.5470489084436717e-05, + "loss": 0.5403, "step": 7145 }, { - "epoch": 1.4984273432585447, - "grad_norm": 6.860161570568778, - "learning_rate": 1.050350133877456e-05, - "loss": 0.8146, + "epoch": 1.0090369951990963, + "grad_norm": 2.9980376814615535, + "learning_rate": 1.546921290032052e-05, + "loss": 0.5405, "step": 7146 }, { - "epoch": 1.4986370308240722, - "grad_norm": 5.771369608917613, - "learning_rate": 1.0501240312744098e-05, - "loss": 0.6055, + "epoch": 1.0091781982490822, + "grad_norm": 2.7360981449880786, + "learning_rate": 1.546793658910218e-05, + "loss": 0.481, "step": 7147 }, { - "epoch": 1.4988467183895995, - "grad_norm": 5.493139122461287, - "learning_rate": 1.04989792610242e-05, + "epoch": 1.009319401299068, + "grad_norm": 3.5193793960579236, + "learning_rate": 1.546666015081135e-05, "loss": 0.5286, "step": 7148 }, { - "epoch": 1.4990564059551268, - "grad_norm": 7.373294583637393, - "learning_rate": 1.049671818373075e-05, - "loss": 0.767, + "epoch": 1.009460604349054, + "grad_norm": 2.9696808594793103, + "learning_rate": 1.54653835854777e-05, + "loss": 0.5117, "step": 7149 }, { - "epoch": 1.4992660935206543, - "grad_norm": 6.122362195600923, - "learning_rate": 1.0494457080979633e-05, - "loss": 0.6081, + "epoch": 1.0096018073990398, + "grad_norm": 3.4997153037921587, + "learning_rate": 1.5464106893130896e-05, + "loss": 0.5732, "step": 7150 }, { - "epoch": 1.4994757810861816, - "grad_norm": 7.762090281486593, - "learning_rate": 1.0492195952886728e-05, - "loss": 0.762, + "epoch": 1.0097430104490257, + "grad_norm": 3.341477600524824, + "learning_rate": 1.546283007380061e-05, + "loss": 0.6164, "step": 7151 }, { - "epoch": 1.499685468651709, - "grad_norm": 5.704882530260195, - "learning_rate": 1.0489934799567934e-05, - "loss": 0.5562, + "epoch": 1.0098842134990116, + "grad_norm": 3.76128168957887, + "learning_rate": 1.546155312751651e-05, + "loss": 0.7862, "step": 7152 }, { - "epoch": 1.4998951562172362, - "grad_norm": 7.216254968003204, - "learning_rate": 1.0487673621139127e-05, - "loss": 0.8937, + "epoch": 1.0100254165489975, + "grad_norm": 3.1644100175201846, + "learning_rate": 1.5460276054308276e-05, + "loss": 0.4801, "step": 7153 }, { - "epoch": 1.5001048437827635, - "grad_norm": 8.018995218808998, - "learning_rate": 1.0485412417716201e-05, - "loss": 0.898, + "epoch": 1.0101666195989834, + "grad_norm": 3.9111152938157567, + "learning_rate": 1.5458998854205585e-05, + "loss": 0.5954, "step": 7154 }, { - "epoch": 1.500314531348291, - "grad_norm": 7.535309299083411, - "learning_rate": 1.0483151189415048e-05, - "loss": 0.823, + "epoch": 1.0103078226489692, + "grad_norm": 3.524662838161592, + "learning_rate": 1.5457721527238118e-05, + "loss": 0.632, "step": 7155 }, { - "epoch": 1.5005242189138184, - "grad_norm": 6.6389659882095815, - "learning_rate": 1.048088993635156e-05, - "loss": 0.7675, + "epoch": 1.0104490256989551, + "grad_norm": 3.536214003037081, + "learning_rate": 1.5456444073435558e-05, + "loss": 0.6758, "step": 7156 }, { - "epoch": 1.5007339064793457, - "grad_norm": 6.137192533774715, - "learning_rate": 1.0478628658641625e-05, - "loss": 0.5862, + "epoch": 1.010590228748941, + "grad_norm": 2.7940536998714016, + "learning_rate": 1.5455166492827595e-05, + "loss": 0.4743, "step": 7157 }, { - "epoch": 1.5009435940448732, - "grad_norm": 9.724634951212895, - "learning_rate": 1.0476367356401145e-05, - "loss": 0.976, + "epoch": 1.0107314317989269, + "grad_norm": 4.292083860781415, + "learning_rate": 1.5453888785443916e-05, + "loss": 0.6942, "step": 7158 }, { - "epoch": 1.5011532816104005, - "grad_norm": 7.553107397457866, - "learning_rate": 1.047410602974601e-05, - "loss": 0.899, + "epoch": 1.0108726348489128, + "grad_norm": 3.2850495285187518, + "learning_rate": 1.545261095131422e-05, + "loss": 0.5796, "step": 7159 }, { - "epoch": 1.5013629691759278, - "grad_norm": 6.496685621029923, - "learning_rate": 1.0471844678792118e-05, - "loss": 0.635, + "epoch": 1.0110138378988986, + "grad_norm": 2.8636545546911387, + "learning_rate": 1.5451332990468202e-05, + "loss": 0.5915, "step": 7160 }, { - "epoch": 1.5015726567414553, - "grad_norm": 8.60126898721266, - "learning_rate": 1.0469583303655368e-05, - "loss": 0.9175, + "epoch": 1.0111550409488845, + "grad_norm": 2.9936729852413277, + "learning_rate": 1.5450054902935557e-05, + "loss": 0.4969, "step": 7161 }, { - "epoch": 1.5017823443069824, - "grad_norm": 6.102222299496603, - "learning_rate": 1.0467321904451659e-05, - "loss": 0.6187, + "epoch": 1.0112962439988704, + "grad_norm": 3.2457835319418686, + "learning_rate": 1.544877668874599e-05, + "loss": 0.5392, "step": 7162 }, { - "epoch": 1.50199203187251, - "grad_norm": 7.262707474690552, - "learning_rate": 1.0465060481296892e-05, - "loss": 0.8693, + "epoch": 1.0114374470488563, + "grad_norm": 3.4225143144801433, + "learning_rate": 1.5447498347929207e-05, + "loss": 0.5603, "step": 7163 }, { - "epoch": 1.5022017194380375, - "grad_norm": 7.534613266914976, - "learning_rate": 1.0462799034306972e-05, - "loss": 0.9057, + "epoch": 1.0115786500988422, + "grad_norm": 4.159376686751067, + "learning_rate": 1.5446219880514913e-05, + "loss": 0.6187, "step": 7164 }, { - "epoch": 1.5024114070035646, - "grad_norm": 7.388669203299701, - "learning_rate": 1.0460537563597795e-05, - "loss": 0.7615, + "epoch": 1.011719853148828, + "grad_norm": 2.858373468245726, + "learning_rate": 1.5444941286532822e-05, + "loss": 0.5637, "step": 7165 }, { - "epoch": 1.502621094569092, - "grad_norm": 6.353890458374306, - "learning_rate": 1.045827606928527e-05, - "loss": 0.7831, + "epoch": 1.011861056198814, + "grad_norm": 3.4141400901650454, + "learning_rate": 1.5443662566012645e-05, + "loss": 0.6489, "step": 7166 }, { - "epoch": 1.5028307821346194, - "grad_norm": 5.788662861917991, - "learning_rate": 1.0456014551485301e-05, - "loss": 0.5901, + "epoch": 1.0120022592487998, + "grad_norm": 3.117809639145722, + "learning_rate": 1.5442383718984103e-05, + "loss": 0.5875, "step": 7167 }, { - "epoch": 1.5030404697001467, - "grad_norm": 6.766691258491545, - "learning_rate": 1.0453753010313796e-05, - "loss": 0.7368, + "epoch": 1.0121434622987857, + "grad_norm": 3.198548540607094, + "learning_rate": 1.5441104745476913e-05, + "loss": 0.6765, "step": 7168 }, { - "epoch": 1.5032501572656742, - "grad_norm": 7.317303649848834, - "learning_rate": 1.0451491445886661e-05, - "loss": 0.6686, + "epoch": 1.0122846653487716, + "grad_norm": 3.9195734291345077, + "learning_rate": 1.54398256455208e-05, + "loss": 0.6523, "step": 7169 }, { - "epoch": 1.5034598448312015, - "grad_norm": 7.476714891516945, - "learning_rate": 1.0449229858319808e-05, - "loss": 0.6706, + "epoch": 1.0124258683987575, + "grad_norm": 4.480506027388131, + "learning_rate": 1.543854641914549e-05, + "loss": 0.8599, "step": 7170 }, { - "epoch": 1.5036695323967288, - "grad_norm": 6.157120425574784, - "learning_rate": 1.0446968247729145e-05, - "loss": 0.7993, + "epoch": 1.0125670714487434, + "grad_norm": 3.188886217596178, + "learning_rate": 1.5437267066380707e-05, + "loss": 0.5689, "step": 7171 }, { - "epoch": 1.5038792199622564, - "grad_norm": 7.5198079174717085, - "learning_rate": 1.0444706614230585e-05, - "loss": 1.0423, + "epoch": 1.0127082744987292, + "grad_norm": 4.089190761618454, + "learning_rate": 1.5435987587256183e-05, + "loss": 0.6878, "step": 7172 }, { - "epoch": 1.5040889075277835, - "grad_norm": 8.825016520129456, - "learning_rate": 1.0442444957940042e-05, - "loss": 0.9199, + "epoch": 1.0128494775487151, + "grad_norm": 2.63684320387964, + "learning_rate": 1.5434707981801658e-05, + "loss": 0.473, "step": 7173 }, { - "epoch": 1.504298595093311, - "grad_norm": 6.610466576115415, - "learning_rate": 1.0440183278973419e-05, - "loss": 0.6741, + "epoch": 1.012990680598701, + "grad_norm": 4.241732185816439, + "learning_rate": 1.543342825004687e-05, + "loss": 0.55, "step": 7174 }, { - "epoch": 1.5045082826588383, - "grad_norm": 7.212215351430875, - "learning_rate": 1.0437921577446648e-05, - "loss": 0.8964, + "epoch": 1.013131883648687, + "grad_norm": 4.152430106655621, + "learning_rate": 1.543214839202155e-05, + "loss": 0.8218, "step": 7175 }, { - "epoch": 1.5047179702243656, - "grad_norm": 7.11337712268857, - "learning_rate": 1.0435659853475633e-05, - "loss": 0.7989, + "epoch": 1.0132730866986728, + "grad_norm": 4.078382509198113, + "learning_rate": 1.543086840775545e-05, + "loss": 0.6177, "step": 7176 }, { - "epoch": 1.5049276577898931, - "grad_norm": 8.084673998427256, - "learning_rate": 1.0433398107176295e-05, - "loss": 0.8495, + "epoch": 1.0134142897486587, + "grad_norm": 4.1965713488735625, + "learning_rate": 1.5429588297278315e-05, + "loss": 0.7043, "step": 7177 }, { - "epoch": 1.5051373453554204, - "grad_norm": 6.262565861287114, - "learning_rate": 1.0431136338664554e-05, - "loss": 0.7085, + "epoch": 1.0135554927986445, + "grad_norm": 4.154017591111017, + "learning_rate": 1.5428308060619893e-05, + "loss": 0.6683, "step": 7178 }, { - "epoch": 1.5053470329209477, - "grad_norm": 7.353710557246072, - "learning_rate": 1.0428874548056327e-05, - "loss": 0.7513, + "epoch": 1.0136966958486304, + "grad_norm": 2.835390021926995, + "learning_rate": 1.542702769780993e-05, + "loss": 0.5195, "step": 7179 }, { - "epoch": 1.5055567204864753, - "grad_norm": 7.158545080632724, - "learning_rate": 1.0426612735467535e-05, - "loss": 0.7538, + "epoch": 1.0138378988986163, + "grad_norm": 2.7114279801533607, + "learning_rate": 1.5425747208878195e-05, + "loss": 0.6356, "step": 7180 }, { - "epoch": 1.5057664080520026, - "grad_norm": 6.908563202222414, - "learning_rate": 1.0424350901014102e-05, - "loss": 0.6736, + "epoch": 1.0139791019486022, + "grad_norm": 2.9185334066299573, + "learning_rate": 1.542446659385443e-05, + "loss": 0.5524, "step": 7181 }, { - "epoch": 1.5059760956175299, - "grad_norm": 7.466766633010677, - "learning_rate": 1.0422089044811947e-05, - "loss": 0.7598, + "epoch": 1.014120304998588, + "grad_norm": 2.9038994343500852, + "learning_rate": 1.542318585276841e-05, + "loss": 0.5155, "step": 7182 }, { - "epoch": 1.5061857831830574, - "grad_norm": 7.558929816372385, - "learning_rate": 1.0419827166976996e-05, - "loss": 0.8435, + "epoch": 1.014261508048574, + "grad_norm": 2.8204767960182187, + "learning_rate": 1.5421904985649892e-05, + "loss": 0.5502, "step": 7183 }, { - "epoch": 1.5063954707485845, - "grad_norm": 10.863052746407542, - "learning_rate": 1.0417565267625177e-05, - "loss": 1.2898, + "epoch": 1.0144027110985596, + "grad_norm": 3.4982689180468682, + "learning_rate": 1.5420623992528643e-05, + "loss": 0.6886, "step": 7184 }, { - "epoch": 1.506605158314112, - "grad_norm": 6.529234545538859, - "learning_rate": 1.0415303346872414e-05, - "loss": 0.7826, + "epoch": 1.0145439141485455, + "grad_norm": 2.913587096173661, + "learning_rate": 1.5419342873434434e-05, + "loss": 0.5358, "step": 7185 }, { - "epoch": 1.5068148458796393, - "grad_norm": 6.765601062704781, - "learning_rate": 1.0413041404834632e-05, - "loss": 0.6716, + "epoch": 1.0146851171985314, + "grad_norm": 2.837906748701576, + "learning_rate": 1.5418061628397037e-05, + "loss": 0.4769, "step": 7186 }, { - "epoch": 1.5070245334451666, - "grad_norm": 7.202051853890094, - "learning_rate": 1.0410779441627762e-05, - "loss": 0.7864, + "epoch": 1.0148263202485173, + "grad_norm": 3.2292989920337933, + "learning_rate": 1.5416780257446228e-05, + "loss": 0.5166, "step": 7187 }, { - "epoch": 1.5072342210106942, - "grad_norm": 6.3788151048314505, - "learning_rate": 1.0408517457367736e-05, - "loss": 0.7534, + "epoch": 1.0149675232985031, + "grad_norm": 3.257126757116606, + "learning_rate": 1.541549876061178e-05, + "loss": 0.7386, "step": 7188 }, { - "epoch": 1.5074439085762215, - "grad_norm": 8.477930517598896, - "learning_rate": 1.0406255452170478e-05, - "loss": 0.9195, + "epoch": 1.015108726348489, + "grad_norm": 3.7739124521570604, + "learning_rate": 1.5414217137923485e-05, + "loss": 0.6164, "step": 7189 }, { - "epoch": 1.5076535961417488, - "grad_norm": 6.317121233635021, - "learning_rate": 1.0403993426151923e-05, - "loss": 0.579, + "epoch": 1.015249929398475, + "grad_norm": 3.324963879021115, + "learning_rate": 1.5412935389411124e-05, + "loss": 0.54, "step": 7190 }, { - "epoch": 1.5078632837072763, - "grad_norm": 7.333313896323223, - "learning_rate": 1.0401731379428006e-05, - "loss": 0.7964, + "epoch": 1.0153911324484608, + "grad_norm": 3.1652886655740944, + "learning_rate": 1.5411653515104478e-05, + "loss": 0.6177, "step": 7191 }, { - "epoch": 1.5080729712728034, - "grad_norm": 7.065798215079973, - "learning_rate": 1.0399469312114661e-05, - "loss": 0.792, + "epoch": 1.0155323354984467, + "grad_norm": 3.2735112766824805, + "learning_rate": 1.5410371515033343e-05, + "loss": 0.5501, "step": 7192 }, { - "epoch": 1.508282658838331, - "grad_norm": 7.497041454262129, - "learning_rate": 1.039720722432782e-05, - "loss": 0.9291, + "epoch": 1.0156735385484326, + "grad_norm": 3.2336830257198943, + "learning_rate": 1.540908938922751e-05, + "loss": 0.7034, "step": 7193 }, { - "epoch": 1.5084923464038582, - "grad_norm": 5.177489335698658, - "learning_rate": 1.0394945116183419e-05, - "loss": 0.5616, + "epoch": 1.0158147415984184, + "grad_norm": 4.125005116817651, + "learning_rate": 1.5407807137716774e-05, + "loss": 0.74, "step": 7194 }, { - "epoch": 1.5087020339693855, - "grad_norm": 6.919790561306532, - "learning_rate": 1.0392682987797396e-05, - "loss": 0.8215, + "epoch": 1.0159559446484043, + "grad_norm": 3.2621598285542133, + "learning_rate": 1.540652476053094e-05, + "loss": 0.5576, "step": 7195 }, { - "epoch": 1.508911721534913, - "grad_norm": 7.1870891246285415, - "learning_rate": 1.0390420839285687e-05, - "loss": 0.944, + "epoch": 1.0160971476983902, + "grad_norm": 3.270942217060861, + "learning_rate": 1.54052422576998e-05, + "loss": 0.6965, "step": 7196 }, { - "epoch": 1.5091214091004403, - "grad_norm": 7.943277737057516, - "learning_rate": 1.0388158670764234e-05, - "loss": 0.7801, + "epoch": 1.016238350748376, + "grad_norm": 3.906316014897361, + "learning_rate": 1.5403959629253168e-05, + "loss": 0.6746, "step": 7197 }, { - "epoch": 1.5093310966659677, - "grad_norm": 6.202826949647392, - "learning_rate": 1.0385896482348976e-05, - "loss": 0.6971, + "epoch": 1.016379553798362, + "grad_norm": 3.1450405430365076, + "learning_rate": 1.5402676875220847e-05, + "loss": 0.6263, "step": 7198 }, { - "epoch": 1.5095407842314952, - "grad_norm": 8.699193719331987, - "learning_rate": 1.0383634274155854e-05, - "loss": 1.0111, + "epoch": 1.0165207568483479, + "grad_norm": 3.4371348945556703, + "learning_rate": 1.540139399563265e-05, + "loss": 0.5828, "step": 7199 }, { - "epoch": 1.5097504717970225, - "grad_norm": 7.73420748794142, - "learning_rate": 1.0381372046300813e-05, - "loss": 0.8511, + "epoch": 1.0166619598983337, + "grad_norm": 3.380551872934823, + "learning_rate": 1.5400110990518386e-05, + "loss": 0.6005, "step": 7200 }, { - "epoch": 1.5099601593625498, - "grad_norm": 6.6104197555678645, - "learning_rate": 1.0379109798899787e-05, - "loss": 0.8213, + "epoch": 1.0168031629483196, + "grad_norm": 4.59773651229296, + "learning_rate": 1.5398827859907878e-05, + "loss": 0.6946, "step": 7201 }, { - "epoch": 1.5101698469280773, - "grad_norm": 5.810383249912145, - "learning_rate": 1.0376847532068732e-05, - "loss": 0.8499, + "epoch": 1.0169443659983055, + "grad_norm": 3.22909432774919, + "learning_rate": 1.539754460383094e-05, + "loss": 0.5719, "step": 7202 }, { - "epoch": 1.5103795344936044, - "grad_norm": 7.077879558422452, - "learning_rate": 1.0374585245923583e-05, - "loss": 0.6363, + "epoch": 1.0170855690482914, + "grad_norm": 3.273546621398694, + "learning_rate": 1.5396261222317397e-05, + "loss": 0.5007, "step": 7203 }, { - "epoch": 1.510589222059132, - "grad_norm": 6.0736232434991155, - "learning_rate": 1.0372322940580295e-05, - "loss": 0.6029, + "epoch": 1.0172267720982773, + "grad_norm": 2.986950310765524, + "learning_rate": 1.5394977715397073e-05, + "loss": 0.4956, "step": 7204 }, { - "epoch": 1.5107989096246592, - "grad_norm": 7.686288769454458, - "learning_rate": 1.0370060616154807e-05, - "loss": 0.6943, + "epoch": 1.0173679751482632, + "grad_norm": 3.371032263679071, + "learning_rate": 1.53936940830998e-05, + "loss": 0.6042, "step": 7205 }, { - "epoch": 1.5110085971901865, - "grad_norm": 6.058421053168277, - "learning_rate": 1.0367798272763071e-05, - "loss": 0.693, + "epoch": 1.017509178198249, + "grad_norm": 3.279595037467894, + "learning_rate": 1.5392410325455397e-05, + "loss": 0.4924, "step": 7206 }, { - "epoch": 1.511218284755714, - "grad_norm": 6.41309111946237, - "learning_rate": 1.0365535910521036e-05, - "loss": 0.6205, + "epoch": 1.017650381248235, + "grad_norm": 3.1948154854260027, + "learning_rate": 1.5391126442493715e-05, + "loss": 0.5885, "step": 7207 }, { - "epoch": 1.5114279723212414, - "grad_norm": 6.758311557258199, - "learning_rate": 1.0363273529544655e-05, - "loss": 0.7815, + "epoch": 1.0177915842982208, + "grad_norm": 4.448682306313843, + "learning_rate": 1.538984243424458e-05, + "loss": 0.9346, "step": 7208 }, { - "epoch": 1.5116376598867687, - "grad_norm": 4.823608543630581, - "learning_rate": 1.036101112994987e-05, - "loss": 0.4516, + "epoch": 1.0179327873482067, + "grad_norm": 3.406259540168245, + "learning_rate": 1.538855830073784e-05, + "loss": 0.6966, "step": 7209 }, { - "epoch": 1.5118473474522962, - "grad_norm": 6.382616307968191, - "learning_rate": 1.0358748711852643e-05, - "loss": 0.4889, + "epoch": 1.0180739903981926, + "grad_norm": 3.567194703612795, + "learning_rate": 1.5387274042003327e-05, + "loss": 0.5877, "step": 7210 }, { - "epoch": 1.5120570350178233, - "grad_norm": 8.05645790725201, - "learning_rate": 1.035648627536892e-05, - "loss": 0.8713, + "epoch": 1.0182151934481785, + "grad_norm": 3.5516149059926363, + "learning_rate": 1.538598965807089e-05, + "loss": 0.5406, "step": 7211 }, { - "epoch": 1.5122667225833508, - "grad_norm": 7.039288261309405, - "learning_rate": 1.0354223820614655e-05, - "loss": 0.6426, + "epoch": 1.0183563964981643, + "grad_norm": 4.206829421788801, + "learning_rate": 1.5384705148970384e-05, + "loss": 0.8361, "step": 7212 }, { - "epoch": 1.5124764101488781, - "grad_norm": 6.969950464849892, - "learning_rate": 1.0351961347705809e-05, - "loss": 0.6918, + "epoch": 1.0184975995481502, + "grad_norm": 3.571647875827525, + "learning_rate": 1.5383420514731653e-05, + "loss": 0.6364, "step": 7213 }, { - "epoch": 1.5126860977144054, - "grad_norm": 6.863030918953516, - "learning_rate": 1.0349698856758332e-05, - "loss": 0.9141, + "epoch": 1.018638802598136, + "grad_norm": 3.140850042554657, + "learning_rate": 1.5382135755384554e-05, + "loss": 0.5521, "step": 7214 }, { - "epoch": 1.512895785279933, - "grad_norm": 7.293898322341821, - "learning_rate": 1.034743634788818e-05, - "loss": 0.9232, + "epoch": 1.018780005648122, + "grad_norm": 2.650281639448247, + "learning_rate": 1.5380850870958945e-05, + "loss": 0.505, "step": 7215 }, { - "epoch": 1.5131054728454603, - "grad_norm": 7.76004929003657, - "learning_rate": 1.0345173821211316e-05, - "loss": 1.0377, + "epoch": 1.0189212086981079, + "grad_norm": 4.009481058679087, + "learning_rate": 1.537956586148469e-05, + "loss": 0.6736, "step": 7216 }, { - "epoch": 1.5133151604109876, - "grad_norm": 7.919419896437171, - "learning_rate": 1.0342911276843692e-05, - "loss": 0.9963, + "epoch": 1.0190624117480938, + "grad_norm": 4.978755602432865, + "learning_rate": 1.5378280726991638e-05, + "loss": 0.6229, "step": 7217 }, { - "epoch": 1.513524847976515, - "grad_norm": 7.379983765498563, - "learning_rate": 1.0340648714901271e-05, - "loss": 0.8489, + "epoch": 1.0192036147980796, + "grad_norm": 3.703231219726251, + "learning_rate": 1.5376995467509673e-05, + "loss": 0.6891, "step": 7218 }, { - "epoch": 1.5137345355420424, - "grad_norm": 5.337259017427037, - "learning_rate": 1.0338386135500014e-05, - "loss": 0.5561, + "epoch": 1.0193448178480655, + "grad_norm": 3.174865875069421, + "learning_rate": 1.5375710083068653e-05, + "loss": 0.5142, "step": 7219 }, { - "epoch": 1.5139442231075697, - "grad_norm": 7.34990730614422, - "learning_rate": 1.0336123538755877e-05, - "loss": 0.8141, + "epoch": 1.0194860208980514, + "grad_norm": 3.1950152032541554, + "learning_rate": 1.5374424573698453e-05, + "loss": 0.4462, "step": 7220 }, { - "epoch": 1.5141539106730972, - "grad_norm": 6.871520929411175, - "learning_rate": 1.0333860924784829e-05, - "loss": 0.6368, + "epoch": 1.0196272239480373, + "grad_norm": 3.362487248111589, + "learning_rate": 1.5373138939428945e-05, + "loss": 0.4778, "step": 7221 }, { - "epoch": 1.5143635982386243, - "grad_norm": 5.657223691434179, - "learning_rate": 1.0331598293702829e-05, - "loss": 0.6661, + "epoch": 1.0197684269980232, + "grad_norm": 3.519852340488302, + "learning_rate": 1.537185318029001e-05, + "loss": 0.5585, "step": 7222 }, { - "epoch": 1.5145732858041518, - "grad_norm": 7.410537803410694, - "learning_rate": 1.0329335645625845e-05, - "loss": 0.8287, + "epoch": 1.019909630048009, + "grad_norm": 4.173641963727146, + "learning_rate": 1.5370567296311523e-05, + "loss": 0.7462, "step": 7223 }, { - "epoch": 1.5147829733696792, - "grad_norm": 8.441122876873957, - "learning_rate": 1.032707298066983e-05, - "loss": 0.9426, + "epoch": 1.020050833097995, + "grad_norm": 4.086431851669458, + "learning_rate": 1.5369281287523376e-05, + "loss": 0.6723, "step": 7224 }, { - "epoch": 1.5149926609352065, - "grad_norm": 9.247675299506014, - "learning_rate": 1.0324810298950765e-05, - "loss": 1.0272, + "epoch": 1.0201920361479808, + "grad_norm": 3.7946270213518147, + "learning_rate": 1.5367995153955447e-05, + "loss": 0.5936, "step": 7225 }, { - "epoch": 1.515202348500734, - "grad_norm": 7.794645679477385, - "learning_rate": 1.0322547600584605e-05, - "loss": 0.9103, + "epoch": 1.0203332391979667, + "grad_norm": 4.048652782544324, + "learning_rate": 1.536670889563763e-05, + "loss": 0.7304, "step": 7226 }, { - "epoch": 1.5154120360662613, - "grad_norm": 7.786649540905256, - "learning_rate": 1.032028488568732e-05, - "loss": 0.8813, + "epoch": 1.0204744422479526, + "grad_norm": 3.4333234946165523, + "learning_rate": 1.536542251259982e-05, + "loss": 0.5696, "step": 7227 }, { - "epoch": 1.5156217236317886, - "grad_norm": 6.655706695481443, - "learning_rate": 1.031802215437488e-05, - "loss": 0.7194, + "epoch": 1.0206156452979385, + "grad_norm": 3.2227697470408145, + "learning_rate": 1.5364136004871906e-05, + "loss": 0.5911, "step": 7228 }, { - "epoch": 1.5158314111973161, - "grad_norm": 6.954621659693343, - "learning_rate": 1.0315759406763258e-05, - "loss": 0.7674, + "epoch": 1.0207568483479244, + "grad_norm": 4.563373354093314, + "learning_rate": 1.5362849372483788e-05, + "loss": 0.5358, "step": 7229 }, { - "epoch": 1.5160410987628432, - "grad_norm": 6.984153889403306, - "learning_rate": 1.031349664296841e-05, - "loss": 0.8589, + "epoch": 1.0208980513979102, + "grad_norm": 4.292529823963489, + "learning_rate": 1.5361562615465366e-05, + "loss": 0.7066, "step": 7230 }, { - "epoch": 1.5162507863283707, - "grad_norm": 7.249698906023001, - "learning_rate": 1.0311233863106323e-05, - "loss": 0.6932, + "epoch": 1.0210392544478961, + "grad_norm": 3.274679871027537, + "learning_rate": 1.536027573384654e-05, + "loss": 0.5416, "step": 7231 }, { - "epoch": 1.516460473893898, - "grad_norm": 9.692383956716936, - "learning_rate": 1.0308971067292954e-05, - "loss": 0.899, + "epoch": 1.021180457497882, + "grad_norm": 3.423583157734008, + "learning_rate": 1.5358988727657227e-05, + "loss": 0.6501, "step": 7232 }, { - "epoch": 1.5166701614594253, - "grad_norm": 9.53633659426326, - "learning_rate": 1.0306708255644287e-05, - "loss": 0.9927, + "epoch": 1.0213216605478679, + "grad_norm": 3.4807870684669884, + "learning_rate": 1.535770159692733e-05, + "loss": 0.6156, "step": 7233 }, { - "epoch": 1.5168798490249529, - "grad_norm": 6.123473730677284, - "learning_rate": 1.0304445428276288e-05, - "loss": 0.7277, + "epoch": 1.0214628635978538, + "grad_norm": 3.2780120296300304, + "learning_rate": 1.5356414341686758e-05, + "loss": 0.6189, "step": 7234 }, { - "epoch": 1.5170895365904802, - "grad_norm": 8.102528268053565, - "learning_rate": 1.0302182585304932e-05, - "loss": 0.9082, + "epoch": 1.0216040666478396, + "grad_norm": 4.3423227163829345, + "learning_rate": 1.535512696196543e-05, + "loss": 0.639, "step": 7235 }, { - "epoch": 1.5172992241560075, - "grad_norm": 7.334231919023435, - "learning_rate": 1.0299919726846195e-05, - "loss": 0.6974, + "epoch": 1.0217452696978255, + "grad_norm": 3.6362748793726305, + "learning_rate": 1.535383945779327e-05, + "loss": 0.5801, "step": 7236 }, { - "epoch": 1.517508911721535, - "grad_norm": 7.123208459904657, - "learning_rate": 1.0297656853016055e-05, - "loss": 0.5872, + "epoch": 1.0218864727478114, + "grad_norm": 4.277821769871563, + "learning_rate": 1.5352551829200185e-05, + "loss": 0.7147, "step": 7237 }, { - "epoch": 1.5177185992870623, - "grad_norm": 8.009548849173681, - "learning_rate": 1.0295393963930481e-05, - "loss": 0.9495, + "epoch": 1.0220276757977973, + "grad_norm": 3.587534190319147, + "learning_rate": 1.5351264076216114e-05, + "loss": 0.6588, "step": 7238 }, { - "epoch": 1.5179282868525896, - "grad_norm": 7.284894923337498, - "learning_rate": 1.0293131059705456e-05, - "loss": 0.844, + "epoch": 1.0221688788477832, + "grad_norm": 3.8270448928798455, + "learning_rate": 1.5349976198870974e-05, + "loss": 0.6664, "step": 7239 }, { - "epoch": 1.5181379744181172, - "grad_norm": 7.016237756778589, - "learning_rate": 1.0290868140456956e-05, - "loss": 0.9339, + "epoch": 1.022310081897769, + "grad_norm": 3.728377692383585, + "learning_rate": 1.5348688197194696e-05, + "loss": 0.5461, "step": 7240 }, { - "epoch": 1.5183476619836442, - "grad_norm": 6.28625131915815, - "learning_rate": 1.0288605206300956e-05, - "loss": 0.7448, + "epoch": 1.022451284947755, + "grad_norm": 3.5497920726089176, + "learning_rate": 1.5347400071217217e-05, + "loss": 0.6393, "step": 7241 }, { - "epoch": 1.5185573495491718, - "grad_norm": 6.534574981172653, - "learning_rate": 1.0286342257353441e-05, - "loss": 0.7578, + "epoch": 1.0225924879977408, + "grad_norm": 3.2256845115960955, + "learning_rate": 1.534611182096847e-05, + "loss": 0.5814, "step": 7242 }, { - "epoch": 1.518767037114699, - "grad_norm": 5.611130303800543, - "learning_rate": 1.0284079293730392e-05, - "loss": 0.5366, + "epoch": 1.0227336910477267, + "grad_norm": 3.7531103641381476, + "learning_rate": 1.534482344647839e-05, + "loss": 0.5902, "step": 7243 }, { - "epoch": 1.5189767246802264, - "grad_norm": 4.5642509274877865, - "learning_rate": 1.0281816315547778e-05, - "loss": 0.4823, + "epoch": 1.0228748940977126, + "grad_norm": 4.223739103111618, + "learning_rate": 1.5343534947776924e-05, + "loss": 0.6904, "step": 7244 }, { - "epoch": 1.519186412245754, - "grad_norm": 6.401566719099642, - "learning_rate": 1.0279553322921598e-05, - "loss": 0.6233, + "epoch": 1.0230160971476985, + "grad_norm": 3.225214758808627, + "learning_rate": 1.534224632489401e-05, + "loss": 0.6206, "step": 7245 }, { - "epoch": 1.5193960998112812, - "grad_norm": 7.143747667768428, - "learning_rate": 1.0277290315967821e-05, - "loss": 0.8247, + "epoch": 1.0231573001976844, + "grad_norm": 3.1678050809306115, + "learning_rate": 1.5340957577859605e-05, + "loss": 0.5058, "step": 7246 }, { - "epoch": 1.5196057873768085, - "grad_norm": 8.408142469765536, - "learning_rate": 1.0275027294802436e-05, - "loss": 1.02, + "epoch": 1.0232985032476702, + "grad_norm": 3.058297180136965, + "learning_rate": 1.5339668706703648e-05, + "loss": 0.5962, "step": 7247 }, { - "epoch": 1.519815474942336, - "grad_norm": 6.059746834660585, - "learning_rate": 1.0272764259541423e-05, - "loss": 0.73, + "epoch": 1.0234397062976561, + "grad_norm": 4.836654556181267, + "learning_rate": 1.5338379711456096e-05, + "loss": 0.9196, "step": 7248 }, { - "epoch": 1.5200251625078631, - "grad_norm": 7.778963544199173, - "learning_rate": 1.027050121030077e-05, - "loss": 0.6743, + "epoch": 1.023580909347642, + "grad_norm": 3.9013312387310672, + "learning_rate": 1.5337090592146905e-05, + "loss": 0.7425, "step": 7249 }, { - "epoch": 1.5202348500733907, - "grad_norm": 6.672043312599691, - "learning_rate": 1.0268238147196459e-05, - "loss": 0.7725, + "epoch": 1.023722112397628, + "grad_norm": 3.7705050288015842, + "learning_rate": 1.5335801348806036e-05, + "loss": 0.6462, "step": 7250 }, { - "epoch": 1.520444537638918, - "grad_norm": 7.11011299767315, - "learning_rate": 1.0265975070344478e-05, - "loss": 0.8617, + "epoch": 1.0238633154476138, + "grad_norm": 3.7974687868876504, + "learning_rate": 1.5334511981463446e-05, + "loss": 0.655, "step": 7251 }, { - "epoch": 1.5206542252044453, - "grad_norm": 6.894065383808769, - "learning_rate": 1.0263711979860817e-05, - "loss": 0.7816, + "epoch": 1.0240045184975997, + "grad_norm": 3.7551405416541037, + "learning_rate": 1.53332224901491e-05, + "loss": 0.7087, "step": 7252 }, { - "epoch": 1.5208639127699728, - "grad_norm": 7.042356650676313, - "learning_rate": 1.0261448875861455e-05, - "loss": 0.8459, + "epoch": 1.0241457215475853, + "grad_norm": 3.4978244549280126, + "learning_rate": 1.533193287489297e-05, + "loss": 0.5627, "step": 7253 }, { - "epoch": 1.5210736003355, - "grad_norm": 7.990217081096455, - "learning_rate": 1.0259185758462389e-05, - "loss": 0.8728, + "epoch": 1.0242869245975712, + "grad_norm": 3.3166735054023078, + "learning_rate": 1.5330643135725022e-05, + "loss": 0.58, "step": 7254 }, { - "epoch": 1.5212832879010274, - "grad_norm": 7.909815437368781, - "learning_rate": 1.0256922627779599e-05, - "loss": 0.875, + "epoch": 1.024428127647557, + "grad_norm": 3.193471574925442, + "learning_rate": 1.5329353272675228e-05, + "loss": 0.5478, "step": 7255 }, { - "epoch": 1.521492975466555, - "grad_norm": 7.313489515598973, - "learning_rate": 1.025465948392908e-05, - "loss": 0.8089, + "epoch": 1.024569330697543, + "grad_norm": 2.8375410707470463, + "learning_rate": 1.5328063285773567e-05, + "loss": 0.474, "step": 7256 }, { - "epoch": 1.5217026630320822, - "grad_norm": 6.47980345112776, - "learning_rate": 1.0252396327026823e-05, - "loss": 0.6915, + "epoch": 1.0247105337475289, + "grad_norm": 3.9131124370848362, + "learning_rate": 1.532677317505001e-05, + "loss": 0.6844, "step": 7257 }, { - "epoch": 1.5219123505976095, - "grad_norm": 6.86446174612249, - "learning_rate": 1.0250133157188817e-05, - "loss": 0.7712, + "epoch": 1.0248517367975147, + "grad_norm": 3.585885233528202, + "learning_rate": 1.5325482940534554e-05, + "loss": 0.5834, "step": 7258 }, { - "epoch": 1.522122038163137, - "grad_norm": 7.41267377064116, - "learning_rate": 1.0247869974531048e-05, - "loss": 0.6989, + "epoch": 1.0249929398475006, + "grad_norm": 2.6430576386991027, + "learning_rate": 1.5324192582257173e-05, + "loss": 0.4776, "step": 7259 }, { - "epoch": 1.5223317257286642, - "grad_norm": 6.24550741734712, - "learning_rate": 1.0245606779169518e-05, - "loss": 0.6255, + "epoch": 1.0251341428974865, + "grad_norm": 3.921039976637262, + "learning_rate": 1.532290210024785e-05, + "loss": 0.8692, "step": 7260 }, { - "epoch": 1.5225414132941917, - "grad_norm": 6.0988744283174405, - "learning_rate": 1.0243343571220212e-05, - "loss": 0.6784, + "epoch": 1.0252753459474724, + "grad_norm": 2.9708415446190934, + "learning_rate": 1.5321611494536587e-05, + "loss": 0.5737, "step": 7261 }, { - "epoch": 1.522751100859719, - "grad_norm": 7.322015036871099, - "learning_rate": 1.0241080350799127e-05, - "loss": 0.7716, + "epoch": 1.0254165489974583, + "grad_norm": 5.023971930389562, + "learning_rate": 1.5320320765153367e-05, + "loss": 0.8844, "step": 7262 }, { - "epoch": 1.5229607884252463, - "grad_norm": 5.308205613697077, - "learning_rate": 1.0238817118022255e-05, - "loss": 0.6083, + "epoch": 1.0255577520474441, + "grad_norm": 3.5578084152084255, + "learning_rate": 1.531902991212819e-05, + "loss": 0.7231, "step": 7263 }, { - "epoch": 1.5231704759907738, - "grad_norm": 6.750314424286773, - "learning_rate": 1.0236553873005592e-05, - "loss": 0.8001, + "epoch": 1.02569895509743, + "grad_norm": 3.1660480012546515, + "learning_rate": 1.531773893549106e-05, + "loss": 0.5439, "step": 7264 }, { - "epoch": 1.5233801635563011, - "grad_norm": 6.9471760582639845, - "learning_rate": 1.0234290615865132e-05, - "loss": 0.8286, + "epoch": 1.025840158147416, + "grad_norm": 3.0703355996479784, + "learning_rate": 1.531644783527197e-05, + "loss": 0.5848, "step": 7265 }, { - "epoch": 1.5235898511218284, - "grad_norm": 8.595175343566673, - "learning_rate": 1.0232027346716872e-05, - "loss": 0.7378, + "epoch": 1.0259813611974018, + "grad_norm": 3.295676235539044, + "learning_rate": 1.5315156611500927e-05, + "loss": 0.6646, "step": 7266 }, { - "epoch": 1.523799538687356, - "grad_norm": 6.978178774724283, - "learning_rate": 1.0229764065676808e-05, - "loss": 0.767, + "epoch": 1.0261225642473877, + "grad_norm": 3.136151365208178, + "learning_rate": 1.531386526420794e-05, + "loss": 0.5156, "step": 7267 }, { - "epoch": 1.524009226252883, - "grad_norm": 7.909728023217, - "learning_rate": 1.0227500772860938e-05, - "loss": 0.7439, + "epoch": 1.0262637672973736, + "grad_norm": 3.4156687526260168, + "learning_rate": 1.531257379342302e-05, + "loss": 0.6494, "step": 7268 }, { - "epoch": 1.5242189138184106, - "grad_norm": 6.7516061056682615, - "learning_rate": 1.0225237468385258e-05, - "loss": 0.8037, + "epoch": 1.0264049703473594, + "grad_norm": 3.495032888541256, + "learning_rate": 1.531128219917618e-05, + "loss": 0.7187, "step": 7269 }, { - "epoch": 1.524428601383938, - "grad_norm": 6.825298250966481, - "learning_rate": 1.0222974152365768e-05, - "loss": 0.7308, + "epoch": 1.0265461733973453, + "grad_norm": 3.1638700070585233, + "learning_rate": 1.5309990481497438e-05, + "loss": 0.654, "step": 7270 }, { - "epoch": 1.5246382889494652, - "grad_norm": 5.93066276224902, - "learning_rate": 1.0220710824918465e-05, - "loss": 0.6715, + "epoch": 1.0266873764473312, + "grad_norm": 3.660734305614395, + "learning_rate": 1.5308698640416806e-05, + "loss": 0.8277, "step": 7271 }, { - "epoch": 1.5248479765149927, - "grad_norm": 8.634307663669126, - "learning_rate": 1.0218447486159348e-05, - "loss": 0.8895, + "epoch": 1.026828579497317, + "grad_norm": 3.377396052765143, + "learning_rate": 1.5307406675964315e-05, + "loss": 0.6784, "step": 7272 }, { - "epoch": 1.52505766408052, - "grad_norm": 7.960698578537101, - "learning_rate": 1.021618413620442e-05, - "loss": 0.8666, + "epoch": 1.026969782547303, + "grad_norm": 4.317938885818584, + "learning_rate": 1.530611458816998e-05, + "loss": 0.5993, "step": 7273 }, { - "epoch": 1.5252673516460473, - "grad_norm": 6.555442657152822, - "learning_rate": 1.0213920775169681e-05, - "loss": 0.7264, + "epoch": 1.0271109855972889, + "grad_norm": 2.859528187443678, + "learning_rate": 1.530482237706383e-05, + "loss": 0.5508, "step": 7274 }, { - "epoch": 1.5254770392115748, - "grad_norm": 7.076378488169698, - "learning_rate": 1.0211657403171132e-05, - "loss": 0.722, + "epoch": 1.0272521886472747, + "grad_norm": 3.312074133834732, + "learning_rate": 1.5303530042675907e-05, + "loss": 0.6926, "step": 7275 }, { - "epoch": 1.5256867267771022, - "grad_norm": 7.995447325329171, - "learning_rate": 1.020939402032477e-05, - "loss": 0.9173, + "epoch": 1.0273933916972606, + "grad_norm": 3.2755348681257628, + "learning_rate": 1.5302237585036236e-05, + "loss": 0.6721, "step": 7276 }, { - "epoch": 1.5258964143426295, - "grad_norm": 7.672499086111196, - "learning_rate": 1.0207130626746604e-05, - "loss": 0.7231, + "epoch": 1.0275345947472465, + "grad_norm": 3.1054530640157987, + "learning_rate": 1.530094500417485e-05, + "loss": 0.5919, "step": 7277 }, { - "epoch": 1.526106101908157, - "grad_norm": 6.595953215911844, - "learning_rate": 1.0204867222552632e-05, - "loss": 0.6305, + "epoch": 1.0276757977972324, + "grad_norm": 3.1320750140093367, + "learning_rate": 1.5299652300121792e-05, + "loss": 0.5834, "step": 7278 }, { - "epoch": 1.526315789473684, - "grad_norm": 8.422016427253935, - "learning_rate": 1.0202603807858862e-05, - "loss": 0.8106, + "epoch": 1.0278170008472183, + "grad_norm": 3.1947610046654904, + "learning_rate": 1.5298359472907104e-05, + "loss": 0.5835, "step": 7279 }, { - "epoch": 1.5265254770392116, - "grad_norm": 7.201992731771172, - "learning_rate": 1.0200340382781294e-05, - "loss": 0.7639, + "epoch": 1.0279582038972042, + "grad_norm": 2.6395241884951655, + "learning_rate": 1.529706652256083e-05, + "loss": 0.4841, "step": 7280 }, { - "epoch": 1.526735164604739, - "grad_norm": 7.14176316732443, - "learning_rate": 1.0198076947435938e-05, - "loss": 0.7653, + "epoch": 1.02809940694719, + "grad_norm": 2.925001823496966, + "learning_rate": 1.5295773449113016e-05, + "loss": 0.5507, "step": 7281 }, { - "epoch": 1.5269448521702662, - "grad_norm": 7.898189974244631, - "learning_rate": 1.0195813501938789e-05, - "loss": 0.9381, + "epoch": 1.028240609997176, + "grad_norm": 3.4967412288957114, + "learning_rate": 1.5294480252593718e-05, + "loss": 0.6396, "step": 7282 }, { - "epoch": 1.5271545397357937, - "grad_norm": 6.279219730579641, - "learning_rate": 1.0193550046405862e-05, - "loss": 0.6652, + "epoch": 1.0283818130471618, + "grad_norm": 3.565400184956848, + "learning_rate": 1.529318693303298e-05, + "loss": 0.6525, "step": 7283 }, { - "epoch": 1.527364227301321, - "grad_norm": 6.380413107099824, - "learning_rate": 1.019128658095316e-05, - "loss": 0.6105, + "epoch": 1.0285230160971477, + "grad_norm": 3.209365241740629, + "learning_rate": 1.5291893490460868e-05, + "loss": 0.6448, "step": 7284 }, { - "epoch": 1.5275739148668483, - "grad_norm": 5.750222569963986, - "learning_rate": 1.0189023105696684e-05, - "loss": 0.5727, + "epoch": 1.0286642191471336, + "grad_norm": 3.259156534328898, + "learning_rate": 1.5290599924907435e-05, + "loss": 0.7105, "step": 7285 }, { - "epoch": 1.5277836024323759, - "grad_norm": 6.753279777162709, - "learning_rate": 1.018675962075245e-05, - "loss": 0.7523, + "epoch": 1.0288054221971195, + "grad_norm": 3.0491678078259157, + "learning_rate": 1.5289306236402744e-05, + "loss": 0.5773, "step": 7286 }, { - "epoch": 1.527993289997903, - "grad_norm": 7.759657203738766, - "learning_rate": 1.0184496126236462e-05, - "loss": 0.941, + "epoch": 1.0289466252471053, + "grad_norm": 4.0170831717097935, + "learning_rate": 1.5288012424976863e-05, + "loss": 0.6881, "step": 7287 }, { - "epoch": 1.5282029775634305, - "grad_norm": 4.688936286266487, - "learning_rate": 1.0182232622264725e-05, - "loss": 0.471, + "epoch": 1.0290878282970912, + "grad_norm": 3.573003843544896, + "learning_rate": 1.5286718490659854e-05, + "loss": 0.6259, "step": 7288 }, { - "epoch": 1.528412665128958, - "grad_norm": 7.366677720685867, - "learning_rate": 1.0179969108953254e-05, - "loss": 0.7325, + "epoch": 1.029229031347077, + "grad_norm": 4.094586919145629, + "learning_rate": 1.528542443348179e-05, + "loss": 0.7148, "step": 7289 }, { - "epoch": 1.528622352694485, - "grad_norm": 6.918557412332455, - "learning_rate": 1.017770558641805e-05, - "loss": 0.8737, + "epoch": 1.029370234397063, + "grad_norm": 2.6475493034602207, + "learning_rate": 1.5284130253472746e-05, + "loss": 0.4657, "step": 7290 }, { - "epoch": 1.5288320402600126, - "grad_norm": 6.703537761683248, - "learning_rate": 1.0175442054775126e-05, - "loss": 0.7144, + "epoch": 1.0295114374470489, + "grad_norm": 3.2796552402351766, + "learning_rate": 1.5282835950662798e-05, + "loss": 0.6006, "step": 7291 }, { - "epoch": 1.52904172782554, - "grad_norm": 7.629493612637875, - "learning_rate": 1.0173178514140496e-05, - "loss": 0.7616, + "epoch": 1.0296526404970348, + "grad_norm": 3.614147371898756, + "learning_rate": 1.5281541525082024e-05, + "loss": 0.6198, "step": 7292 }, { - "epoch": 1.5292514153910672, - "grad_norm": 6.519719882508098, - "learning_rate": 1.0170914964630165e-05, - "loss": 0.7256, + "epoch": 1.0297938435470206, + "grad_norm": 2.965731221069141, + "learning_rate": 1.5280246976760508e-05, + "loss": 0.5667, "step": 7293 }, { - "epoch": 1.5294611029565948, - "grad_norm": 6.141777620174371, - "learning_rate": 1.0168651406360144e-05, - "loss": 0.6655, + "epoch": 1.0299350465970065, + "grad_norm": 3.8603808823558214, + "learning_rate": 1.5278952305728325e-05, + "loss": 0.7842, "step": 7294 }, { - "epoch": 1.529670790522122, - "grad_norm": 7.033435258898474, - "learning_rate": 1.016638783944645e-05, - "loss": 0.7145, + "epoch": 1.0300762496469924, + "grad_norm": 3.7078664476820005, + "learning_rate": 1.5277657512015577e-05, + "loss": 0.7197, "step": 7295 }, { - "epoch": 1.5298804780876494, - "grad_norm": 7.151885961000227, - "learning_rate": 1.0164124264005086e-05, - "loss": 0.807, + "epoch": 1.0302174526969783, + "grad_norm": 2.7619158550215768, + "learning_rate": 1.5276362595652347e-05, + "loss": 0.5684, "step": 7296 }, { - "epoch": 1.530090165653177, - "grad_norm": 7.885985467659819, - "learning_rate": 1.0161860680152073e-05, - "loss": 0.8227, + "epoch": 1.0303586557469642, + "grad_norm": 3.2070022450375912, + "learning_rate": 1.5275067556668727e-05, + "loss": 0.5771, "step": 7297 }, { - "epoch": 1.530299853218704, - "grad_norm": 7.425592980463889, - "learning_rate": 1.0159597088003417e-05, - "loss": 0.7464, + "epoch": 1.03049985879695, + "grad_norm": 3.882799678914467, + "learning_rate": 1.5273772395094814e-05, + "loss": 0.7435, "step": 7298 }, { - "epoch": 1.5305095407842315, - "grad_norm": 8.65554673087192, - "learning_rate": 1.0157333487675134e-05, - "loss": 0.8471, + "epoch": 1.030641061846936, + "grad_norm": 2.89210012233448, + "learning_rate": 1.527247711096071e-05, + "loss": 0.488, "step": 7299 }, { - "epoch": 1.5307192283497588, - "grad_norm": 7.028338666433179, - "learning_rate": 1.0155069879283238e-05, - "loss": 0.8137, + "epoch": 1.0307822648969218, + "grad_norm": 3.3600617808852546, + "learning_rate": 1.5271181704296513e-05, + "loss": 0.62, "step": 7300 }, { - "epoch": 1.5309289159152861, - "grad_norm": 6.794506875293018, - "learning_rate": 1.0152806262943737e-05, - "loss": 0.7932, + "epoch": 1.0309234679469077, + "grad_norm": 3.353807355954199, + "learning_rate": 1.5269886175132335e-05, + "loss": 0.5732, "step": 7301 }, { - "epoch": 1.5311386034808137, - "grad_norm": 7.977245355990587, - "learning_rate": 1.0150542638772654e-05, - "loss": 0.8832, + "epoch": 1.0310646709968936, + "grad_norm": 4.382129451332229, + "learning_rate": 1.526859052349827e-05, + "loss": 0.6818, "step": 7302 }, { - "epoch": 1.531348291046341, - "grad_norm": 7.558654451742566, - "learning_rate": 1.0148279006886e-05, - "loss": 0.763, + "epoch": 1.0312058740468795, + "grad_norm": 3.047849373570398, + "learning_rate": 1.526729474942444e-05, + "loss": 0.5679, "step": 7303 }, { - "epoch": 1.5315579786118683, - "grad_norm": 7.816927046299833, - "learning_rate": 1.014601536739979e-05, - "loss": 0.9116, + "epoch": 1.0313470770968654, + "grad_norm": 3.4644139597527848, + "learning_rate": 1.526599885294096e-05, + "loss": 0.6065, "step": 7304 }, { - "epoch": 1.5317676661773958, - "grad_norm": 6.539313831222138, - "learning_rate": 1.0143751720430036e-05, - "loss": 0.6733, + "epoch": 1.0314882801468512, + "grad_norm": 3.3240762818062533, + "learning_rate": 1.5264702834077936e-05, + "loss": 0.6895, "step": 7305 }, { - "epoch": 1.531977353742923, - "grad_norm": 8.173099263599498, - "learning_rate": 1.014148806609276e-05, - "loss": 0.8424, + "epoch": 1.0316294831968371, + "grad_norm": 4.379825786643765, + "learning_rate": 1.5263406692865494e-05, + "loss": 0.7635, "step": 7306 }, { - "epoch": 1.5321870413084504, - "grad_norm": 8.270424255388624, - "learning_rate": 1.0139224404503974e-05, - "loss": 0.8122, + "epoch": 1.031770686246823, + "grad_norm": 4.522413688316297, + "learning_rate": 1.5262110429333752e-05, + "loss": 0.8144, "step": 7307 }, { - "epoch": 1.532396728873978, - "grad_norm": 7.466165008970468, - "learning_rate": 1.0136960735779695e-05, - "loss": 0.9181, + "epoch": 1.0319118892968089, + "grad_norm": 2.587812204245488, + "learning_rate": 1.5260814043512838e-05, + "loss": 0.3828, "step": 7308 }, { - "epoch": 1.532606416439505, - "grad_norm": 7.204561997311021, - "learning_rate": 1.0134697060035943e-05, - "loss": 0.7996, + "epoch": 1.0320530923467948, + "grad_norm": 3.412593582122455, + "learning_rate": 1.5259517535432875e-05, + "loss": 0.655, "step": 7309 }, { - "epoch": 1.5328161040050325, - "grad_norm": 6.789451362186487, - "learning_rate": 1.0132433377388733e-05, - "loss": 0.8782, + "epoch": 1.0321942953967806, + "grad_norm": 4.125467476664053, + "learning_rate": 1.5258220905123997e-05, + "loss": 0.8434, "step": 7310 }, { - "epoch": 1.5330257915705598, - "grad_norm": 7.758820342234318, - "learning_rate": 1.0130169687954078e-05, - "loss": 0.8946, + "epoch": 1.0323354984467665, + "grad_norm": 3.7093074851905286, + "learning_rate": 1.5256924152616333e-05, + "loss": 0.6841, "step": 7311 }, { - "epoch": 1.5332354791360872, - "grad_norm": 8.588861096994258, - "learning_rate": 1.0127905991848009e-05, - "loss": 0.8825, + "epoch": 1.0324767014967524, + "grad_norm": 3.2010603443376473, + "learning_rate": 1.5255627277940023e-05, + "loss": 0.5965, "step": 7312 }, { - "epoch": 1.5334451667016147, - "grad_norm": 5.799675147245732, - "learning_rate": 1.012564228918653e-05, - "loss": 0.6987, + "epoch": 1.0326179045467383, + "grad_norm": 4.22465757319989, + "learning_rate": 1.525433028112521e-05, + "loss": 0.824, "step": 7313 }, { - "epoch": 1.533654854267142, - "grad_norm": 6.232088840101841, - "learning_rate": 1.0123378580085667e-05, - "loss": 0.4624, + "epoch": 1.0327591075967242, + "grad_norm": 3.2734729026579323, + "learning_rate": 1.5253033162202027e-05, + "loss": 0.6886, "step": 7314 }, { - "epoch": 1.5338645418326693, - "grad_norm": 8.451537630388748, - "learning_rate": 1.0121114864661436e-05, - "loss": 0.7845, + "epoch": 1.03290031064671, + "grad_norm": 3.5567594742369373, + "learning_rate": 1.5251735921200622e-05, + "loss": 0.625, "step": 7315 }, { - "epoch": 1.5340742293981968, - "grad_norm": 8.013331779572823, - "learning_rate": 1.0118851143029865e-05, - "loss": 0.9236, + "epoch": 1.033041513696696, + "grad_norm": 2.9969436988915064, + "learning_rate": 1.5250438558151142e-05, + "loss": 0.4931, "step": 7316 }, { - "epoch": 1.534283916963724, - "grad_norm": 8.287058797066555, - "learning_rate": 1.0116587415306958e-05, - "loss": 0.9076, + "epoch": 1.0331827167466818, + "grad_norm": 3.149951444129728, + "learning_rate": 1.5249141073083732e-05, + "loss": 0.5772, "step": 7317 }, { - "epoch": 1.5344936045292514, - "grad_norm": 7.091103764952184, - "learning_rate": 1.0114323681608748e-05, - "loss": 0.7458, + "epoch": 1.0333239197966677, + "grad_norm": 3.4615132257074674, + "learning_rate": 1.524784346602856e-05, + "loss": 0.5361, "step": 7318 }, { - "epoch": 1.5347032920947787, - "grad_norm": 8.257460961578008, - "learning_rate": 1.011205994205125e-05, - "loss": 0.7686, + "epoch": 1.0334651228466536, + "grad_norm": 3.843233333150441, + "learning_rate": 1.524654573701577e-05, + "loss": 0.543, "step": 7319 }, { - "epoch": 1.534912979660306, - "grad_norm": 7.6694782967617705, - "learning_rate": 1.0109796196750487e-05, - "loss": 0.6978, + "epoch": 1.0336063258966393, + "grad_norm": 4.812809753533892, + "learning_rate": 1.5245247886075518e-05, + "loss": 0.805, "step": 7320 }, { - "epoch": 1.5351226672258336, - "grad_norm": 8.286857548109896, - "learning_rate": 1.0107532445822477e-05, - "loss": 0.7187, + "epoch": 1.0337475289466251, + "grad_norm": 5.957928965738149, + "learning_rate": 1.5243949913237975e-05, + "loss": 0.8411, "step": 7321 }, { - "epoch": 1.5353323547913609, - "grad_norm": 7.054008020796386, - "learning_rate": 1.0105268689383245e-05, - "loss": 0.7754, + "epoch": 1.033888731996611, + "grad_norm": 3.1164368800921554, + "learning_rate": 1.5242651818533299e-05, + "loss": 0.5108, "step": 7322 }, { - "epoch": 1.5355420423568882, - "grad_norm": 8.845225316840892, - "learning_rate": 1.0103004927548808e-05, - "loss": 0.9422, + "epoch": 1.034029935046597, + "grad_norm": 3.9059937911442075, + "learning_rate": 1.524135360199166e-05, + "loss": 0.7906, "step": 7323 }, { - "epoch": 1.5357517299224157, - "grad_norm": 7.347927872566806, - "learning_rate": 1.010074116043519e-05, - "loss": 0.7987, + "epoch": 1.0341711380965828, + "grad_norm": 3.7424998217122036, + "learning_rate": 1.5240055263643223e-05, + "loss": 0.6847, "step": 7324 }, { - "epoch": 1.535961417487943, - "grad_norm": 6.345647155606473, - "learning_rate": 1.0098477388158412e-05, - "loss": 0.6966, + "epoch": 1.0343123411465687, + "grad_norm": 3.3131562789859736, + "learning_rate": 1.5238756803518168e-05, + "loss": 0.5221, "step": 7325 }, { - "epoch": 1.5361711050534703, - "grad_norm": 5.77880545480286, - "learning_rate": 1.00962136108345e-05, - "loss": 0.6293, + "epoch": 1.0344535441965546, + "grad_norm": 3.7647655097323476, + "learning_rate": 1.5237458221646668e-05, + "loss": 0.5773, "step": 7326 }, { - "epoch": 1.5363807926189978, - "grad_norm": 8.560415297830199, - "learning_rate": 1.009394982857947e-05, - "loss": 0.9623, + "epoch": 1.0345947472465404, + "grad_norm": 2.9186807654508433, + "learning_rate": 1.5236159518058899e-05, + "loss": 0.5646, "step": 7327 }, { - "epoch": 1.536590480184525, - "grad_norm": 7.26537345789201, - "learning_rate": 1.009168604150935e-05, - "loss": 0.904, + "epoch": 1.0347359502965263, + "grad_norm": 4.1361126685425145, + "learning_rate": 1.5234860692785045e-05, + "loss": 0.8542, "step": 7328 }, { - "epoch": 1.5368001677500525, - "grad_norm": 11.573828364253833, - "learning_rate": 1.0089422249740162e-05, - "loss": 1.1564, + "epoch": 1.0348771533465122, + "grad_norm": 3.55142634764309, + "learning_rate": 1.523356174585529e-05, + "loss": 0.5493, "step": 7329 }, { - "epoch": 1.5370098553155798, - "grad_norm": 5.907906731351163, - "learning_rate": 1.0087158453387926e-05, - "loss": 0.7361, + "epoch": 1.035018356396498, + "grad_norm": 3.64123543347846, + "learning_rate": 1.5232262677299816e-05, + "loss": 0.623, "step": 7330 }, { - "epoch": 1.537219542881107, - "grad_norm": 7.54048879527474, - "learning_rate": 1.0084894652568671e-05, - "loss": 0.8141, + "epoch": 1.035159559446484, + "grad_norm": 4.032997030393602, + "learning_rate": 1.5230963487148822e-05, + "loss": 0.6619, "step": 7331 }, { - "epoch": 1.5374292304466346, - "grad_norm": 6.969313078367815, - "learning_rate": 1.0082630847398416e-05, - "loss": 0.6609, + "epoch": 1.0353007624964699, + "grad_norm": 4.371134730516989, + "learning_rate": 1.5229664175432494e-05, + "loss": 0.7491, "step": 7332 }, { - "epoch": 1.537638918012162, - "grad_norm": 6.105329665419921, - "learning_rate": 1.0080367037993191e-05, - "loss": 0.7284, + "epoch": 1.0354419655464557, + "grad_norm": 3.751046878987232, + "learning_rate": 1.522836474218103e-05, + "loss": 0.7113, "step": 7333 }, { - "epoch": 1.5378486055776892, - "grad_norm": 6.6100200195733425, - "learning_rate": 1.0078103224469009e-05, - "loss": 0.8241, + "epoch": 1.0355831685964416, + "grad_norm": 3.0360370045837675, + "learning_rate": 1.5227065187424623e-05, + "loss": 0.5548, "step": 7334 }, { - "epoch": 1.5380582931432167, - "grad_norm": 8.642240882371405, - "learning_rate": 1.0075839406941907e-05, - "loss": 0.8906, + "epoch": 1.0357243716464275, + "grad_norm": 3.7096994522360682, + "learning_rate": 1.5225765511193484e-05, + "loss": 0.5859, "step": 7335 }, { - "epoch": 1.5382679807087438, - "grad_norm": 9.21746784159912, - "learning_rate": 1.00735755855279e-05, - "loss": 0.8199, + "epoch": 1.0358655746964134, + "grad_norm": 2.961591621264662, + "learning_rate": 1.5224465713517811e-05, + "loss": 0.5144, "step": 7336 }, { - "epoch": 1.5384776682742713, - "grad_norm": 6.234086328470299, - "learning_rate": 1.007131176034302e-05, - "loss": 0.7233, + "epoch": 1.0360067777463993, + "grad_norm": 3.5748022403040385, + "learning_rate": 1.5223165794427806e-05, + "loss": 0.6136, "step": 7337 }, { - "epoch": 1.5386873558397987, - "grad_norm": 6.571711999693177, - "learning_rate": 1.0069047931503286e-05, - "loss": 0.7688, + "epoch": 1.0361479807963851, + "grad_norm": 3.8212245714036444, + "learning_rate": 1.522186575395369e-05, + "loss": 0.706, "step": 7338 }, { - "epoch": 1.538897043405326, - "grad_norm": 6.003520274056556, - "learning_rate": 1.006678409912473e-05, - "loss": 0.6299, + "epoch": 1.036289183846371, + "grad_norm": 3.107151356138247, + "learning_rate": 1.5220565592125667e-05, + "loss": 0.6219, "step": 7339 }, { - "epoch": 1.5391067309708535, - "grad_norm": 5.83944702746518, - "learning_rate": 1.0064520263323364e-05, - "loss": 0.6076, + "epoch": 1.036430386896357, + "grad_norm": 3.385505739314959, + "learning_rate": 1.5219265308973952e-05, + "loss": 0.6232, "step": 7340 }, { - "epoch": 1.5393164185363808, - "grad_norm": 5.649722688795755, - "learning_rate": 1.0062256424215233e-05, - "loss": 0.6365, + "epoch": 1.0365715899463428, + "grad_norm": 3.256543567810861, + "learning_rate": 1.5217964904528763e-05, + "loss": 0.5081, "step": 7341 }, { - "epoch": 1.539526106101908, - "grad_norm": 6.439930669469153, - "learning_rate": 1.0059992581916348e-05, - "loss": 0.96, + "epoch": 1.0367127929963287, + "grad_norm": 3.072779340396042, + "learning_rate": 1.5216664378820327e-05, + "loss": 0.5476, "step": 7342 }, { - "epoch": 1.5397357936674356, - "grad_norm": 9.902383883054467, - "learning_rate": 1.0057728736542738e-05, - "loss": 1.0087, + "epoch": 1.0368539960463146, + "grad_norm": 4.128101825656565, + "learning_rate": 1.5215363731878864e-05, + "loss": 0.7562, "step": 7343 }, { - "epoch": 1.539945481232963, - "grad_norm": 6.391528948494521, - "learning_rate": 1.005546488821043e-05, - "loss": 0.6244, + "epoch": 1.0369951990963004, + "grad_norm": 4.467909805898148, + "learning_rate": 1.5214062963734599e-05, + "loss": 0.8103, "step": 7344 }, { - "epoch": 1.5401551687984902, - "grad_norm": 6.383548870939659, - "learning_rate": 1.0053201037035453e-05, - "loss": 0.6891, + "epoch": 1.0371364021462863, + "grad_norm": 3.8224830887596326, + "learning_rate": 1.5212762074417766e-05, + "loss": 0.632, "step": 7345 }, { - "epoch": 1.5403648563640178, - "grad_norm": 7.598552313921071, - "learning_rate": 1.0050937183133826e-05, - "loss": 0.9693, + "epoch": 1.0372776051962722, + "grad_norm": 3.3097726839248285, + "learning_rate": 1.5211461063958589e-05, + "loss": 0.5466, "step": 7346 }, { - "epoch": 1.5405745439295448, - "grad_norm": 6.547856236853699, - "learning_rate": 1.0048673326621585e-05, - "loss": 0.6411, + "epoch": 1.037418808246258, + "grad_norm": 3.651826705840981, + "learning_rate": 1.5210159932387307e-05, + "loss": 0.6395, "step": 7347 }, { - "epoch": 1.5407842314950724, - "grad_norm": 5.427315215263923, - "learning_rate": 1.0046409467614748e-05, - "loss": 0.5787, + "epoch": 1.037560011296244, + "grad_norm": 3.211677103404582, + "learning_rate": 1.5208858679734161e-05, + "loss": 0.5243, "step": 7348 }, { - "epoch": 1.5409939190605997, - "grad_norm": 6.551414962943264, - "learning_rate": 1.0044145606229347e-05, - "loss": 0.6684, + "epoch": 1.0377012143462299, + "grad_norm": 3.1099727950799663, + "learning_rate": 1.5207557306029391e-05, + "loss": 0.6373, "step": 7349 }, { - "epoch": 1.541203606626127, - "grad_norm": 7.182514978300086, - "learning_rate": 1.0041881742581407e-05, - "loss": 0.8352, + "epoch": 1.0378424173962157, + "grad_norm": 3.6614921879530997, + "learning_rate": 1.5206255811303235e-05, + "loss": 0.7319, "step": 7350 }, { - "epoch": 1.5414132941916545, - "grad_norm": 6.466233656104143, - "learning_rate": 1.0039617876786954e-05, - "loss": 0.8065, + "epoch": 1.0379836204462016, + "grad_norm": 3.392348105198865, + "learning_rate": 1.5204954195585942e-05, + "loss": 0.5851, "step": 7351 }, { - "epoch": 1.5416229817571818, - "grad_norm": 6.088522211625142, - "learning_rate": 1.0037354008962018e-05, - "loss": 0.5606, + "epoch": 1.0381248234961875, + "grad_norm": 3.641975669685366, + "learning_rate": 1.5203652458907763e-05, + "loss": 0.5136, "step": 7352 }, { - "epoch": 1.5418326693227091, - "grad_norm": 7.773234586128021, - "learning_rate": 1.0035090139222624e-05, - "loss": 1.0263, + "epoch": 1.0382660265461734, + "grad_norm": 3.6645895624703386, + "learning_rate": 1.5202350601298945e-05, + "loss": 0.5675, "step": 7353 }, { - "epoch": 1.5420423568882367, - "grad_norm": 6.845897787158895, - "learning_rate": 1.0032826267684796e-05, - "loss": 0.7058, + "epoch": 1.0384072295961593, + "grad_norm": 3.9729202018819234, + "learning_rate": 1.5201048622789747e-05, + "loss": 0.7805, "step": 7354 }, { - "epoch": 1.5422520444537637, - "grad_norm": 5.3232289991003725, - "learning_rate": 1.003056239446457e-05, - "loss": 0.5401, + "epoch": 1.0385484326461452, + "grad_norm": 3.4089403946127876, + "learning_rate": 1.5199746523410425e-05, + "loss": 0.5358, "step": 7355 }, { - "epoch": 1.5424617320192913, - "grad_norm": 7.171126056712241, - "learning_rate": 1.0028298519677966e-05, - "loss": 0.8042, + "epoch": 1.038689635696131, + "grad_norm": 3.8058085593983733, + "learning_rate": 1.5198444303191244e-05, + "loss": 0.7142, "step": 7356 }, { - "epoch": 1.5426714195848186, - "grad_norm": 8.433038781632222, - "learning_rate": 1.0026034643441011e-05, - "loss": 0.9363, + "epoch": 1.038830838746117, + "grad_norm": 4.033607681245991, + "learning_rate": 1.5197141962162456e-05, + "loss": 0.816, "step": 7357 }, { - "epoch": 1.5428811071503459, - "grad_norm": 8.134643153062074, - "learning_rate": 1.0023770765869739e-05, - "loss": 0.9954, + "epoch": 1.0389720417961028, + "grad_norm": 3.4404088652922686, + "learning_rate": 1.5195839500354337e-05, + "loss": 0.5964, "step": 7358 }, { - "epoch": 1.5430907947158734, - "grad_norm": 7.124723165032045, - "learning_rate": 1.0021506887080172e-05, - "loss": 0.7058, + "epoch": 1.0391132448460887, + "grad_norm": 3.1172181180661904, + "learning_rate": 1.5194536917797151e-05, + "loss": 0.4997, "step": 7359 }, { - "epoch": 1.5433004822814007, - "grad_norm": 8.50121184521389, - "learning_rate": 1.0019243007188342e-05, - "loss": 0.8233, + "epoch": 1.0392544478960746, + "grad_norm": 3.689935224655931, + "learning_rate": 1.519323421452117e-05, + "loss": 0.6379, "step": 7360 }, { - "epoch": 1.543510169846928, - "grad_norm": 7.651948067525507, - "learning_rate": 1.0016979126310271e-05, - "loss": 0.7866, + "epoch": 1.0393956509460605, + "grad_norm": 14.722567003616996, + "learning_rate": 1.519193139055667e-05, + "loss": 0.5282, "step": 7361 }, { - "epoch": 1.5437198574124555, - "grad_norm": 8.082797857629302, - "learning_rate": 1.0014715244561995e-05, - "loss": 0.792, + "epoch": 1.0395368539960463, + "grad_norm": 4.834506994641452, + "learning_rate": 1.5190628445933925e-05, + "loss": 0.7767, "step": 7362 }, { - "epoch": 1.5439295449779828, - "grad_norm": 6.998884789419031, - "learning_rate": 1.0012451362059533e-05, - "loss": 0.8672, + "epoch": 1.0396780570460322, + "grad_norm": 3.460593472986546, + "learning_rate": 1.5189325380683217e-05, + "loss": 0.5374, "step": 7363 }, { - "epoch": 1.5441392325435102, - "grad_norm": 9.8663186863454, - "learning_rate": 1.001018747891892e-05, - "loss": 1.1342, + "epoch": 1.039819260096018, + "grad_norm": 3.1707477384831915, + "learning_rate": 1.5188022194834831e-05, + "loss": 0.5414, "step": 7364 }, { - "epoch": 1.5443489201090377, - "grad_norm": 7.299144624447819, - "learning_rate": 1.0007923595256179e-05, - "loss": 0.7506, + "epoch": 1.039960463146004, + "grad_norm": 3.700137494546321, + "learning_rate": 1.5186718888419046e-05, + "loss": 0.5391, "step": 7365 }, { - "epoch": 1.5445586076745648, - "grad_norm": 7.520050021767935, - "learning_rate": 1.000565971118734e-05, - "loss": 0.8727, + "epoch": 1.0401016661959899, + "grad_norm": 4.028904853731261, + "learning_rate": 1.5185415461466155e-05, + "loss": 0.6582, "step": 7366 }, { - "epoch": 1.5447682952400923, - "grad_norm": 5.602260144967309, - "learning_rate": 1.0003395826828433e-05, - "loss": 0.5291, + "epoch": 1.0402428692459758, + "grad_norm": 3.1556187236516515, + "learning_rate": 1.5184111914006447e-05, + "loss": 0.4892, "step": 7367 }, { - "epoch": 1.5449779828056196, - "grad_norm": 7.031603714470065, - "learning_rate": 1.0001131942295485e-05, - "loss": 0.7103, + "epoch": 1.0403840722959616, + "grad_norm": 3.44326418341099, + "learning_rate": 1.5182808246070222e-05, + "loss": 0.6142, "step": 7368 }, { - "epoch": 1.545187670371147, - "grad_norm": 6.76785456779502, - "learning_rate": 9.998868057704519e-06, - "loss": 0.8379, + "epoch": 1.0405252753459475, + "grad_norm": 4.35190487593451, + "learning_rate": 1.5181504457687766e-05, + "loss": 0.7644, "step": 7369 }, { - "epoch": 1.5453973579366744, - "grad_norm": 6.514405194543272, - "learning_rate": 9.996604173171567e-06, - "loss": 0.6836, + "epoch": 1.0406664783959334, + "grad_norm": 4.740148420597818, + "learning_rate": 1.5180200548889387e-05, + "loss": 0.5779, "step": 7370 }, { - "epoch": 1.5456070455022017, - "grad_norm": 6.390203505480608, - "learning_rate": 9.994340288812661e-06, - "loss": 0.5797, + "epoch": 1.0408076814459193, + "grad_norm": 3.7817542090604004, + "learning_rate": 1.5178896519705381e-05, + "loss": 0.6627, "step": 7371 }, { - "epoch": 1.545816733067729, - "grad_norm": 6.114566608284368, - "learning_rate": 9.992076404743823e-06, - "loss": 0.7431, + "epoch": 1.0409488844959052, + "grad_norm": 3.728363448316156, + "learning_rate": 1.517759237016606e-05, + "loss": 0.6701, "step": 7372 }, { - "epoch": 1.5460264206332566, - "grad_norm": 9.2910983968259, - "learning_rate": 9.989812521081081e-06, - "loss": 1.0739, + "epoch": 1.041090087545891, + "grad_norm": 3.2582678725989944, + "learning_rate": 1.517628810030173e-05, + "loss": 0.6913, "step": 7373 }, { - "epoch": 1.5462361081987837, - "grad_norm": 7.383611383307065, - "learning_rate": 9.98754863794047e-06, - "loss": 0.9107, + "epoch": 1.041231290595877, + "grad_norm": 3.9472504242622124, + "learning_rate": 1.5174983710142694e-05, + "loss": 0.686, "step": 7374 }, { - "epoch": 1.5464457957643112, - "grad_norm": 6.911824932750947, - "learning_rate": 9.985284755438006e-06, - "loss": 0.8516, + "epoch": 1.0413724936458628, + "grad_norm": 3.8112333912673853, + "learning_rate": 1.5173679199719277e-05, + "loss": 0.7034, "step": 7375 }, { - "epoch": 1.5466554833298385, - "grad_norm": 6.499021184220128, - "learning_rate": 9.983020873689729e-06, - "loss": 0.6742, + "epoch": 1.0415136966958487, + "grad_norm": 3.9276258746303494, + "learning_rate": 1.5172374569061787e-05, + "loss": 0.7805, "step": 7376 }, { - "epoch": 1.5468651708953658, - "grad_norm": 7.037560189645708, - "learning_rate": 9.980756992811662e-06, - "loss": 0.7315, + "epoch": 1.0416548997458346, + "grad_norm": 3.169926377723543, + "learning_rate": 1.5171069818200548e-05, + "loss": 0.5705, "step": 7377 }, { - "epoch": 1.5470748584608933, - "grad_norm": 7.150666859336036, - "learning_rate": 9.978493112919831e-06, - "loss": 0.87, + "epoch": 1.0417961027958205, + "grad_norm": 3.196572547897355, + "learning_rate": 1.5169764947165879e-05, + "loss": 0.6439, "step": 7378 }, { - "epoch": 1.5472845460264206, - "grad_norm": 8.515569495317697, - "learning_rate": 9.976229234130261e-06, - "loss": 1.0724, + "epoch": 1.0419373058458063, + "grad_norm": 4.129661497191095, + "learning_rate": 1.5168459955988101e-05, + "loss": 0.8093, "step": 7379 }, { - "epoch": 1.547494233591948, - "grad_norm": 6.84047306235906, - "learning_rate": 9.97396535655899e-06, - "loss": 0.7746, + "epoch": 1.0420785088957922, + "grad_norm": 3.0023363654269546, + "learning_rate": 1.5167154844697549e-05, + "loss": 0.5028, "step": 7380 }, { - "epoch": 1.5477039211574755, - "grad_norm": 8.25125969698863, - "learning_rate": 9.971701480322038e-06, - "loss": 1.1777, + "epoch": 1.0422197119457781, + "grad_norm": 3.12638699450999, + "learning_rate": 1.5165849613324552e-05, + "loss": 0.5875, "step": 7381 }, { - "epoch": 1.5479136087230028, - "grad_norm": 7.422910125224376, - "learning_rate": 9.969437605535434e-06, - "loss": 0.8754, + "epoch": 1.042360914995764, + "grad_norm": 3.1267380485174043, + "learning_rate": 1.5164544261899439e-05, + "loss": 0.4964, "step": 7382 }, { - "epoch": 1.54812329628853, - "grad_norm": 6.36336117328942, - "learning_rate": 9.967173732315207e-06, - "loss": 0.6979, + "epoch": 1.0425021180457499, + "grad_norm": 3.7777611824247437, + "learning_rate": 1.5163238790452549e-05, + "loss": 0.5903, "step": 7383 }, { - "epoch": 1.5483329838540576, - "grad_norm": 5.766541204693824, - "learning_rate": 9.964909860777378e-06, - "loss": 0.6505, + "epoch": 1.0426433210957358, + "grad_norm": 3.2267364985383207, + "learning_rate": 1.5161933199014216e-05, + "loss": 0.6329, "step": 7384 }, { - "epoch": 1.5485426714195847, - "grad_norm": 8.4724776724924, - "learning_rate": 9.962645991037984e-06, - "loss": 0.9283, + "epoch": 1.0427845241457216, + "grad_norm": 3.773615944071404, + "learning_rate": 1.5160627487614788e-05, + "loss": 0.6522, "step": 7385 }, { - "epoch": 1.5487523589851122, - "grad_norm": 5.937185265173824, - "learning_rate": 9.960382123213049e-06, - "loss": 0.705, + "epoch": 1.0429257271957075, + "grad_norm": 3.3452590788957, + "learning_rate": 1.5159321656284602e-05, + "loss": 0.66, "step": 7386 }, { - "epoch": 1.5489620465506395, - "grad_norm": 7.36011256077543, - "learning_rate": 9.958118257418596e-06, - "loss": 0.6625, + "epoch": 1.0430669302456934, + "grad_norm": 3.596295699278905, + "learning_rate": 1.5158015705054014e-05, + "loss": 0.6309, "step": 7387 }, { - "epoch": 1.5491717341161668, - "grad_norm": 6.244074231971001, - "learning_rate": 9.955854393770655e-06, - "loss": 0.6501, + "epoch": 1.0432081332956793, + "grad_norm": 3.7115718986481103, + "learning_rate": 1.5156709633953364e-05, + "loss": 0.7384, "step": 7388 }, { - "epoch": 1.5493814216816943, - "grad_norm": 7.774768067067479, - "learning_rate": 9.953590532385256e-06, - "loss": 0.9274, + "epoch": 1.0433493363456652, + "grad_norm": 4.216258153471969, + "learning_rate": 1.5155403443013011e-05, + "loss": 0.691, "step": 7389 }, { - "epoch": 1.5495911092472217, - "grad_norm": 7.553429689084709, - "learning_rate": 9.951326673378418e-06, - "loss": 0.7823, + "epoch": 1.0434905393956508, + "grad_norm": 3.92343916859213, + "learning_rate": 1.515409713226331e-05, + "loss": 0.652, "step": 7390 }, { - "epoch": 1.549800796812749, - "grad_norm": 6.5687810237587, - "learning_rate": 9.949062816866176e-06, - "loss": 0.6806, + "epoch": 1.0436317424456367, + "grad_norm": 3.2066603458712497, + "learning_rate": 1.5152790701734614e-05, + "loss": 0.5989, "step": 7391 }, { - "epoch": 1.5500104843782765, - "grad_norm": 7.082892471930119, - "learning_rate": 9.946798962964552e-06, - "loss": 0.7919, + "epoch": 1.0437729454956226, + "grad_norm": 3.555891824203542, + "learning_rate": 1.5151484151457292e-05, + "loss": 0.5144, "step": 7392 }, { - "epoch": 1.5502201719438036, - "grad_norm": 9.797756382432269, - "learning_rate": 9.944535111789572e-06, - "loss": 0.9143, + "epoch": 1.0439141485456085, + "grad_norm": 3.3866238029598437, + "learning_rate": 1.51501774814617e-05, + "loss": 0.6185, "step": 7393 }, { - "epoch": 1.550429859509331, - "grad_norm": 5.60964219046708, - "learning_rate": 9.942271263457264e-06, - "loss": 0.5399, + "epoch": 1.0440553515955944, + "grad_norm": 5.3269643067542285, + "learning_rate": 1.5148870691778208e-05, + "loss": 0.7135, "step": 7394 }, { - "epoch": 1.5506395470748586, - "grad_norm": 7.972707391497301, - "learning_rate": 9.940007418083657e-06, - "loss": 0.9266, + "epoch": 1.0441965546455803, + "grad_norm": 2.7763628983341953, + "learning_rate": 1.5147563782437184e-05, + "loss": 0.4414, "step": 7395 }, { - "epoch": 1.5508492346403857, - "grad_norm": 8.286666657127727, - "learning_rate": 9.93774357578477e-06, - "loss": 0.9395, + "epoch": 1.0443377576955661, + "grad_norm": 4.079596287736071, + "learning_rate": 1.5146256753469004e-05, + "loss": 0.731, "step": 7396 }, { - "epoch": 1.5510589222059132, - "grad_norm": 7.8268969623340094, - "learning_rate": 9.935479736676634e-06, - "loss": 0.95, + "epoch": 1.044478960745552, + "grad_norm": 3.392449552655842, + "learning_rate": 1.5144949604904036e-05, + "loss": 0.5996, "step": 7397 }, { - "epoch": 1.5512686097714405, - "grad_norm": 7.044189881490119, - "learning_rate": 9.933215900875275e-06, - "loss": 0.6886, + "epoch": 1.044620163795538, + "grad_norm": 4.4421894997882, + "learning_rate": 1.5143642336772663e-05, + "loss": 0.734, "step": 7398 }, { - "epoch": 1.5514782973369678, - "grad_norm": 5.651967220808202, - "learning_rate": 9.930952068496716e-06, - "loss": 0.4918, + "epoch": 1.0447613668455238, + "grad_norm": 3.3691930255061298, + "learning_rate": 1.5142334949105264e-05, + "loss": 0.6903, "step": 7399 }, { - "epoch": 1.5516879849024954, - "grad_norm": 5.061012068538018, - "learning_rate": 9.928688239656982e-06, - "loss": 0.5931, + "epoch": 1.0449025698955097, + "grad_norm": 2.8613528542680813, + "learning_rate": 1.5141027441932217e-05, + "loss": 0.5719, "step": 7400 }, { - "epoch": 1.5518976724680227, - "grad_norm": 8.138325659370816, - "learning_rate": 9.926424414472104e-06, - "loss": 1.005, + "epoch": 1.0450437729454956, + "grad_norm": 4.229748555029211, + "learning_rate": 1.5139719815283918e-05, + "loss": 0.7122, "step": 7401 }, { - "epoch": 1.55210736003355, - "grad_norm": 6.886416599279407, - "learning_rate": 9.924160593058095e-06, - "loss": 0.7032, + "epoch": 1.0451849759954814, + "grad_norm": 3.7921642466616046, + "learning_rate": 1.5138412069190747e-05, + "loss": 0.6298, "step": 7402 }, { - "epoch": 1.5523170475990775, - "grad_norm": 5.901054460204976, - "learning_rate": 9.921896775530991e-06, - "loss": 0.5227, + "epoch": 1.0453261790454673, + "grad_norm": 2.960698740254174, + "learning_rate": 1.5137104203683101e-05, + "loss": 0.4793, "step": 7403 }, { - "epoch": 1.5525267351646046, - "grad_norm": 6.940740870612526, - "learning_rate": 9.919632962006814e-06, - "loss": 0.7305, + "epoch": 1.0454673820954532, + "grad_norm": 3.634729665811572, + "learning_rate": 1.513579621879137e-05, + "loss": 0.6725, "step": 7404 }, { - "epoch": 1.5527364227301321, - "grad_norm": 5.97440926309287, - "learning_rate": 9.917369152601586e-06, - "loss": 0.4689, + "epoch": 1.045608585145439, + "grad_norm": 3.698706245376152, + "learning_rate": 1.5134488114545955e-05, + "loss": 0.7789, "step": 7405 }, { - "epoch": 1.5529461102956594, - "grad_norm": 7.1044651219747115, - "learning_rate": 9.91510534743133e-06, - "loss": 0.605, + "epoch": 1.045749788195425, + "grad_norm": 3.2339822521861903, + "learning_rate": 1.513317989097725e-05, + "loss": 0.5946, "step": 7406 }, { - "epoch": 1.5531557978611867, - "grad_norm": 5.713273754640501, - "learning_rate": 9.912841546612075e-06, - "loss": 0.5209, + "epoch": 1.0458909912454109, + "grad_norm": 2.7188177783373386, + "learning_rate": 1.5131871548115665e-05, + "loss": 0.4835, "step": 7407 }, { - "epoch": 1.5533654854267143, - "grad_norm": 6.536196489934422, - "learning_rate": 9.910577750259841e-06, - "loss": 0.5689, + "epoch": 1.0460321942953967, + "grad_norm": 3.381232540024849, + "learning_rate": 1.5130563085991599e-05, + "loss": 0.6565, "step": 7408 }, { - "epoch": 1.5535751729922416, - "grad_norm": 6.230477255932016, - "learning_rate": 9.908313958490652e-06, - "loss": 0.6734, + "epoch": 1.0461733973453826, + "grad_norm": 4.1708425320806075, + "learning_rate": 1.5129254504635462e-05, + "loss": 0.6589, "step": 7409 }, { - "epoch": 1.5537848605577689, - "grad_norm": 7.439722408684949, - "learning_rate": 9.906050171420534e-06, - "loss": 0.7466, + "epoch": 1.0463146003953685, + "grad_norm": 3.149765447764325, + "learning_rate": 1.5127945804077668e-05, + "loss": 0.5909, "step": 7410 }, { - "epoch": 1.5539945481232964, - "grad_norm": 6.7371071189214815, - "learning_rate": 9.903786389165503e-06, - "loss": 0.7161, + "epoch": 1.0464558034453544, + "grad_norm": 3.195517827037724, + "learning_rate": 1.5126636984348627e-05, + "loss": 0.5736, "step": 7411 }, { - "epoch": 1.5542042356888235, - "grad_norm": 5.238648078897976, - "learning_rate": 9.901522611841588e-06, - "loss": 0.614, + "epoch": 1.0465970064953403, + "grad_norm": 3.6864764349453756, + "learning_rate": 1.5125328045478755e-05, + "loss": 0.659, "step": 7412 }, { - "epoch": 1.554413923254351, - "grad_norm": 7.648841098154688, - "learning_rate": 9.899258839564814e-06, - "loss": 0.8154, + "epoch": 1.0467382095453261, + "grad_norm": 3.5001391864683016, + "learning_rate": 1.5124018987498476e-05, + "loss": 0.6102, "step": 7413 }, { - "epoch": 1.5546236108198785, - "grad_norm": 6.435799416317733, - "learning_rate": 9.896995072451197e-06, - "loss": 0.6055, + "epoch": 1.046879412595312, + "grad_norm": 3.288436257627931, + "learning_rate": 1.5122709810438205e-05, + "loss": 0.5519, "step": 7414 }, { - "epoch": 1.5548332983854056, - "grad_norm": 6.061287014767684, - "learning_rate": 9.894731310616757e-06, - "loss": 0.5308, + "epoch": 1.047020615645298, + "grad_norm": 3.836670119804425, + "learning_rate": 1.5121400514328372e-05, + "loss": 0.6489, "step": 7415 }, { - "epoch": 1.5550429859509332, - "grad_norm": 8.491850374920919, - "learning_rate": 9.892467554177527e-06, - "loss": 0.9534, + "epoch": 1.0471618186952838, + "grad_norm": 4.488628795241171, + "learning_rate": 1.5120091099199403e-05, + "loss": 0.8574, "step": 7416 }, { - "epoch": 1.5552526735164605, - "grad_norm": 7.269798493482319, - "learning_rate": 9.890203803249515e-06, - "loss": 0.6649, + "epoch": 1.0473030217452697, + "grad_norm": 3.4142512320133003, + "learning_rate": 1.5118781565081727e-05, + "loss": 0.5328, "step": 7417 }, { - "epoch": 1.5554623610819878, - "grad_norm": 7.400409560532959, - "learning_rate": 9.887940057948751e-06, - "loss": 0.8717, + "epoch": 1.0474442247952556, + "grad_norm": 3.4779448882904034, + "learning_rate": 1.511747191200578e-05, + "loss": 0.558, "step": 7418 }, { - "epoch": 1.5556720486475153, - "grad_norm": 4.9203649847954, - "learning_rate": 9.885676318391257e-06, - "loss": 0.439, + "epoch": 1.0475854278452414, + "grad_norm": 3.672707093549081, + "learning_rate": 1.5116162140001995e-05, + "loss": 0.6446, "step": 7419 }, { - "epoch": 1.5558817362130426, - "grad_norm": 7.194978466393346, - "learning_rate": 9.883412584693046e-06, - "loss": 0.6881, + "epoch": 1.0477266308952273, + "grad_norm": 3.859986387093808, + "learning_rate": 1.5114852249100811e-05, + "loss": 0.7047, "step": 7420 }, { - "epoch": 1.55609142377857, - "grad_norm": 5.993666010139874, - "learning_rate": 9.88114885697014e-06, - "loss": 0.5605, + "epoch": 1.0478678339452132, + "grad_norm": 3.1228017869302986, + "learning_rate": 1.511354223933267e-05, + "loss": 0.561, "step": 7421 }, { - "epoch": 1.5563011113440974, - "grad_norm": 7.826221927628422, - "learning_rate": 9.878885135338567e-06, - "loss": 0.8106, + "epoch": 1.048009036995199, + "grad_norm": 3.2214560491304454, + "learning_rate": 1.5112232110728016e-05, + "loss": 0.6111, "step": 7422 }, { - "epoch": 1.5565107989096245, - "grad_norm": 8.556001663078536, - "learning_rate": 9.876621419914336e-06, - "loss": 0.825, + "epoch": 1.048150240045185, + "grad_norm": 2.8999403517133695, + "learning_rate": 1.5110921863317293e-05, + "loss": 0.435, "step": 7423 }, { - "epoch": 1.556720486475152, - "grad_norm": 6.4766126111350175, - "learning_rate": 9.874357710813473e-06, - "loss": 0.6068, + "epoch": 1.0482914430951709, + "grad_norm": 2.7053959193543307, + "learning_rate": 1.5109611497130959e-05, + "loss": 0.3695, "step": 7424 }, { - "epoch": 1.5569301740406793, - "grad_norm": 7.039615496126097, - "learning_rate": 9.872094008151998e-06, - "loss": 0.7329, + "epoch": 1.0484326461451567, + "grad_norm": 3.186433997251534, + "learning_rate": 1.5108301012199453e-05, + "loss": 0.4578, "step": 7425 }, { - "epoch": 1.5571398616062067, - "grad_norm": 7.808146805302258, - "learning_rate": 9.869830312045923e-06, - "loss": 0.7329, + "epoch": 1.0485738491951426, + "grad_norm": 3.460373505882675, + "learning_rate": 1.510699040855324e-05, + "loss": 0.5979, "step": 7426 }, { - "epoch": 1.5573495491717342, - "grad_norm": 9.181438045786505, - "learning_rate": 9.86756662261127e-06, - "loss": 0.8702, + "epoch": 1.0487150522451285, + "grad_norm": 3.7093788242314094, + "learning_rate": 1.5105679686222778e-05, + "loss": 0.6624, "step": 7427 }, { - "epoch": 1.5575592367372615, - "grad_norm": 7.221367404832524, - "learning_rate": 9.86530293996406e-06, - "loss": 0.8016, + "epoch": 1.0488562552951144, + "grad_norm": 3.3848084175705893, + "learning_rate": 1.5104368845238525e-05, + "loss": 0.5859, "step": 7428 }, { - "epoch": 1.5577689243027888, - "grad_norm": 7.910725948399105, - "learning_rate": 9.863039264220307e-06, - "loss": 0.9236, + "epoch": 1.0489974583451003, + "grad_norm": 2.7376348109628528, + "learning_rate": 1.5103057885630943e-05, + "loss": 0.479, "step": 7429 }, { - "epoch": 1.5579786118683163, - "grad_norm": 7.6315821739079235, - "learning_rate": 9.860775595496028e-06, - "loss": 0.6841, + "epoch": 1.0491386613950862, + "grad_norm": 2.7689422133909005, + "learning_rate": 1.5101746807430502e-05, + "loss": 0.4889, "step": 7430 }, { - "epoch": 1.5581882994338436, - "grad_norm": 9.748350604359869, - "learning_rate": 9.85851193390724e-06, - "loss": 1.0554, + "epoch": 1.049279864445072, + "grad_norm": 2.9965226804700698, + "learning_rate": 1.5100435610667662e-05, + "loss": 0.5514, "step": 7431 }, { - "epoch": 1.558397986999371, - "grad_norm": 9.683576405345406, - "learning_rate": 9.856248279569965e-06, - "loss": 0.961, + "epoch": 1.049421067495058, + "grad_norm": 3.093923538565264, + "learning_rate": 1.509912429537291e-05, + "loss": 0.6037, "step": 7432 }, { - "epoch": 1.5586076745648985, - "grad_norm": 6.301012582428548, - "learning_rate": 9.853984632600213e-06, - "loss": 0.6483, + "epoch": 1.0495622705450438, + "grad_norm": 3.413616208841141, + "learning_rate": 1.5097812861576704e-05, + "loss": 0.7186, "step": 7433 }, { - "epoch": 1.5588173621304255, - "grad_norm": 7.639637671200591, - "learning_rate": 9.851720993114002e-06, - "loss": 0.7748, + "epoch": 1.0497034735950297, + "grad_norm": 3.8278888544529743, + "learning_rate": 1.509650130930953e-05, + "loss": 0.7622, "step": 7434 }, { - "epoch": 1.559027049695953, - "grad_norm": 6.989799799631193, - "learning_rate": 9.849457361227348e-06, - "loss": 0.7327, + "epoch": 1.0498446766450156, + "grad_norm": 4.016613240521506, + "learning_rate": 1.509518963860187e-05, + "loss": 0.6444, "step": 7435 }, { - "epoch": 1.5592367372614804, - "grad_norm": 6.6445969175248045, - "learning_rate": 9.847193737056265e-06, - "loss": 0.5949, + "epoch": 1.0499858796950015, + "grad_norm": 3.733262811227689, + "learning_rate": 1.5093877849484201e-05, + "loss": 0.6367, "step": 7436 }, { - "epoch": 1.5594464248270077, - "grad_norm": 8.196983726113416, - "learning_rate": 9.844930120716764e-06, - "loss": 0.9433, + "epoch": 1.0501270827449873, + "grad_norm": 2.9989286398882418, + "learning_rate": 1.5092565941987012e-05, + "loss": 0.6006, "step": 7437 }, { - "epoch": 1.5596561123925352, - "grad_norm": 7.044213272565848, - "learning_rate": 9.84266651232487e-06, - "loss": 0.764, + "epoch": 1.0502682857949732, + "grad_norm": 2.9777676834748843, + "learning_rate": 1.5091253916140789e-05, + "loss": 0.5948, "step": 7438 }, { - "epoch": 1.5598657999580625, - "grad_norm": 5.699596672260946, - "learning_rate": 9.840402911996587e-06, - "loss": 0.5077, + "epoch": 1.050409488844959, + "grad_norm": 3.455822462089069, + "learning_rate": 1.5089941771976024e-05, + "loss": 0.601, "step": 7439 }, { - "epoch": 1.5600754875235898, - "grad_norm": 7.175423056665053, - "learning_rate": 9.83813931984793e-06, - "loss": 0.7726, + "epoch": 1.050550691894945, + "grad_norm": 3.743174034710143, + "learning_rate": 1.5088629509523207e-05, + "loss": 0.7842, "step": 7440 }, { - "epoch": 1.5602851750891173, - "grad_norm": 6.055914141470011, - "learning_rate": 9.835875735994917e-06, - "loss": 0.7445, + "epoch": 1.0506918949449309, + "grad_norm": 3.434393429880277, + "learning_rate": 1.5087317128812844e-05, + "loss": 0.6776, "step": 7441 }, { - "epoch": 1.5604948626546444, - "grad_norm": 6.866791139508365, - "learning_rate": 9.833612160553553e-06, - "loss": 0.8377, + "epoch": 1.0508330979949168, + "grad_norm": 3.096036638051984, + "learning_rate": 1.5086004629875426e-05, + "loss": 0.5185, "step": 7442 }, { - "epoch": 1.560704550220172, - "grad_norm": 5.682240899799926, - "learning_rate": 9.831348593639856e-06, - "loss": 0.6318, + "epoch": 1.0509743010449026, + "grad_norm": 3.1307629881956367, + "learning_rate": 1.5084692012741454e-05, + "loss": 0.5604, "step": 7443 }, { - "epoch": 1.5609142377856993, - "grad_norm": 6.697821738660285, - "learning_rate": 9.829085035369839e-06, - "loss": 0.7465, + "epoch": 1.0511155040948885, + "grad_norm": 3.211231306395711, + "learning_rate": 1.5083379277441437e-05, + "loss": 0.5857, "step": 7444 }, { - "epoch": 1.5611239253512266, - "grad_norm": 8.4265197811356, - "learning_rate": 9.826821485859508e-06, - "loss": 0.84, + "epoch": 1.0512567071448744, + "grad_norm": 4.262452023740573, + "learning_rate": 1.5082066424005882e-05, + "loss": 0.9585, "step": 7445 }, { - "epoch": 1.561333612916754, - "grad_norm": 7.758443727004098, - "learning_rate": 9.824557945224872e-06, - "loss": 0.8567, + "epoch": 1.0513979101948603, + "grad_norm": 3.5637730166189874, + "learning_rate": 1.5080753452465296e-05, + "loss": 0.6088, "step": 7446 }, { - "epoch": 1.5615433004822814, - "grad_norm": 6.32218140957602, - "learning_rate": 9.822294413581956e-06, - "loss": 0.6237, + "epoch": 1.0515391132448462, + "grad_norm": 3.983384304827401, + "learning_rate": 1.5079440362850195e-05, + "loss": 0.5687, "step": 7447 }, { - "epoch": 1.5617529880478087, - "grad_norm": 5.738486413228919, - "learning_rate": 9.82003089104675e-06, - "loss": 0.6485, + "epoch": 1.051680316294832, + "grad_norm": 4.342816747738613, + "learning_rate": 1.5078127155191094e-05, + "loss": 0.5527, "step": 7448 }, { - "epoch": 1.5619626756133362, - "grad_norm": 6.243931410198658, - "learning_rate": 9.817767377735277e-06, - "loss": 0.7206, + "epoch": 1.051821519344818, + "grad_norm": 2.740461337857034, + "learning_rate": 1.5076813829518512e-05, + "loss": 0.4501, "step": 7449 }, { - "epoch": 1.5621723631788635, - "grad_norm": 9.12705963059562, - "learning_rate": 9.815503873763543e-06, - "loss": 0.9281, + "epoch": 1.0519627223948038, + "grad_norm": 3.452511235547144, + "learning_rate": 1.5075500385862967e-05, + "loss": 0.6266, "step": 7450 }, { - "epoch": 1.5623820507443908, - "grad_norm": 6.853861817698881, - "learning_rate": 9.813240379247553e-06, - "loss": 0.7684, + "epoch": 1.0521039254447897, + "grad_norm": 4.5790176224006, + "learning_rate": 1.5074186824254983e-05, + "loss": 0.6926, "step": 7451 }, { - "epoch": 1.5625917383099184, - "grad_norm": 6.2619693168152954, - "learning_rate": 9.810976894303316e-06, - "loss": 0.7056, + "epoch": 1.0522451284947756, + "grad_norm": 3.180783483351663, + "learning_rate": 1.5072873144725093e-05, + "loss": 0.6156, "step": 7452 }, { - "epoch": 1.5628014258754455, - "grad_norm": 6.327822853310262, - "learning_rate": 9.808713419046847e-06, - "loss": 0.6128, + "epoch": 1.0523863315447615, + "grad_norm": 4.9027710457083495, + "learning_rate": 1.5071559347303823e-05, + "loss": 0.7018, "step": 7453 }, { - "epoch": 1.563011113440973, - "grad_norm": 9.16285206304075, - "learning_rate": 9.806449953594141e-06, - "loss": 1.195, + "epoch": 1.0525275345947473, + "grad_norm": 2.748495485534521, + "learning_rate": 1.5070245432021699e-05, + "loss": 0.5265, "step": 7454 }, { - "epoch": 1.5632208010065003, - "grad_norm": 7.116947249577371, - "learning_rate": 9.804186498061213e-06, - "loss": 0.8245, + "epoch": 1.0526687376447332, + "grad_norm": 3.950797575547279, + "learning_rate": 1.5068931398909264e-05, + "loss": 0.6218, "step": 7455 }, { - "epoch": 1.5634304885720276, - "grad_norm": 7.116804055019194, - "learning_rate": 9.801923052564068e-06, - "loss": 0.6003, + "epoch": 1.052809940694719, + "grad_norm": 3.808659260038404, + "learning_rate": 1.5067617247997053e-05, + "loss": 0.6687, "step": 7456 }, { - "epoch": 1.5636401761375551, - "grad_norm": 7.621269926462281, - "learning_rate": 9.799659617218707e-06, - "loss": 0.8642, + "epoch": 1.0529511437447048, + "grad_norm": 3.2592060288790083, + "learning_rate": 1.5066302979315601e-05, + "loss": 0.6068, "step": 7457 }, { - "epoch": 1.5638498637030824, - "grad_norm": 7.742711612185244, - "learning_rate": 9.797396192141138e-06, - "loss": 0.9105, + "epoch": 1.0530923467946907, + "grad_norm": 3.5046768286920362, + "learning_rate": 1.5064988592895463e-05, + "loss": 0.6857, "step": 7458 }, { - "epoch": 1.5640595512686097, - "grad_norm": 6.965607872390398, - "learning_rate": 9.79513277744737e-06, - "loss": 0.8853, + "epoch": 1.0532335498446765, + "grad_norm": 3.6437130601125967, + "learning_rate": 1.5063674088767172e-05, + "loss": 0.692, "step": 7459 }, { - "epoch": 1.5642692388341373, - "grad_norm": 6.756849768589031, - "learning_rate": 9.792869373253399e-06, - "loss": 0.751, + "epoch": 1.0533747528946624, + "grad_norm": 3.170674864142198, + "learning_rate": 1.5062359466961283e-05, + "loss": 0.4721, "step": 7460 }, { - "epoch": 1.5644789263996643, - "grad_norm": 6.6637820610235945, - "learning_rate": 9.790605979675231e-06, - "loss": 0.7562, + "epoch": 1.0535159559446483, + "grad_norm": 3.363711338645682, + "learning_rate": 1.5061044727508347e-05, + "loss": 0.5815, "step": 7461 }, { - "epoch": 1.5646886139651919, - "grad_norm": 7.003379069793895, - "learning_rate": 9.788342596828875e-06, - "loss": 0.6947, + "epoch": 1.0536571589946342, + "grad_norm": 4.028269225232735, + "learning_rate": 1.5059729870438917e-05, + "loss": 0.6246, "step": 7462 }, { - "epoch": 1.5648983015307192, - "grad_norm": 7.146558886184148, - "learning_rate": 9.78607922483032e-06, - "loss": 0.6611, + "epoch": 1.05379836204462, + "grad_norm": 2.97261708732574, + "learning_rate": 1.505841489578355e-05, + "loss": 0.4869, "step": 7463 }, { - "epoch": 1.5651079890962465, - "grad_norm": 10.160253778038685, - "learning_rate": 9.78381586379558e-06, - "loss": 1.1366, + "epoch": 1.053939565094606, + "grad_norm": 3.4221379711211166, + "learning_rate": 1.5057099803572806e-05, + "loss": 0.6255, "step": 7464 }, { - "epoch": 1.565317676661774, - "grad_norm": 7.119847253049567, - "learning_rate": 9.781552513840654e-06, - "loss": 0.656, + "epoch": 1.0540807681445918, + "grad_norm": 3.315766610580562, + "learning_rate": 1.5055784593837246e-05, + "loss": 0.5185, "step": 7465 }, { - "epoch": 1.5655273642273013, - "grad_norm": 8.890059129015276, - "learning_rate": 9.779289175081539e-06, - "loss": 0.9618, + "epoch": 1.0542219711945777, + "grad_norm": 3.3012482355425283, + "learning_rate": 1.5054469266607435e-05, + "loss": 0.5137, "step": 7466 }, { - "epoch": 1.5657370517928286, - "grad_norm": 6.5823168964265175, - "learning_rate": 9.777025847634235e-06, - "loss": 0.8161, + "epoch": 1.0543631742445636, + "grad_norm": 3.880374949495192, + "learning_rate": 1.5053153821913941e-05, + "loss": 0.6522, "step": 7467 }, { - "epoch": 1.5659467393583562, - "grad_norm": 6.992804144728043, - "learning_rate": 9.774762531614747e-06, - "loss": 0.7692, + "epoch": 1.0545043772945495, + "grad_norm": 11.196966230218365, + "learning_rate": 1.5051838259787332e-05, + "loss": 0.7558, "step": 7468 }, { - "epoch": 1.5661564269238835, - "grad_norm": 8.453073228194512, - "learning_rate": 9.772499227139063e-06, - "loss": 1.0109, + "epoch": 1.0546455803445354, + "grad_norm": 3.2411956424599073, + "learning_rate": 1.5050522580258189e-05, + "loss": 0.5314, "step": 7469 }, { - "epoch": 1.5663661144894108, - "grad_norm": 8.639264144664015, - "learning_rate": 9.770235934323193e-06, - "loss": 0.9519, + "epoch": 1.0547867833945213, + "grad_norm": 4.355052367400331, + "learning_rate": 1.5049206783357082e-05, + "loss": 0.6862, "step": 7470 }, { - "epoch": 1.5665758020549383, - "grad_norm": 7.291100729142389, - "learning_rate": 9.767972653283131e-06, - "loss": 0.8416, + "epoch": 1.0549279864445071, + "grad_norm": 3.591200301144237, + "learning_rate": 1.5047890869114588e-05, + "loss": 0.5882, "step": 7471 }, { - "epoch": 1.5667854896204654, - "grad_norm": 8.386398995828655, - "learning_rate": 9.765709384134871e-06, - "loss": 0.8636, + "epoch": 1.055069189494493, + "grad_norm": 3.2208305234697177, + "learning_rate": 1.5046574837561289e-05, + "loss": 0.6379, "step": 7472 }, { - "epoch": 1.566995177185993, - "grad_norm": 7.569159697271252, - "learning_rate": 9.76344612699441e-06, - "loss": 0.9601, + "epoch": 1.055210392544479, + "grad_norm": 4.340864282460565, + "learning_rate": 1.5045258688727771e-05, + "loss": 0.6483, "step": 7473 }, { - "epoch": 1.5672048647515202, - "grad_norm": 7.123754178319306, - "learning_rate": 9.76118288197775e-06, - "loss": 0.8016, + "epoch": 1.0553515955944648, + "grad_norm": 3.6945492391080403, + "learning_rate": 1.504394242264462e-05, + "loss": 0.5939, "step": 7474 }, { - "epoch": 1.5674145523170475, - "grad_norm": 5.7134592128711015, - "learning_rate": 9.758919649200875e-06, - "loss": 0.6273, + "epoch": 1.0554927986444507, + "grad_norm": 3.2722205272211555, + "learning_rate": 1.5042626039342426e-05, + "loss": 0.6233, "step": 7475 }, { - "epoch": 1.567624239882575, - "grad_norm": 6.365461833380269, - "learning_rate": 9.75665642877979e-06, - "loss": 0.7107, + "epoch": 1.0556340016944366, + "grad_norm": 3.817569072808398, + "learning_rate": 1.504130953885178e-05, + "loss": 0.6871, "step": 7476 }, { - "epoch": 1.5678339274481023, - "grad_norm": 5.676818018675542, - "learning_rate": 9.754393220830487e-06, - "loss": 0.526, + "epoch": 1.0557752047444224, + "grad_norm": 3.357691434577466, + "learning_rate": 1.5039992921203277e-05, + "loss": 0.5563, "step": 7477 }, { - "epoch": 1.5680436150136297, - "grad_norm": 7.830523842657074, - "learning_rate": 9.752130025468954e-06, - "loss": 0.8589, + "epoch": 1.0559164077944083, + "grad_norm": 3.5292004608996486, + "learning_rate": 1.5038676186427515e-05, + "loss": 0.5888, "step": 7478 }, { - "epoch": 1.5682533025791572, - "grad_norm": 7.396452662931585, - "learning_rate": 9.749866842811186e-06, - "loss": 0.8333, + "epoch": 1.0560576108443942, + "grad_norm": 4.4109892943387585, + "learning_rate": 1.5037359334555097e-05, + "loss": 0.772, "step": 7479 }, { - "epoch": 1.5684629901446843, - "grad_norm": 6.48636418906607, - "learning_rate": 9.74760367297318e-06, - "loss": 0.7618, + "epoch": 1.05619881389438, + "grad_norm": 3.749373041522415, + "learning_rate": 1.5036042365616621e-05, + "loss": 0.6063, "step": 7480 }, { - "epoch": 1.5686726777102118, - "grad_norm": 7.248439210261221, - "learning_rate": 9.745340516070921e-06, - "loss": 0.7922, + "epoch": 1.056340016944366, + "grad_norm": 3.589771681614758, + "learning_rate": 1.5034725279642697e-05, + "loss": 0.6442, "step": 7481 }, { - "epoch": 1.568882365275739, - "grad_norm": 7.6972765277571416, - "learning_rate": 9.743077372220403e-06, - "loss": 0.772, + "epoch": 1.0564812199943519, + "grad_norm": 3.5482656259913314, + "learning_rate": 1.5033408076663932e-05, + "loss": 0.6308, "step": 7482 }, { - "epoch": 1.5690920528412664, - "grad_norm": 7.4256166688719665, - "learning_rate": 9.740814241537616e-06, - "loss": 0.7461, + "epoch": 1.0566224230443377, + "grad_norm": 3.848960554247446, + "learning_rate": 1.5032090756710935e-05, + "loss": 0.7211, "step": 7483 }, { - "epoch": 1.569301740406794, - "grad_norm": 6.6431578924629155, - "learning_rate": 9.738551124138548e-06, - "loss": 0.743, + "epoch": 1.0567636260943236, + "grad_norm": 3.8321164839995157, + "learning_rate": 1.5030773319814324e-05, + "loss": 0.7429, "step": 7484 }, { - "epoch": 1.5695114279723212, - "grad_norm": 7.329326809923285, - "learning_rate": 9.736288020139185e-06, - "loss": 0.8285, + "epoch": 1.0569048291443095, + "grad_norm": 3.958040827533154, + "learning_rate": 1.5029455766004713e-05, + "loss": 0.6684, "step": 7485 }, { - "epoch": 1.5697211155378485, - "grad_norm": 7.1880629119512545, - "learning_rate": 9.734024929655525e-06, - "loss": 0.773, + "epoch": 1.0570460321942954, + "grad_norm": 5.343076222815828, + "learning_rate": 1.502813809531272e-05, + "loss": 0.4492, "step": 7486 }, { - "epoch": 1.569930803103376, - "grad_norm": 5.582308808457554, - "learning_rate": 9.731761852803544e-06, - "loss": 0.6851, + "epoch": 1.0571872352442813, + "grad_norm": 3.2021279579511117, + "learning_rate": 1.5026820307768972e-05, + "loss": 0.535, "step": 7487 }, { - "epoch": 1.5701404906689034, - "grad_norm": 6.053800125006612, - "learning_rate": 9.729498789699234e-06, - "loss": 0.6111, + "epoch": 1.0573284382942671, + "grad_norm": 5.771382938756304, + "learning_rate": 1.5025502403404089e-05, + "loss": 0.7977, "step": 7488 }, { - "epoch": 1.5703501782344307, - "grad_norm": 7.219497942778008, - "learning_rate": 9.727235740458582e-06, - "loss": 0.6762, + "epoch": 1.057469641344253, + "grad_norm": 2.9854358368963556, + "learning_rate": 1.50241843822487e-05, + "loss": 0.5148, "step": 7489 }, { - "epoch": 1.5705598657999582, - "grad_norm": 8.521334094887038, - "learning_rate": 9.72497270519757e-06, - "loss": 0.8987, + "epoch": 1.057610844394239, + "grad_norm": 3.2948350321201114, + "learning_rate": 1.5022866244333438e-05, + "loss": 0.5334, "step": 7490 }, { - "epoch": 1.5707695533654853, - "grad_norm": 7.009516597054003, - "learning_rate": 9.722709684032182e-06, - "loss": 0.7874, + "epoch": 1.0577520474442248, + "grad_norm": 3.5060815020004092, + "learning_rate": 1.5021547989688932e-05, + "loss": 0.5158, "step": 7491 }, { - "epoch": 1.5709792409310128, - "grad_norm": 6.47722224926297, - "learning_rate": 9.720446677078406e-06, - "loss": 0.6186, + "epoch": 1.0578932504942107, + "grad_norm": 3.8088992684814125, + "learning_rate": 1.502022961834582e-05, + "loss": 0.6802, "step": 7492 }, { - "epoch": 1.5711889284965401, - "grad_norm": 8.663255608378202, - "learning_rate": 9.718183684452223e-06, - "loss": 0.8661, + "epoch": 1.0580344535441966, + "grad_norm": 4.058020841161868, + "learning_rate": 1.5018911130334743e-05, + "loss": 0.7878, "step": 7493 }, { - "epoch": 1.5713986160620674, - "grad_norm": 7.134489860552042, - "learning_rate": 9.715920706269611e-06, - "loss": 0.8573, + "epoch": 1.0581756565941824, + "grad_norm": 3.450488720826541, + "learning_rate": 1.5017592525686333e-05, + "loss": 0.5903, "step": 7494 }, { - "epoch": 1.571608303627595, - "grad_norm": 6.597442510139765, - "learning_rate": 9.713657742646559e-06, - "loss": 0.8199, + "epoch": 1.0583168596441683, + "grad_norm": 3.3564542121079, + "learning_rate": 1.5016273804431242e-05, + "loss": 0.6173, "step": 7495 }, { - "epoch": 1.5718179911931223, - "grad_norm": 7.461486140895039, - "learning_rate": 9.711394793699045e-06, - "loss": 0.7018, + "epoch": 1.0584580626941542, + "grad_norm": 3.8017849495120495, + "learning_rate": 1.5014954966600117e-05, + "loss": 0.6507, "step": 7496 }, { - "epoch": 1.5720276787586496, - "grad_norm": 7.488250612217129, - "learning_rate": 9.709131859543048e-06, - "loss": 0.7776, + "epoch": 1.05859926574414, + "grad_norm": 3.3315085431162275, + "learning_rate": 1.50136360122236e-05, + "loss": 0.559, "step": 7497 }, { - "epoch": 1.572237366324177, - "grad_norm": 6.307917033650578, - "learning_rate": 9.706868940294547e-06, - "loss": 0.5386, + "epoch": 1.058740468794126, + "grad_norm": 3.017688589943004, + "learning_rate": 1.501231694133235e-05, + "loss": 0.5167, "step": 7498 }, { - "epoch": 1.5724470538897042, - "grad_norm": 7.499994318930143, - "learning_rate": 9.704606036069522e-06, - "loss": 0.9418, + "epoch": 1.0588816718441119, + "grad_norm": 3.1276445663908765, + "learning_rate": 1.5010997753957019e-05, + "loss": 0.5682, "step": 7499 }, { - "epoch": 1.5726567414552317, - "grad_norm": 6.352712573145573, - "learning_rate": 9.702343146983948e-06, - "loss": 0.7557, + "epoch": 1.0590228748940977, + "grad_norm": 3.3594841172204335, + "learning_rate": 1.5009678450128263e-05, + "loss": 0.5415, "step": 7500 }, { - "epoch": 1.572866429020759, - "grad_norm": 7.459755694325853, - "learning_rate": 9.700080273153805e-06, - "loss": 0.8752, + "epoch": 1.0591640779440836, + "grad_norm": 3.4523451802869753, + "learning_rate": 1.5008359029876744e-05, + "loss": 0.6595, "step": 7501 }, { - "epoch": 1.5730761165862863, - "grad_norm": 8.010113058389537, - "learning_rate": 9.69781741469507e-06, - "loss": 0.888, + "epoch": 1.0593052809940695, + "grad_norm": 3.603985321563, + "learning_rate": 1.5007039493233123e-05, + "loss": 0.6279, "step": 7502 }, { - "epoch": 1.5732858041518138, - "grad_norm": 7.173241699338803, - "learning_rate": 9.695554571723716e-06, - "loss": 0.8885, + "epoch": 1.0594464840440554, + "grad_norm": 3.492555144892753, + "learning_rate": 1.5005719840228067e-05, + "loss": 0.5134, "step": 7503 }, { - "epoch": 1.5734954917173412, - "grad_norm": 5.617137493552929, - "learning_rate": 9.693291744355715e-06, - "loss": 0.4666, + "epoch": 1.0595876870940413, + "grad_norm": 3.4260397401173117, + "learning_rate": 1.5004400070892246e-05, + "loss": 0.6125, "step": 7504 }, { - "epoch": 1.5737051792828685, - "grad_norm": 8.085811460821102, - "learning_rate": 9.69102893270705e-06, - "loss": 1.0287, + "epoch": 1.0597288901440272, + "grad_norm": 3.282513387151086, + "learning_rate": 1.5003080185256325e-05, + "loss": 0.5451, "step": 7505 }, { - "epoch": 1.573914866848396, - "grad_norm": 6.538928147186817, - "learning_rate": 9.688766136893682e-06, - "loss": 0.6976, + "epoch": 1.059870093194013, + "grad_norm": 3.17519695623594, + "learning_rate": 1.5001760183350981e-05, + "loss": 0.5206, "step": 7506 }, { - "epoch": 1.5741245544139233, - "grad_norm": 5.7084207093394985, - "learning_rate": 9.68650335703159e-06, - "loss": 0.52, + "epoch": 1.060011296243999, + "grad_norm": 3.1958468776056734, + "learning_rate": 1.5000440065206894e-05, + "loss": 0.6137, "step": 7507 }, { - "epoch": 1.5743342419794506, - "grad_norm": 7.8446855839585385, - "learning_rate": 9.684240593236749e-06, - "loss": 0.7537, + "epoch": 1.0601524992939848, + "grad_norm": 3.04599694227866, + "learning_rate": 1.4999119830854739e-05, + "loss": 0.4917, "step": 7508 }, { - "epoch": 1.5745439295449781, - "grad_norm": 9.25493089398353, - "learning_rate": 9.681977845625123e-06, - "loss": 1.0807, + "epoch": 1.0602937023439707, + "grad_norm": 3.1309171298108986, + "learning_rate": 1.4997799480325198e-05, + "loss": 0.5512, "step": 7509 }, { - "epoch": 1.5747536171105052, - "grad_norm": 8.692973847412153, - "learning_rate": 9.67971511431268e-06, - "loss": 0.8971, + "epoch": 1.0604349053939566, + "grad_norm": 3.805606256651297, + "learning_rate": 1.4996479013648952e-05, + "loss": 0.7471, "step": 7510 }, { - "epoch": 1.5749633046760327, - "grad_norm": 6.976075381288707, - "learning_rate": 9.677452399415398e-06, - "loss": 0.7109, + "epoch": 1.0605761084439425, + "grad_norm": 2.8719624263945023, + "learning_rate": 1.4995158430856694e-05, + "loss": 0.4947, "step": 7511 }, { - "epoch": 1.57517299224156, - "grad_norm": 8.253237559108165, - "learning_rate": 9.67518970104924e-06, - "loss": 0.955, + "epoch": 1.0607173114939283, + "grad_norm": 3.6564176435247435, + "learning_rate": 1.499383773197911e-05, + "loss": 0.6395, "step": 7512 }, { - "epoch": 1.5753826798070873, - "grad_norm": 7.758732530833251, - "learning_rate": 9.67292701933017e-06, - "loss": 0.8814, + "epoch": 1.0608585145439142, + "grad_norm": 3.290271917881279, + "learning_rate": 1.4992516917046898e-05, + "loss": 0.6416, "step": 7513 }, { - "epoch": 1.5755923673726149, - "grad_norm": 8.06314963419161, - "learning_rate": 9.670664354374162e-06, - "loss": 0.859, + "epoch": 1.0609997175939, + "grad_norm": 3.53772107864924, + "learning_rate": 1.4991195986090744e-05, + "loss": 0.5964, "step": 7514 }, { - "epoch": 1.5758020549381422, - "grad_norm": 6.5901242393198105, - "learning_rate": 9.668401706297174e-06, - "loss": 0.6697, + "epoch": 1.061140920643886, + "grad_norm": 3.336683061733831, + "learning_rate": 1.498987493914135e-05, + "loss": 0.5567, "step": 7515 }, { - "epoch": 1.5760117425036695, - "grad_norm": 7.1728352680891145, - "learning_rate": 9.666139075215171e-06, - "loss": 0.7886, + "epoch": 1.0612821236938719, + "grad_norm": 3.3387703378662676, + "learning_rate": 1.4988553776229421e-05, + "loss": 0.5771, "step": 7516 }, { - "epoch": 1.576221430069197, - "grad_norm": 8.118085997577953, - "learning_rate": 9.663876461244124e-06, - "loss": 0.8422, + "epoch": 1.0614233267438578, + "grad_norm": 3.251470817094527, + "learning_rate": 1.4987232497385658e-05, + "loss": 0.6343, "step": 7517 }, { - "epoch": 1.576431117634724, - "grad_norm": 5.829857308472773, - "learning_rate": 9.66161386449999e-06, - "loss": 0.7008, + "epoch": 1.0615645297938436, + "grad_norm": 3.043507279995774, + "learning_rate": 1.4985911102640762e-05, + "loss": 0.5585, "step": 7518 }, { - "epoch": 1.5766408052002516, - "grad_norm": 7.376646148600872, - "learning_rate": 9.65935128509873e-06, - "loss": 0.6357, + "epoch": 1.0617057328438295, + "grad_norm": 3.681708052883947, + "learning_rate": 1.4984589592025447e-05, + "loss": 0.6069, "step": 7519 }, { - "epoch": 1.5768504927657792, - "grad_norm": 7.480249333462964, - "learning_rate": 9.657088723156313e-06, - "loss": 1.098, + "epoch": 1.0618469358938154, + "grad_norm": 3.366318051333044, + "learning_rate": 1.498326796557042e-05, + "loss": 0.6458, "step": 7520 }, { - "epoch": 1.5770601803313062, - "grad_norm": 7.377854134648045, - "learning_rate": 9.654826178788688e-06, - "loss": 1.0076, + "epoch": 1.0619881389438013, + "grad_norm": 3.600289380414608, + "learning_rate": 1.4981946223306403e-05, + "loss": 0.6498, "step": 7521 }, { - "epoch": 1.5772698678968338, - "grad_norm": 7.550957039288108, - "learning_rate": 9.652563652111821e-06, - "loss": 0.7002, + "epoch": 1.0621293419937872, + "grad_norm": 3.8605253117532525, + "learning_rate": 1.4980624365264103e-05, + "loss": 0.677, "step": 7522 }, { - "epoch": 1.577479555462361, - "grad_norm": 6.399844844795855, - "learning_rate": 9.650301143241673e-06, - "loss": 0.6634, + "epoch": 1.062270545043773, + "grad_norm": 3.994073151951099, + "learning_rate": 1.4979302391474243e-05, + "loss": 0.6897, "step": 7523 }, { - "epoch": 1.5776892430278884, - "grad_norm": 6.54201463432692, - "learning_rate": 9.648038652294195e-06, - "loss": 0.9115, + "epoch": 1.062411748093759, + "grad_norm": 4.09317896139282, + "learning_rate": 1.4977980301967549e-05, + "loss": 0.8015, "step": 7524 }, { - "epoch": 1.577898930593416, - "grad_norm": 7.176741895892891, - "learning_rate": 9.645776179385345e-06, - "loss": 0.7798, + "epoch": 1.0625529511437448, + "grad_norm": 3.1804775013132316, + "learning_rate": 1.4976658096774741e-05, + "loss": 0.5536, "step": 7525 }, { - "epoch": 1.5781086181589432, - "grad_norm": 6.150466206177959, - "learning_rate": 9.643513724631086e-06, - "loss": 0.6437, + "epoch": 1.0626941541937307, + "grad_norm": 3.9162123095840187, + "learning_rate": 1.4975335775926547e-05, + "loss": 0.6109, "step": 7526 }, { - "epoch": 1.5783183057244705, - "grad_norm": 7.060595301967234, - "learning_rate": 9.64125128814736e-06, - "loss": 0.7624, + "epoch": 1.0628353572437164, + "grad_norm": 3.5512062239734887, + "learning_rate": 1.4974013339453702e-05, + "loss": 0.6854, "step": 7527 }, { - "epoch": 1.578527993289998, - "grad_norm": 6.420338450913425, - "learning_rate": 9.638988870050132e-06, - "loss": 0.5972, + "epoch": 1.0629765602937022, + "grad_norm": 3.9867972009588706, + "learning_rate": 1.497269078738693e-05, + "loss": 0.6303, "step": 7528 }, { - "epoch": 1.5787376808555251, - "grad_norm": 6.19310094822929, - "learning_rate": 9.636726470455351e-06, - "loss": 0.7269, + "epoch": 1.0631177633436881, + "grad_norm": 3.4550987613491433, + "learning_rate": 1.4971368119756973e-05, + "loss": 0.6529, "step": 7529 }, { - "epoch": 1.5789473684210527, - "grad_norm": 6.931568484899664, - "learning_rate": 9.634464089478967e-06, - "loss": 0.7508, + "epoch": 1.063258966393674, + "grad_norm": 3.5634808439474503, + "learning_rate": 1.4970045336594571e-05, + "loss": 0.6055, "step": 7530 }, { - "epoch": 1.57915705598658, - "grad_norm": 8.96362334964029, - "learning_rate": 9.63220172723693e-06, - "loss": 0.809, + "epoch": 1.06340016944366, + "grad_norm": 3.034343028285317, + "learning_rate": 1.4968722437930458e-05, + "loss": 0.5048, "step": 7531 }, { - "epoch": 1.5793667435521073, - "grad_norm": 8.500684861990925, - "learning_rate": 9.629939383845198e-06, - "loss": 0.9827, + "epoch": 1.0635413724936458, + "grad_norm": 4.63345251180764, + "learning_rate": 1.4967399423795384e-05, + "loss": 0.7329, "step": 7532 }, { - "epoch": 1.5795764311176348, - "grad_norm": 6.261521623516531, - "learning_rate": 9.627677059419708e-06, - "loss": 0.7134, + "epoch": 1.0636825755436317, + "grad_norm": 3.1313017848389766, + "learning_rate": 1.4966076294220093e-05, + "loss": 0.5242, "step": 7533 }, { - "epoch": 1.579786118683162, - "grad_norm": 6.6534545299229135, - "learning_rate": 9.625414754076417e-06, - "loss": 0.7252, + "epoch": 1.0638237785936175, + "grad_norm": 3.8110378415908603, + "learning_rate": 1.4964753049235333e-05, + "loss": 0.6428, "step": 7534 }, { - "epoch": 1.5799958062486894, - "grad_norm": 6.1417236288698955, - "learning_rate": 9.623152467931273e-06, - "loss": 0.6064, + "epoch": 1.0639649816436034, + "grad_norm": 3.3063080129857627, + "learning_rate": 1.4963429688871856e-05, + "loss": 0.5411, "step": 7535 }, { - "epoch": 1.580205493814217, - "grad_norm": 6.385380118770043, - "learning_rate": 9.620890201100217e-06, - "loss": 0.7248, + "epoch": 1.0641061846935893, + "grad_norm": 3.365464060708699, + "learning_rate": 1.4962106213160415e-05, + "loss": 0.6883, "step": 7536 }, { - "epoch": 1.580415181379744, - "grad_norm": 7.402160771858727, - "learning_rate": 9.61862795369919e-06, - "loss": 0.9799, + "epoch": 1.0642473877435752, + "grad_norm": 4.020474233319959, + "learning_rate": 1.496078262213177e-05, + "loss": 0.7421, "step": 7537 }, { - "epoch": 1.5806248689452715, - "grad_norm": 6.230326862722612, - "learning_rate": 9.61636572584415e-06, - "loss": 0.671, + "epoch": 1.064388590793561, + "grad_norm": 3.485984328032362, + "learning_rate": 1.4959458915816681e-05, + "loss": 0.5488, "step": 7538 }, { - "epoch": 1.580834556510799, - "grad_norm": 7.575365523441947, - "learning_rate": 9.614103517651026e-06, - "loss": 0.8565, + "epoch": 1.064529793843547, + "grad_norm": 2.8794823734910504, + "learning_rate": 1.4958135094245904e-05, + "loss": 0.5286, "step": 7539 }, { - "epoch": 1.5810442440763262, - "grad_norm": 6.948627075638781, - "learning_rate": 9.611841329235768e-06, - "loss": 0.7565, + "epoch": 1.0646709968935328, + "grad_norm": 4.306493706420228, + "learning_rate": 1.495681115745021e-05, + "loss": 0.8215, "step": 7540 }, { - "epoch": 1.5812539316418537, - "grad_norm": 7.363592153405708, - "learning_rate": 9.609579160714318e-06, - "loss": 0.818, + "epoch": 1.0648121999435187, + "grad_norm": 4.300476451912054, + "learning_rate": 1.4955487105460366e-05, + "loss": 0.8642, "step": 7541 }, { - "epoch": 1.581463619207381, - "grad_norm": 7.83431952455529, - "learning_rate": 9.607317012202608e-06, - "loss": 0.8215, + "epoch": 1.0649534029935046, + "grad_norm": 3.442390099742231, + "learning_rate": 1.4954162938307143e-05, + "loss": 0.5555, "step": 7542 }, { - "epoch": 1.5816733067729083, - "grad_norm": 7.175861132628268, - "learning_rate": 9.605054883816584e-06, - "loss": 0.8704, + "epoch": 1.0650946060434905, + "grad_norm": 3.856525549539632, + "learning_rate": 1.4952838656021313e-05, + "loss": 0.6088, "step": 7543 }, { - "epoch": 1.5818829943384358, - "grad_norm": 6.493760770143072, - "learning_rate": 9.602792775672185e-06, - "loss": 0.7365, + "epoch": 1.0652358090934764, + "grad_norm": 3.20853720532316, + "learning_rate": 1.4951514258633652e-05, + "loss": 0.4813, "step": 7544 }, { - "epoch": 1.5820926819039631, - "grad_norm": 6.198319330361964, - "learning_rate": 9.600530687885344e-06, - "loss": 0.6595, + "epoch": 1.0653770121434623, + "grad_norm": 3.4231474508280884, + "learning_rate": 1.4950189746174936e-05, + "loss": 0.5431, "step": 7545 }, { - "epoch": 1.5823023694694904, - "grad_norm": 9.560676981398183, - "learning_rate": 9.598268620571996e-06, - "loss": 0.8659, + "epoch": 1.0655182151934481, + "grad_norm": 2.9608809082398095, + "learning_rate": 1.4948865118675948e-05, + "loss": 0.5577, "step": 7546 }, { - "epoch": 1.582512057035018, - "grad_norm": 7.366657585410217, - "learning_rate": 9.59600657384808e-06, - "loss": 0.8464, + "epoch": 1.065659418243434, + "grad_norm": 3.2823943586273927, + "learning_rate": 1.4947540376167476e-05, + "loss": 0.5265, "step": 7547 }, { - "epoch": 1.582721744600545, - "grad_norm": 7.2257100285519575, - "learning_rate": 9.593744547829526e-06, - "loss": 0.975, + "epoch": 1.06580062129342, + "grad_norm": 3.01082516576592, + "learning_rate": 1.4946215518680299e-05, + "loss": 0.5177, "step": 7548 }, { - "epoch": 1.5829314321660726, - "grad_norm": 6.397330127305164, - "learning_rate": 9.591482542632268e-06, - "loss": 0.7875, + "epoch": 1.0659418243434058, + "grad_norm": 3.6755709223995083, + "learning_rate": 1.4944890546245208e-05, + "loss": 0.7278, "step": 7549 }, { - "epoch": 1.5831411197315999, - "grad_norm": 5.951921373719367, - "learning_rate": 9.589220558372241e-06, - "loss": 0.5713, + "epoch": 1.0660830273933917, + "grad_norm": 3.874485351060745, + "learning_rate": 1.4943565458892999e-05, + "loss": 0.679, "step": 7550 }, { - "epoch": 1.5833508072971272, - "grad_norm": 6.814451252091627, - "learning_rate": 9.586958595165373e-06, - "loss": 0.7571, + "epoch": 1.0662242304433776, + "grad_norm": 3.03004440974935, + "learning_rate": 1.4942240256654463e-05, + "loss": 0.4616, "step": 7551 }, { - "epoch": 1.5835604948626547, - "grad_norm": 5.729014870120971, - "learning_rate": 9.584696653127589e-06, - "loss": 0.506, + "epoch": 1.0663654334933634, + "grad_norm": 3.3961447614169087, + "learning_rate": 1.4940914939560398e-05, + "loss": 0.6846, "step": 7552 }, { - "epoch": 1.583770182428182, - "grad_norm": 6.015967608269447, - "learning_rate": 9.582434732374823e-06, - "loss": 0.6572, + "epoch": 1.0665066365433493, + "grad_norm": 3.442757919854349, + "learning_rate": 1.4939589507641602e-05, + "loss": 0.5952, "step": 7553 }, { - "epoch": 1.5839798699937093, - "grad_norm": 7.131110594928549, - "learning_rate": 9.580172833023006e-06, - "loss": 0.9791, + "epoch": 1.0666478395933352, + "grad_norm": 3.4805179683929364, + "learning_rate": 1.4938263960928878e-05, + "loss": 0.5536, "step": 7554 }, { - "epoch": 1.5841895575592368, - "grad_norm": 6.396493916540819, - "learning_rate": 9.577910955188056e-06, - "loss": 0.6287, + "epoch": 1.066789042643321, + "grad_norm": 3.6988463737556154, + "learning_rate": 1.4936938299453038e-05, + "loss": 0.6101, "step": 7555 }, { - "epoch": 1.5843992451247642, - "grad_norm": 5.898334677538959, - "learning_rate": 9.5756490989859e-06, - "loss": 0.6404, + "epoch": 1.066930245693307, + "grad_norm": 3.7529118511670134, + "learning_rate": 1.493561252324488e-05, + "loss": 0.5991, "step": 7556 }, { - "epoch": 1.5846089326902915, - "grad_norm": 7.431070292088967, - "learning_rate": 9.573387264532469e-06, - "loss": 0.7677, + "epoch": 1.0670714487432928, + "grad_norm": 3.835308498155701, + "learning_rate": 1.4934286632335217e-05, + "loss": 0.6281, "step": 7557 }, { - "epoch": 1.584818620255819, - "grad_norm": 7.214486151736406, - "learning_rate": 9.571125451943674e-06, - "loss": 0.8318, + "epoch": 1.0672126517932787, + "grad_norm": 3.55306135812472, + "learning_rate": 1.4932960626754867e-05, + "loss": 0.6062, "step": 7558 }, { - "epoch": 1.585028307821346, - "grad_norm": 7.134508097611619, - "learning_rate": 9.568863661335447e-06, - "loss": 0.7964, + "epoch": 1.0673538548432646, + "grad_norm": 3.753758703565222, + "learning_rate": 1.493163450653464e-05, + "loss": 0.6067, "step": 7559 }, { - "epoch": 1.5852379953868736, - "grad_norm": 7.438777391646582, - "learning_rate": 9.566601892823706e-06, - "loss": 0.7829, + "epoch": 1.0674950578932505, + "grad_norm": 3.0307273525430176, + "learning_rate": 1.4930308271705357e-05, + "loss": 0.5583, "step": 7560 }, { - "epoch": 1.585447682952401, - "grad_norm": 8.35222137174369, - "learning_rate": 9.564340146524369e-06, - "loss": 0.7847, + "epoch": 1.0676362609432364, + "grad_norm": 4.007621045962966, + "learning_rate": 1.4928981922297842e-05, + "loss": 0.5929, "step": 7561 }, { - "epoch": 1.5856573705179282, - "grad_norm": 7.154482566491909, - "learning_rate": 9.562078422553352e-06, - "loss": 0.8228, + "epoch": 1.0677774639932223, + "grad_norm": 2.762194156328269, + "learning_rate": 1.4927655458342914e-05, + "loss": 0.4892, "step": 7562 }, { - "epoch": 1.5858670580834557, - "grad_norm": 10.21973009542303, - "learning_rate": 9.559816721026583e-06, - "loss": 0.8651, + "epoch": 1.0679186670432081, + "grad_norm": 3.4816142633936233, + "learning_rate": 1.49263288798714e-05, + "loss": 0.5714, "step": 7563 }, { - "epoch": 1.586076745648983, - "grad_norm": 6.0944646423921585, - "learning_rate": 9.557555042059963e-06, - "loss": 0.7973, + "epoch": 1.068059870093194, + "grad_norm": 3.8332577405031025, + "learning_rate": 1.4925002186914133e-05, + "loss": 0.6085, "step": 7564 }, { - "epoch": 1.5862864332145103, - "grad_norm": 7.033348811610297, - "learning_rate": 9.555293385769417e-06, - "loss": 0.7986, + "epoch": 1.06820107314318, + "grad_norm": 3.3468516587537693, + "learning_rate": 1.4923675379501939e-05, + "loss": 0.5322, "step": 7565 }, { - "epoch": 1.5864961207800379, - "grad_norm": 6.064191070214426, - "learning_rate": 9.553031752270858e-06, - "loss": 0.6064, + "epoch": 1.0683422761931658, + "grad_norm": 3.564624976409129, + "learning_rate": 1.4922348457665656e-05, + "loss": 0.5532, "step": 7566 }, { - "epoch": 1.586705808345565, - "grad_norm": 7.4518086165095925, - "learning_rate": 9.550770141680194e-06, - "loss": 1.0722, + "epoch": 1.0684834792431517, + "grad_norm": 2.5940057281703286, + "learning_rate": 1.492102142143612e-05, + "loss": 0.3667, "step": 7567 }, { - "epoch": 1.5869154959110925, - "grad_norm": 8.205309077106456, - "learning_rate": 9.548508554113339e-06, - "loss": 0.9551, + "epoch": 1.0686246822931376, + "grad_norm": 3.5447768501585206, + "learning_rate": 1.4919694270844176e-05, + "loss": 0.6436, "step": 7568 }, { - "epoch": 1.5871251834766198, - "grad_norm": 5.992382118360831, - "learning_rate": 9.546246989686207e-06, - "loss": 0.5532, + "epoch": 1.0687658853431234, + "grad_norm": 3.805530456573539, + "learning_rate": 1.491836700592066e-05, + "loss": 0.7294, "step": 7569 }, { - "epoch": 1.587334871042147, - "grad_norm": 6.145985110612443, - "learning_rate": 9.543985448514702e-06, - "loss": 0.5055, + "epoch": 1.0689070883931093, + "grad_norm": 3.149888999331618, + "learning_rate": 1.4917039626696416e-05, + "loss": 0.5156, "step": 7570 }, { - "epoch": 1.5875445586076746, - "grad_norm": 7.159649154096195, - "learning_rate": 9.541723930714734e-06, - "loss": 0.8941, + "epoch": 1.0690482914430952, + "grad_norm": 3.650028535373315, + "learning_rate": 1.4915712133202295e-05, + "loss": 0.6967, "step": 7571 }, { - "epoch": 1.587754246173202, - "grad_norm": 5.753477237891142, - "learning_rate": 9.53946243640221e-06, - "loss": 0.6239, + "epoch": 1.069189494493081, + "grad_norm": 3.635960047623998, + "learning_rate": 1.491438452546915e-05, + "loss": 0.5806, "step": 7572 }, { - "epoch": 1.5879639337387292, - "grad_norm": 5.684399582245832, - "learning_rate": 9.537200965693031e-06, - "loss": 0.6259, + "epoch": 1.069330697543067, + "grad_norm": 4.840031626334433, + "learning_rate": 1.491305680352783e-05, + "loss": 0.5131, "step": 7573 }, { - "epoch": 1.5881736213042568, - "grad_norm": 9.627128063925703, - "learning_rate": 9.53493951870311e-06, - "loss": 1.315, + "epoch": 1.0694719005930529, + "grad_norm": 3.154831540045117, + "learning_rate": 1.4911728967409189e-05, + "loss": 0.5363, "step": 7574 }, { - "epoch": 1.588383308869784, - "grad_norm": 8.099195278428454, - "learning_rate": 9.532678095548345e-06, - "loss": 0.6974, + "epoch": 1.0696131036430387, + "grad_norm": 3.8790833178624364, + "learning_rate": 1.4910401017144089e-05, + "loss": 0.7669, "step": 7575 }, { - "epoch": 1.5885929964353114, - "grad_norm": 7.342496324371607, - "learning_rate": 9.530416696344636e-06, - "loss": 0.8866, + "epoch": 1.0697543066930246, + "grad_norm": 2.9692260619469297, + "learning_rate": 1.4909072952763395e-05, + "loss": 0.4953, "step": 7576 }, { - "epoch": 1.588802684000839, - "grad_norm": 6.560006933999104, - "learning_rate": 9.528155321207885e-06, - "loss": 0.7291, + "epoch": 1.0698955097430105, + "grad_norm": 3.651285317662748, + "learning_rate": 1.4907744774297959e-05, + "loss": 0.5821, "step": 7577 }, { - "epoch": 1.589012371566366, - "grad_norm": 7.096134236351607, - "learning_rate": 9.525893970253997e-06, - "loss": 0.8271, + "epoch": 1.0700367127929964, + "grad_norm": 3.9534177845410423, + "learning_rate": 1.4906416481778656e-05, + "loss": 0.6126, "step": 7578 }, { - "epoch": 1.5892220591318935, - "grad_norm": 6.604334480487465, - "learning_rate": 9.523632643598858e-06, - "loss": 0.7784, + "epoch": 1.0701779158429823, + "grad_norm": 3.1297149509715614, + "learning_rate": 1.4905088075236354e-05, + "loss": 0.4664, "step": 7579 }, { - "epoch": 1.5894317466974208, - "grad_norm": 7.9393429132673665, - "learning_rate": 9.521371341358376e-06, - "loss": 0.6618, + "epoch": 1.0703191188929682, + "grad_norm": 3.5436653758979735, + "learning_rate": 1.4903759554701922e-05, + "loss": 0.6684, "step": 7580 }, { - "epoch": 1.5896414342629481, - "grad_norm": 7.3498354194543785, - "learning_rate": 9.519110063648445e-06, - "loss": 0.6907, + "epoch": 1.070460321942954, + "grad_norm": 5.656603129785557, + "learning_rate": 1.4902430920206237e-05, + "loss": 0.6696, "step": 7581 }, { - "epoch": 1.5898511218284757, - "grad_norm": 7.035034089666707, - "learning_rate": 9.516848810584955e-06, - "loss": 0.7239, + "epoch": 1.07060152499294, + "grad_norm": 3.27774201805249, + "learning_rate": 1.4901102171780175e-05, + "loss": 0.535, "step": 7582 }, { - "epoch": 1.590060809394003, - "grad_norm": 7.767931199004609, - "learning_rate": 9.514587582283799e-06, - "loss": 0.9196, + "epoch": 1.0707427280429258, + "grad_norm": 3.4019494472068295, + "learning_rate": 1.4899773309454612e-05, + "loss": 0.5839, "step": 7583 }, { - "epoch": 1.5902704969595303, - "grad_norm": 6.314748512294622, - "learning_rate": 9.51232637886088e-06, - "loss": 0.5313, + "epoch": 1.0708839310929117, + "grad_norm": 3.127608689751116, + "learning_rate": 1.4898444333260436e-05, + "loss": 0.4842, "step": 7584 }, { - "epoch": 1.5904801845250578, - "grad_norm": 6.789016078591631, - "learning_rate": 9.51006520043207e-06, - "loss": 0.558, + "epoch": 1.0710251341428976, + "grad_norm": 4.954692826458377, + "learning_rate": 1.489711524322853e-05, + "loss": 0.8708, "step": 7585 }, { - "epoch": 1.5906898720905849, - "grad_norm": 7.350103278827284, - "learning_rate": 9.507804047113272e-06, - "loss": 0.6295, + "epoch": 1.0711663371928835, + "grad_norm": 3.011817888229896, + "learning_rate": 1.4895786039389779e-05, + "loss": 0.5064, "step": 7586 }, { - "epoch": 1.5908995596561124, - "grad_norm": 6.9470304294354985, - "learning_rate": 9.505542919020372e-06, - "loss": 0.7123, + "epoch": 1.0713075402428693, + "grad_norm": 3.8146781213231145, + "learning_rate": 1.4894456721775074e-05, + "loss": 0.5515, "step": 7587 }, { - "epoch": 1.5911092472216397, - "grad_norm": 6.654879317505086, - "learning_rate": 9.503281816269253e-06, - "loss": 0.8382, + "epoch": 1.0714487432928552, + "grad_norm": 3.1272211140532247, + "learning_rate": 1.4893127290415312e-05, + "loss": 0.5242, "step": 7588 }, { - "epoch": 1.591318934787167, - "grad_norm": 7.225590537665633, - "learning_rate": 9.501020738975801e-06, - "loss": 0.9082, + "epoch": 1.071589946342841, + "grad_norm": 3.7911799193351046, + "learning_rate": 1.4891797745341382e-05, + "loss": 0.6034, "step": 7589 }, { - "epoch": 1.5915286223526945, - "grad_norm": 6.352394236644447, - "learning_rate": 9.498759687255905e-06, - "loss": 0.792, + "epoch": 1.071731149392827, + "grad_norm": 3.424871647402132, + "learning_rate": 1.4890468086584187e-05, + "loss": 0.5179, "step": 7590 }, { - "epoch": 1.5917383099182218, - "grad_norm": 7.242417276568485, - "learning_rate": 9.496498661225443e-06, - "loss": 0.7897, + "epoch": 1.0718723524428126, + "grad_norm": 4.230405734623407, + "learning_rate": 1.4889138314174622e-05, + "loss": 0.681, "step": 7591 }, { - "epoch": 1.5919479974837492, - "grad_norm": 7.209606441983618, - "learning_rate": 9.494237661000298e-06, - "loss": 0.7947, + "epoch": 1.0720135554927985, + "grad_norm": 4.4977089110824195, + "learning_rate": 1.4887808428143595e-05, + "loss": 0.6991, "step": 7592 }, { - "epoch": 1.5921576850492767, - "grad_norm": 8.484306295654152, - "learning_rate": 9.49197668669635e-06, - "loss": 1.0704, + "epoch": 1.0721547585427844, + "grad_norm": 3.8438138741702157, + "learning_rate": 1.4886478428522015e-05, + "loss": 0.6189, "step": 7593 }, { - "epoch": 1.592367372614804, - "grad_norm": 5.79309084056881, - "learning_rate": 9.489715738429479e-06, - "loss": 0.728, + "epoch": 1.0722959615927703, + "grad_norm": 3.1189362515146235, + "learning_rate": 1.4885148315340783e-05, + "loss": 0.5299, "step": 7594 }, { - "epoch": 1.5925770601803313, - "grad_norm": 6.221914843784227, - "learning_rate": 9.487454816315556e-06, - "loss": 0.6808, + "epoch": 1.0724371646427562, + "grad_norm": 3.399345304536633, + "learning_rate": 1.4883818088630814e-05, + "loss": 0.5906, "step": 7595 }, { - "epoch": 1.5927867477458588, - "grad_norm": 7.202143424080865, - "learning_rate": 9.48519392047047e-06, - "loss": 0.9228, + "epoch": 1.072578367692742, + "grad_norm": 4.281357910940498, + "learning_rate": 1.4882487748423025e-05, + "loss": 0.7668, "step": 7596 }, { - "epoch": 1.592996435311386, - "grad_norm": 7.629816182196042, - "learning_rate": 9.482933051010085e-06, - "loss": 0.5871, + "epoch": 1.072719570742728, + "grad_norm": 2.9761829070312777, + "learning_rate": 1.4881157294748326e-05, + "loss": 0.5936, "step": 7597 }, { - "epoch": 1.5932061228769134, - "grad_norm": 7.743035071591913, - "learning_rate": 9.480672208050278e-06, - "loss": 0.8004, + "epoch": 1.0728607737927138, + "grad_norm": 3.4519175520290486, + "learning_rate": 1.487982672763764e-05, + "loss": 0.6778, "step": 7598 }, { - "epoch": 1.5934158104424407, - "grad_norm": 7.143327766381275, - "learning_rate": 9.478411391706923e-06, - "loss": 0.7195, + "epoch": 1.0730019768426997, + "grad_norm": 3.6180887649020383, + "learning_rate": 1.487849604712189e-05, + "loss": 0.6057, "step": 7599 }, { - "epoch": 1.593625498007968, - "grad_norm": 8.063154950357209, - "learning_rate": 9.476150602095883e-06, - "loss": 0.9223, + "epoch": 1.0731431798926856, + "grad_norm": 3.1535660596412542, + "learning_rate": 1.4877165253231995e-05, + "loss": 0.5221, "step": 7600 }, { - "epoch": 1.5938351855734956, - "grad_norm": 6.998763510581478, - "learning_rate": 9.473889839333037e-06, - "loss": 0.6741, + "epoch": 1.0732843829426715, + "grad_norm": 4.608452945595066, + "learning_rate": 1.487583434599889e-05, + "loss": 0.697, "step": 7601 }, { - "epoch": 1.5940448731390229, - "grad_norm": 7.795093008316764, - "learning_rate": 9.471629103534252e-06, - "loss": 0.9485, + "epoch": 1.0734255859926574, + "grad_norm": 3.407972489161472, + "learning_rate": 1.48745033254535e-05, + "loss": 0.5867, "step": 7602 }, { - "epoch": 1.5942545607045502, - "grad_norm": 8.258486459264457, - "learning_rate": 9.469368394815391e-06, - "loss": 1.032, + "epoch": 1.0735667890426432, + "grad_norm": 3.492042085488486, + "learning_rate": 1.4873172191626758e-05, + "loss": 0.7386, "step": 7603 }, { - "epoch": 1.5944642482700777, - "grad_norm": 6.863645188415032, - "learning_rate": 9.467107713292315e-06, - "loss": 0.8017, + "epoch": 1.0737079920926291, + "grad_norm": 3.1550892529680574, + "learning_rate": 1.4871840944549596e-05, + "loss": 0.4928, "step": 7604 }, { - "epoch": 1.5946739358356048, - "grad_norm": 8.569780407977742, - "learning_rate": 9.4648470590809e-06, - "loss": 0.7837, + "epoch": 1.073849195142615, + "grad_norm": 3.2078855118346805, + "learning_rate": 1.4870509584252956e-05, + "loss": 0.5393, "step": 7605 }, { - "epoch": 1.5948836234011323, - "grad_norm": 7.916736903579657, - "learning_rate": 9.462586432296995e-06, - "loss": 0.797, + "epoch": 1.073990398192601, + "grad_norm": 3.086574489391303, + "learning_rate": 1.4869178110767777e-05, + "loss": 0.6672, "step": 7606 }, { - "epoch": 1.5950933109666596, - "grad_norm": 5.618496525361615, - "learning_rate": 9.46032583305647e-06, - "loss": 0.5166, + "epoch": 1.0741316012425868, + "grad_norm": 2.8514914840894803, + "learning_rate": 1.4867846524125e-05, + "loss": 0.6009, "step": 7607 }, { - "epoch": 1.595302998532187, - "grad_norm": 6.86309679295889, - "learning_rate": 9.458065261475184e-06, - "loss": 0.5524, + "epoch": 1.0742728042925727, + "grad_norm": 3.245570287100149, + "learning_rate": 1.4866514824355572e-05, + "loss": 0.554, "step": 7608 }, { - "epoch": 1.5955126860977145, - "grad_norm": 7.08751977868575, - "learning_rate": 9.455804717668993e-06, - "loss": 0.8004, + "epoch": 1.0744140073425585, + "grad_norm": 4.328808605990061, + "learning_rate": 1.4865183011490442e-05, + "loss": 0.7472, "step": 7609 }, { - "epoch": 1.5957223736632418, - "grad_norm": 7.941673010461113, - "learning_rate": 9.45354420175375e-06, - "loss": 0.7947, + "epoch": 1.0745552103925444, + "grad_norm": 3.2079907951634947, + "learning_rate": 1.4863851085560563e-05, + "loss": 0.5045, "step": 7610 }, { - "epoch": 1.595932061228769, - "grad_norm": 7.416563145582192, - "learning_rate": 9.451283713845321e-06, - "loss": 0.8703, + "epoch": 1.0746964134425303, + "grad_norm": 4.473656497834196, + "learning_rate": 1.4862519046596882e-05, + "loss": 0.854, "step": 7611 }, { - "epoch": 1.5961417487942966, - "grad_norm": 6.769307770647489, - "learning_rate": 9.449023254059547e-06, - "loss": 0.6543, + "epoch": 1.0748376164925162, + "grad_norm": 4.049502187332494, + "learning_rate": 1.4861186894630359e-05, + "loss": 0.6456, "step": 7612 }, { - "epoch": 1.596351436359824, - "grad_norm": 8.958574064433897, - "learning_rate": 9.446762822512289e-06, - "loss": 0.784, + "epoch": 1.074978819542502, + "grad_norm": 3.434216185523278, + "learning_rate": 1.4859854629691953e-05, + "loss": 0.6046, "step": 7613 }, { - "epoch": 1.5965611239253512, - "grad_norm": 7.001065084174718, - "learning_rate": 9.444502419319393e-06, - "loss": 0.6407, + "epoch": 1.075120022592488, + "grad_norm": 4.2654503052946815, + "learning_rate": 1.4858522251812621e-05, + "loss": 0.816, "step": 7614 }, { - "epoch": 1.5967708114908787, - "grad_norm": 6.811153271369026, - "learning_rate": 9.442242044596719e-06, - "loss": 0.7529, + "epoch": 1.0752612256424738, + "grad_norm": 3.189915906748601, + "learning_rate": 1.4857189761023333e-05, + "loss": 0.5261, "step": 7615 }, { - "epoch": 1.5969804990564058, - "grad_norm": 8.573629056599192, - "learning_rate": 9.439981698460101e-06, - "loss": 0.8493, + "epoch": 1.0754024286924597, + "grad_norm": 4.005094332110878, + "learning_rate": 1.485585715735505e-05, + "loss": 0.6334, "step": 7616 }, { - "epoch": 1.5971901866219333, - "grad_norm": 7.82309361622577, - "learning_rate": 9.437721381025395e-06, - "loss": 0.9077, + "epoch": 1.0755436317424456, + "grad_norm": 3.7451104276339318, + "learning_rate": 1.4854524440838747e-05, + "loss": 0.5723, "step": 7617 }, { - "epoch": 1.5973998741874607, - "grad_norm": 6.690752380500164, - "learning_rate": 9.435461092408445e-06, - "loss": 0.6738, + "epoch": 1.0756848347924315, + "grad_norm": 3.5316333793634973, + "learning_rate": 1.4853191611505391e-05, + "loss": 0.5165, "step": 7618 }, { - "epoch": 1.597609561752988, - "grad_norm": 8.078854930003832, - "learning_rate": 9.43320083272509e-06, - "loss": 0.8209, + "epoch": 1.0758260378424174, + "grad_norm": 3.755696230381059, + "learning_rate": 1.4851858669385956e-05, + "loss": 0.6229, "step": 7619 }, { - "epoch": 1.5978192493185155, - "grad_norm": 8.591697372307808, - "learning_rate": 9.430940602091174e-06, - "loss": 1.1009, + "epoch": 1.0759672408924033, + "grad_norm": 3.1475517592833606, + "learning_rate": 1.4850525614511427e-05, + "loss": 0.6059, "step": 7620 }, { - "epoch": 1.5980289368840428, - "grad_norm": 7.3563687001377165, - "learning_rate": 9.428680400622544e-06, - "loss": 0.8315, + "epoch": 1.0761084439423891, + "grad_norm": 3.609776004015949, + "learning_rate": 1.4849192446912772e-05, + "loss": 0.6284, "step": 7621 }, { - "epoch": 1.59823862444957, - "grad_norm": 7.989395837742091, - "learning_rate": 9.426420228435031e-06, - "loss": 0.9364, + "epoch": 1.076249646992375, + "grad_norm": 3.6821877459686037, + "learning_rate": 1.484785916662098e-05, + "loss": 0.6236, "step": 7622 }, { - "epoch": 1.5984483120150976, - "grad_norm": 6.770592746617301, - "learning_rate": 9.424160085644476e-06, - "loss": 0.7764, + "epoch": 1.076390850042361, + "grad_norm": 3.323719201610466, + "learning_rate": 1.4846525773667035e-05, + "loss": 0.5576, "step": 7623 }, { - "epoch": 1.5986579995806247, - "grad_norm": 6.020115962274509, - "learning_rate": 9.421899972366721e-06, - "loss": 0.7114, + "epoch": 1.0765320530923468, + "grad_norm": 3.3645919381406415, + "learning_rate": 1.4845192268081924e-05, + "loss": 0.6504, "step": 7624 }, { - "epoch": 1.5988676871461522, - "grad_norm": 6.647356031656806, - "learning_rate": 9.419639888717589e-06, - "loss": 0.6068, + "epoch": 1.0766732561423327, + "grad_norm": 3.5371097461597034, + "learning_rate": 1.4843858649896634e-05, + "loss": 0.7498, "step": 7625 }, { - "epoch": 1.5990773747116795, - "grad_norm": 5.4915670355416815, - "learning_rate": 9.417379834812918e-06, - "loss": 0.6227, + "epoch": 1.0768144591923186, + "grad_norm": 4.212588905954183, + "learning_rate": 1.4842524919142164e-05, + "loss": 0.5804, "step": 7626 }, { - "epoch": 1.5992870622772069, - "grad_norm": 6.509542323178818, - "learning_rate": 9.415119810768546e-06, - "loss": 0.7962, + "epoch": 1.0769556622423044, + "grad_norm": 3.7709444531599954, + "learning_rate": 1.484119107584951e-05, + "loss": 0.6788, "step": 7627 }, { - "epoch": 1.5994967498427344, - "grad_norm": 7.494419036308688, - "learning_rate": 9.412859816700296e-06, - "loss": 0.9367, + "epoch": 1.0770968652922903, + "grad_norm": 3.8216243941455184, + "learning_rate": 1.4839857120049658e-05, + "loss": 0.6369, "step": 7628 }, { - "epoch": 1.5997064374082617, - "grad_norm": 6.163821032850612, - "learning_rate": 9.410599852723999e-06, - "loss": 0.6636, + "epoch": 1.0772380683422762, + "grad_norm": 4.1985980718051605, + "learning_rate": 1.4838523051773623e-05, + "loss": 0.5482, "step": 7629 }, { - "epoch": 1.599916124973789, - "grad_norm": 6.201386977542586, - "learning_rate": 9.408339918955483e-06, - "loss": 0.5742, + "epoch": 1.077379271392262, + "grad_norm": 5.773035503138816, + "learning_rate": 1.4837188871052399e-05, + "loss": 0.8512, "step": 7630 }, { - "epoch": 1.6001258125393165, - "grad_norm": 6.58385629566131, - "learning_rate": 9.406080015510567e-06, - "loss": 0.7012, + "epoch": 1.077520474442248, + "grad_norm": 3.012776880298254, + "learning_rate": 1.4835854577916996e-05, + "loss": 0.5806, "step": 7631 }, { - "epoch": 1.6003355001048438, - "grad_norm": 8.09143711172415, - "learning_rate": 9.403820142505085e-06, - "loss": 0.9602, + "epoch": 1.0776616774922338, + "grad_norm": 3.2774031644410226, + "learning_rate": 1.4834520172398421e-05, + "loss": 0.503, "step": 7632 }, { - "epoch": 1.6005451876703711, - "grad_norm": 7.573456172268836, - "learning_rate": 9.401560300054856e-06, - "loss": 0.8303, + "epoch": 1.0778028805422197, + "grad_norm": 3.8412632221969587, + "learning_rate": 1.4833185654527684e-05, + "loss": 0.6788, "step": 7633 }, { - "epoch": 1.6007548752358987, - "grad_norm": 5.644970830874413, - "learning_rate": 9.399300488275698e-06, - "loss": 0.6366, + "epoch": 1.0779440835922056, + "grad_norm": 3.5161676741055756, + "learning_rate": 1.4831851024335799e-05, + "loss": 0.6225, "step": 7634 }, { - "epoch": 1.6009645628014257, - "grad_norm": 7.525339440261543, - "learning_rate": 9.397040707283429e-06, - "loss": 0.9075, + "epoch": 1.0780852866421915, + "grad_norm": 3.613079335783669, + "learning_rate": 1.4830516281853784e-05, + "loss": 0.5847, "step": 7635 }, { - "epoch": 1.6011742503669533, - "grad_norm": 6.877027376852609, - "learning_rate": 9.394780957193875e-06, - "loss": 0.8368, + "epoch": 1.0782264896921774, + "grad_norm": 3.3238132849414286, + "learning_rate": 1.482918142711266e-05, + "loss": 0.5302, "step": 7636 }, { - "epoch": 1.6013839379324806, - "grad_norm": 7.312198668623047, - "learning_rate": 9.392521238122841e-06, - "loss": 0.6836, + "epoch": 1.0783676927421633, + "grad_norm": 3.318184973899602, + "learning_rate": 1.482784646014344e-05, + "loss": 0.5835, "step": 7637 }, { - "epoch": 1.6015936254980079, - "grad_norm": 7.409437819147826, - "learning_rate": 9.39026155018615e-06, - "loss": 0.9561, + "epoch": 1.0785088957921491, + "grad_norm": 3.9292520173944765, + "learning_rate": 1.4826511380977155e-05, + "loss": 0.6874, "step": 7638 }, { - "epoch": 1.6018033130635354, - "grad_norm": 6.1472965616939845, - "learning_rate": 9.388001893499613e-06, - "loss": 0.7141, + "epoch": 1.078650098842135, + "grad_norm": 3.3281695228232873, + "learning_rate": 1.482517618964483e-05, + "loss": 0.6311, "step": 7639 }, { - "epoch": 1.6020130006290627, - "grad_norm": 8.147380707981435, - "learning_rate": 9.38574226817904e-06, - "loss": 1.0179, + "epoch": 1.078791301892121, + "grad_norm": 3.8660137373117585, + "learning_rate": 1.4823840886177494e-05, + "loss": 0.6056, "step": 7640 }, { - "epoch": 1.60222268819459, - "grad_norm": 6.27219879140679, - "learning_rate": 9.383482674340236e-06, - "loss": 0.6746, + "epoch": 1.0789325049421068, + "grad_norm": 3.332326544865899, + "learning_rate": 1.482250547060618e-05, + "loss": 0.4995, "step": 7641 }, { - "epoch": 1.6024323757601175, - "grad_norm": 6.576457475137217, - "learning_rate": 9.381223112099022e-06, - "loss": 0.6924, + "epoch": 1.0790737079920927, + "grad_norm": 3.452357178130149, + "learning_rate": 1.4821169942961922e-05, + "loss": 0.703, "step": 7642 }, { - "epoch": 1.6026420633256446, - "grad_norm": 6.816325966272055, - "learning_rate": 9.378963581571192e-06, - "loss": 0.8075, + "epoch": 1.0792149110420786, + "grad_norm": 2.8171779919595163, + "learning_rate": 1.4819834303275755e-05, + "loss": 0.441, "step": 7643 }, { - "epoch": 1.6028517508911722, - "grad_norm": 6.061143228797713, - "learning_rate": 9.376704082872557e-06, - "loss": 0.5856, + "epoch": 1.0793561140920644, + "grad_norm": 3.3903399785820847, + "learning_rate": 1.4818498551578721e-05, + "loss": 0.6046, "step": 7644 }, { - "epoch": 1.6030614384566997, - "grad_norm": 6.61247757902698, - "learning_rate": 9.37444461611892e-06, - "loss": 0.7368, + "epoch": 1.0794973171420503, + "grad_norm": 3.74799106392717, + "learning_rate": 1.481716268790186e-05, + "loss": 0.5454, "step": 7645 }, { - "epoch": 1.6032711260222268, - "grad_norm": 7.695586660506961, - "learning_rate": 9.37218518142608e-06, - "loss": 0.9097, + "epoch": 1.0796385201920362, + "grad_norm": 3.3196627393887974, + "learning_rate": 1.481582671227622e-05, + "loss": 0.622, "step": 7646 }, { - "epoch": 1.6034808135877543, - "grad_norm": 7.350334955326874, - "learning_rate": 9.369925778909834e-06, - "loss": 0.9832, + "epoch": 1.079779723242022, + "grad_norm": 3.221491836941583, + "learning_rate": 1.481449062473285e-05, + "loss": 0.5008, "step": 7647 }, { - "epoch": 1.6036905011532816, - "grad_norm": 7.966025178095759, - "learning_rate": 9.367666408685993e-06, - "loss": 0.9826, + "epoch": 1.079920926292008, + "grad_norm": 4.673303700394533, + "learning_rate": 1.4813154425302791e-05, + "loss": 0.7063, "step": 7648 }, { - "epoch": 1.603900188718809, - "grad_norm": 9.168642622306578, - "learning_rate": 9.36540707087034e-06, - "loss": 0.9032, + "epoch": 1.0800621293419939, + "grad_norm": 3.8820088138559576, + "learning_rate": 1.4811818114017106e-05, + "loss": 0.6317, "step": 7649 }, { - "epoch": 1.6041098762843364, - "grad_norm": 6.810058942069842, - "learning_rate": 9.363147765578677e-06, - "loss": 0.9252, + "epoch": 1.0802033323919797, + "grad_norm": 2.5476669310754465, + "learning_rate": 1.4810481690906846e-05, + "loss": 0.4809, "step": 7650 }, { - "epoch": 1.6043195638498637, - "grad_norm": 7.16135585593105, - "learning_rate": 9.360888492926796e-06, - "loss": 0.6588, + "epoch": 1.0803445354419656, + "grad_norm": 4.089025500660644, + "learning_rate": 1.4809145156003066e-05, + "loss": 0.6916, "step": 7651 }, { - "epoch": 1.604529251415391, - "grad_norm": 6.741823606793392, - "learning_rate": 9.358629253030486e-06, - "loss": 0.8774, + "epoch": 1.0804857384919515, + "grad_norm": 3.424099058394347, + "learning_rate": 1.4807808509336831e-05, + "loss": 0.7123, "step": 7652 }, { - "epoch": 1.6047389389809186, - "grad_norm": 6.75213300538725, - "learning_rate": 9.35637004600554e-06, - "loss": 0.7712, + "epoch": 1.0806269415419374, + "grad_norm": 2.7510376951055875, + "learning_rate": 1.4806471750939206e-05, + "loss": 0.4977, "step": 7653 }, { - "epoch": 1.6049486265464457, - "grad_norm": 6.249395764617356, - "learning_rate": 9.354110871967747e-06, - "loss": 0.6762, + "epoch": 1.0807681445919233, + "grad_norm": 4.012657112290507, + "learning_rate": 1.480513488084125e-05, + "loss": 0.6895, "step": 7654 }, { - "epoch": 1.6051583141119732, - "grad_norm": 5.5447810821287655, - "learning_rate": 9.351851731032892e-06, - "loss": 0.6068, + "epoch": 1.0809093476419092, + "grad_norm": 2.8220191558244068, + "learning_rate": 1.4803797899074035e-05, + "loss": 0.4297, "step": 7655 }, { - "epoch": 1.6053680016775005, - "grad_norm": 6.109286996690537, - "learning_rate": 9.349592623316758e-06, - "loss": 0.6901, + "epoch": 1.081050550691895, + "grad_norm": 3.1434667538784176, + "learning_rate": 1.480246080566863e-05, + "loss": 0.4945, "step": 7656 }, { - "epoch": 1.6055776892430278, - "grad_norm": 8.614548385817152, - "learning_rate": 9.347333548935134e-06, - "loss": 0.8464, + "epoch": 1.081191753741881, + "grad_norm": 3.501852560632229, + "learning_rate": 1.4801123600656114e-05, + "loss": 0.6198, "step": 7657 }, { - "epoch": 1.6057873768085553, - "grad_norm": 7.324001671689483, - "learning_rate": 9.34507450800379e-06, - "loss": 0.5875, + "epoch": 1.0813329567918668, + "grad_norm": 3.320461502862783, + "learning_rate": 1.4799786284067554e-05, + "loss": 0.6715, "step": 7658 }, { - "epoch": 1.6059970643740826, - "grad_norm": 8.369331799018477, - "learning_rate": 9.34281550063852e-06, - "loss": 0.8612, + "epoch": 1.0814741598418527, + "grad_norm": 3.0165389680699253, + "learning_rate": 1.4798448855934035e-05, + "loss": 0.5479, "step": 7659 }, { - "epoch": 1.60620675193961, - "grad_norm": 11.079365352820734, - "learning_rate": 9.340556526955095e-06, - "loss": 1.053, + "epoch": 1.0816153628918386, + "grad_norm": 2.947107089699705, + "learning_rate": 1.4797111316286639e-05, + "loss": 0.5798, "step": 7660 }, { - "epoch": 1.6064164395051375, - "grad_norm": 6.492289935691345, - "learning_rate": 9.33829758706929e-06, - "loss": 0.8331, + "epoch": 1.0817565659418245, + "grad_norm": 3.3977708368612065, + "learning_rate": 1.4795773665156448e-05, + "loss": 0.5796, "step": 7661 }, { - "epoch": 1.6066261270706645, - "grad_norm": 6.264903971811581, - "learning_rate": 9.336038681096878e-06, - "loss": 0.6872, + "epoch": 1.0818977689918103, + "grad_norm": 3.5144869824496965, + "learning_rate": 1.4794435902574543e-05, + "loss": 0.6682, "step": 7662 }, { - "epoch": 1.606835814636192, - "grad_norm": 6.455794627783083, - "learning_rate": 9.333779809153644e-06, - "loss": 0.8051, + "epoch": 1.082038972041796, + "grad_norm": 3.8371186182317394, + "learning_rate": 1.4793098028572024e-05, + "loss": 0.6418, "step": 7663 }, { - "epoch": 1.6070455022017196, - "grad_norm": 7.462378459358794, - "learning_rate": 9.33152097135534e-06, - "loss": 0.871, + "epoch": 1.0821801750917819, + "grad_norm": 2.8775446069495643, + "learning_rate": 1.4791760043179975e-05, + "loss": 0.5117, "step": 7664 }, { - "epoch": 1.6072551897672467, - "grad_norm": 7.138445784854093, - "learning_rate": 9.329262167817752e-06, - "loss": 0.8272, + "epoch": 1.0823213781417678, + "grad_norm": 3.6190774549901055, + "learning_rate": 1.4790421946429491e-05, + "loss": 0.6588, "step": 7665 }, { - "epoch": 1.6074648773327742, - "grad_norm": 6.689626612675976, - "learning_rate": 9.327003398656642e-06, - "loss": 0.7404, + "epoch": 1.0824625811917536, + "grad_norm": 3.7124544137249464, + "learning_rate": 1.478908373835167e-05, + "loss": 0.6459, "step": 7666 }, { - "epoch": 1.6076745648983015, - "grad_norm": 7.39643293709871, - "learning_rate": 9.324744663987773e-06, - "loss": 0.943, + "epoch": 1.0826037842417395, + "grad_norm": 2.847208039224612, + "learning_rate": 1.4787745418977612e-05, + "loss": 0.4707, "step": 7667 }, { - "epoch": 1.6078842524638288, - "grad_norm": 7.011284641014348, - "learning_rate": 9.322485963926909e-06, - "loss": 0.7704, + "epoch": 1.0827449872917254, + "grad_norm": 3.0652538058129335, + "learning_rate": 1.4786406988338415e-05, + "loss": 0.5315, "step": 7668 }, { - "epoch": 1.6080939400293564, - "grad_norm": 8.530566651447783, - "learning_rate": 9.320227298589819e-06, - "loss": 0.8044, + "epoch": 1.0828861903417113, + "grad_norm": 3.917785567250713, + "learning_rate": 1.4785068446465189e-05, + "loss": 0.7106, "step": 7669 }, { - "epoch": 1.6083036275948837, - "grad_norm": 5.936296176278467, - "learning_rate": 9.317968668092254e-06, - "loss": 0.6623, + "epoch": 1.0830273933916972, + "grad_norm": 3.5715054087861544, + "learning_rate": 1.4783729793389043e-05, + "loss": 0.7457, "step": 7670 }, { - "epoch": 1.608513315160411, - "grad_norm": 5.9878794218950615, - "learning_rate": 9.31571007254998e-06, - "loss": 0.5952, + "epoch": 1.083168596441683, + "grad_norm": 3.37483754931424, + "learning_rate": 1.4782391029141077e-05, + "loss": 0.5401, "step": 7671 }, { - "epoch": 1.6087230027259385, - "grad_norm": 6.8333450404266545, - "learning_rate": 9.313451512078755e-06, - "loss": 0.7791, + "epoch": 1.083309799491669, + "grad_norm": 3.588543601734951, + "learning_rate": 1.4781052153752411e-05, + "loss": 0.5628, "step": 7672 }, { - "epoch": 1.6089326902914656, - "grad_norm": 8.583954802582612, - "learning_rate": 9.311192986794327e-06, - "loss": 0.9966, + "epoch": 1.0834510025416548, + "grad_norm": 3.6425670995466923, + "learning_rate": 1.4779713167254157e-05, + "loss": 0.6412, "step": 7673 }, { - "epoch": 1.609142377856993, - "grad_norm": 7.44724023065807, - "learning_rate": 9.30893449681245e-06, - "loss": 1.1628, + "epoch": 1.0835922055916407, + "grad_norm": 3.632937290511941, + "learning_rate": 1.4778374069677435e-05, + "loss": 0.5632, "step": 7674 }, { - "epoch": 1.6093520654225204, - "grad_norm": 8.696459747858393, - "learning_rate": 9.306676042248883e-06, - "loss": 0.6441, + "epoch": 1.0837334086416266, + "grad_norm": 3.943526049199452, + "learning_rate": 1.477703486105336e-05, + "loss": 0.6971, "step": 7675 }, { - "epoch": 1.6095617529880477, - "grad_norm": 6.42364527281743, - "learning_rate": 9.304417623219373e-06, - "loss": 0.7912, + "epoch": 1.0838746116916125, + "grad_norm": 3.692651928110072, + "learning_rate": 1.4775695541413063e-05, + "loss": 0.5641, "step": 7676 }, { - "epoch": 1.6097714405535752, - "grad_norm": 5.694052347505275, - "learning_rate": 9.302159239839663e-06, - "loss": 0.4556, + "epoch": 1.0840158147415984, + "grad_norm": 3.634161232944167, + "learning_rate": 1.4774356110787657e-05, + "loss": 0.6334, "step": 7677 }, { - "epoch": 1.6099811281191025, - "grad_norm": 6.234753324298042, - "learning_rate": 9.299900892225501e-06, - "loss": 0.7282, + "epoch": 1.0841570177915842, + "grad_norm": 3.142057361186876, + "learning_rate": 1.4773016569208283e-05, + "loss": 0.5813, "step": 7678 }, { - "epoch": 1.6101908156846299, - "grad_norm": 5.74387733448822, - "learning_rate": 9.297642580492636e-06, - "loss": 0.6228, + "epoch": 1.0842982208415701, + "grad_norm": 3.1806693785983775, + "learning_rate": 1.4771676916706063e-05, + "loss": 0.5318, "step": 7679 }, { - "epoch": 1.6104005032501574, - "grad_norm": 8.161814630229511, - "learning_rate": 9.295384304756807e-06, - "loss": 0.856, + "epoch": 1.084439423891556, + "grad_norm": 4.837396753903249, + "learning_rate": 1.4770337153312131e-05, + "loss": 0.7523, "step": 7680 }, { - "epoch": 1.6106101908156847, - "grad_norm": 7.547190616900892, - "learning_rate": 9.293126065133752e-06, - "loss": 0.7523, + "epoch": 1.084580626941542, + "grad_norm": 3.9395758528665388, + "learning_rate": 1.4768997279057624e-05, + "loss": 0.5592, "step": 7681 }, { - "epoch": 1.610819878381212, - "grad_norm": 7.194895647727988, - "learning_rate": 9.290867861739214e-06, - "loss": 0.7067, + "epoch": 1.0847218299915278, + "grad_norm": 3.3969029060507054, + "learning_rate": 1.476765729397368e-05, + "loss": 0.6799, "step": 7682 }, { - "epoch": 1.6110295659467395, - "grad_norm": 6.073590239822679, - "learning_rate": 9.288609694688926e-06, - "loss": 0.6457, + "epoch": 1.0848630330415137, + "grad_norm": 3.7169186205491394, + "learning_rate": 1.476631719809144e-05, + "loss": 0.6307, "step": 7683 }, { - "epoch": 1.6112392535122666, - "grad_norm": 5.904870026897326, - "learning_rate": 9.286351564098626e-06, - "loss": 0.6146, + "epoch": 1.0850042360914995, + "grad_norm": 3.290156896305463, + "learning_rate": 1.4764976991442045e-05, + "loss": 0.6364, "step": 7684 }, { - "epoch": 1.6114489410777941, - "grad_norm": 7.807972922699126, - "learning_rate": 9.284093470084049e-06, - "loss": 0.7418, + "epoch": 1.0851454391414854, + "grad_norm": 3.864198070394517, + "learning_rate": 1.4763636674056646e-05, + "loss": 0.6885, "step": 7685 }, { - "epoch": 1.6116586286433214, - "grad_norm": 6.635005892113704, - "learning_rate": 9.28183541276092e-06, - "loss": 0.6635, + "epoch": 1.0852866421914713, + "grad_norm": 4.083706067313782, + "learning_rate": 1.4762296245966387e-05, + "loss": 0.688, "step": 7686 }, { - "epoch": 1.6118683162088487, - "grad_norm": 6.588851396234455, - "learning_rate": 9.279577392244971e-06, - "loss": 0.7607, + "epoch": 1.0854278452414572, + "grad_norm": 3.6715130743360023, + "learning_rate": 1.476095570720242e-05, + "loss": 0.675, "step": 7687 }, { - "epoch": 1.6120780037743763, - "grad_norm": 6.254578303206539, - "learning_rate": 9.277319408651934e-06, - "loss": 0.708, + "epoch": 1.085569048291443, + "grad_norm": 4.313387898771667, + "learning_rate": 1.4759615057795895e-05, + "loss": 0.669, "step": 7688 }, { - "epoch": 1.6122876913399036, - "grad_norm": 7.459935497985122, - "learning_rate": 9.275061462097525e-06, - "loss": 1.0482, + "epoch": 1.085710251341429, + "grad_norm": 3.368996217545799, + "learning_rate": 1.4758274297777974e-05, + "loss": 0.5682, "step": 7689 }, { - "epoch": 1.6124973789054309, - "grad_norm": 6.847227334735842, - "learning_rate": 9.272803552697477e-06, - "loss": 0.736, + "epoch": 1.0858514543914148, + "grad_norm": 2.6796123196495256, + "learning_rate": 1.4756933427179814e-05, + "loss": 0.4548, "step": 7690 }, { - "epoch": 1.6127070664709584, - "grad_norm": 6.202760044137037, - "learning_rate": 9.27054568056751e-06, - "loss": 0.5946, + "epoch": 1.0859926574414007, + "grad_norm": 3.4924615278274955, + "learning_rate": 1.4755592446032576e-05, + "loss": 0.5272, "step": 7691 }, { - "epoch": 1.6129167540364855, - "grad_norm": 6.186885385079247, - "learning_rate": 9.268287845823339e-06, - "loss": 0.7232, + "epoch": 1.0861338604913866, + "grad_norm": 3.6780020580981767, + "learning_rate": 1.475425135436742e-05, + "loss": 0.5829, "step": 7692 }, { - "epoch": 1.613126441602013, - "grad_norm": 6.712424454639081, - "learning_rate": 9.266030048580685e-06, - "loss": 0.821, + "epoch": 1.0862750635413725, + "grad_norm": 3.356910711620978, + "learning_rate": 1.475291015221552e-05, + "loss": 0.588, "step": 7693 }, { - "epoch": 1.6133361291675403, - "grad_norm": 8.081996553281089, - "learning_rate": 9.263772288955269e-06, - "loss": 1.1096, + "epoch": 1.0864162665913584, + "grad_norm": 4.3427998415794065, + "learning_rate": 1.4751568839608036e-05, + "loss": 0.8802, "step": 7694 }, { - "epoch": 1.6135458167330676, - "grad_norm": 8.163160732650868, - "learning_rate": 9.261514567062792e-06, - "loss": 0.9729, + "epoch": 1.0865574696413443, + "grad_norm": 2.826537870391396, + "learning_rate": 1.475022741657615e-05, + "loss": 0.4491, "step": 7695 }, { - "epoch": 1.6137555042985952, - "grad_norm": 6.819508470747861, - "learning_rate": 9.259256883018979e-06, - "loss": 0.7247, + "epoch": 1.0866986726913301, + "grad_norm": 3.7640186052150786, + "learning_rate": 1.4748885883151028e-05, + "loss": 0.6987, "step": 7696 }, { - "epoch": 1.6139651918641225, - "grad_norm": 7.1199469147694625, - "learning_rate": 9.256999236939537e-06, - "loss": 0.862, + "epoch": 1.086839875741316, + "grad_norm": 2.8545900324697633, + "learning_rate": 1.4747544239363846e-05, + "loss": 0.507, "step": 7697 }, { - "epoch": 1.6141748794296498, - "grad_norm": 7.3173584164444785, - "learning_rate": 9.25474162894017e-06, - "loss": 0.5913, + "epoch": 1.086981078791302, + "grad_norm": 3.9918760572072833, + "learning_rate": 1.4746202485245789e-05, + "loss": 0.7448, "step": 7698 }, { - "epoch": 1.6143845669951773, - "grad_norm": 6.337892406269405, - "learning_rate": 9.252484059136586e-06, - "loss": 0.5719, + "epoch": 1.0871222818412878, + "grad_norm": 4.128515412270134, + "learning_rate": 1.4744860620828034e-05, + "loss": 0.7309, "step": 7699 }, { - "epoch": 1.6145942545607046, - "grad_norm": 6.646045209862763, - "learning_rate": 9.250226527644493e-06, - "loss": 0.7902, + "epoch": 1.0872634848912737, + "grad_norm": 3.73825083604541, + "learning_rate": 1.474351864614177e-05, + "loss": 0.5917, "step": 7700 }, { - "epoch": 1.614803942126232, - "grad_norm": 6.752093276979535, - "learning_rate": 9.24796903457959e-06, - "loss": 0.6803, + "epoch": 1.0874046879412596, + "grad_norm": 3.1329917887241403, + "learning_rate": 1.474217656121818e-05, + "loss": 0.6208, "step": 7701 }, { - "epoch": 1.6150136296917594, - "grad_norm": 7.643069592921631, - "learning_rate": 9.245711580057578e-06, - "loss": 0.7636, + "epoch": 1.0875458909912454, + "grad_norm": 3.6308912832373754, + "learning_rate": 1.4740834366088454e-05, + "loss": 0.6721, "step": 7702 }, { - "epoch": 1.6152233172572865, - "grad_norm": 6.454717226082348, - "learning_rate": 9.243454164194158e-06, - "loss": 0.6316, + "epoch": 1.0876870940412313, + "grad_norm": 4.178957061798229, + "learning_rate": 1.4739492060783787e-05, + "loss": 0.6853, "step": 7703 }, { - "epoch": 1.615433004822814, - "grad_norm": 6.7170588879563615, - "learning_rate": 9.241196787105022e-06, - "loss": 0.7073, + "epoch": 1.0878282970912172, + "grad_norm": 3.295400953874177, + "learning_rate": 1.4738149645335369e-05, + "loss": 0.5849, "step": 7704 }, { - "epoch": 1.6156426923883414, - "grad_norm": 6.914804418408023, - "learning_rate": 9.238939448905865e-06, - "loss": 0.7462, + "epoch": 1.087969500141203, + "grad_norm": 3.377253858923734, + "learning_rate": 1.47368071197744e-05, + "loss": 0.6513, "step": 7705 }, { - "epoch": 1.6158523799538687, - "grad_norm": 6.18811509733816, - "learning_rate": 9.236682149712386e-06, - "loss": 0.6062, + "epoch": 1.088110703191189, + "grad_norm": 3.182587799336959, + "learning_rate": 1.4735464484132079e-05, + "loss": 0.5257, "step": 7706 }, { - "epoch": 1.6160620675193962, - "grad_norm": 8.158439158376162, - "learning_rate": 9.234424889640266e-06, - "loss": 0.7235, + "epoch": 1.0882519062411748, + "grad_norm": 3.147312263271996, + "learning_rate": 1.473412173843961e-05, + "loss": 0.5661, "step": 7707 }, { - "epoch": 1.6162717550849235, - "grad_norm": 6.435878491576857, - "learning_rate": 9.232167668805202e-06, - "loss": 0.7737, + "epoch": 1.0883931092911607, + "grad_norm": 3.081011010253514, + "learning_rate": 1.4732778882728193e-05, + "loss": 0.5715, "step": 7708 }, { - "epoch": 1.6164814426504508, - "grad_norm": 6.668656512325095, - "learning_rate": 9.229910487322876e-06, - "loss": 0.5984, + "epoch": 1.0885343123411466, + "grad_norm": 11.539098034700615, + "learning_rate": 1.473143591702904e-05, + "loss": 0.8634, "step": 7709 }, { - "epoch": 1.6166911302159783, - "grad_norm": 6.845463158922423, - "learning_rate": 9.22765334530897e-06, - "loss": 0.6837, + "epoch": 1.0886755153911325, + "grad_norm": 4.211549305181795, + "learning_rate": 1.4730092841373362e-05, + "loss": 0.6738, "step": 7710 }, { - "epoch": 1.6169008177815054, - "grad_norm": 6.389256813563026, - "learning_rate": 9.225396242879171e-06, - "loss": 0.6051, + "epoch": 1.0888167184411184, + "grad_norm": 3.2229816773682938, + "learning_rate": 1.4728749655792367e-05, + "loss": 0.5335, "step": 7711 }, { - "epoch": 1.617110505347033, - "grad_norm": 5.680118568209008, - "learning_rate": 9.223139180149161e-06, - "loss": 0.4734, + "epoch": 1.0889579214911043, + "grad_norm": 3.840789445851332, + "learning_rate": 1.4727406360317274e-05, + "loss": 0.6091, "step": 7712 }, { - "epoch": 1.6173201929125602, - "grad_norm": 9.2518072146989, - "learning_rate": 9.220882157234613e-06, - "loss": 1.0221, + "epoch": 1.0890991245410901, + "grad_norm": 3.992168857421405, + "learning_rate": 1.4726062954979296e-05, + "loss": 0.7044, "step": 7713 }, { - "epoch": 1.6175298804780875, - "grad_norm": 6.583159640906543, - "learning_rate": 9.218625174251203e-06, - "loss": 0.7512, + "epoch": 1.089240327591076, + "grad_norm": 3.412593876722597, + "learning_rate": 1.4724719439809659e-05, + "loss": 0.601, "step": 7714 }, { - "epoch": 1.617739568043615, - "grad_norm": 6.778190902839815, - "learning_rate": 9.216368231314616e-06, - "loss": 0.5849, + "epoch": 1.089381530641062, + "grad_norm": 3.40264751201409, + "learning_rate": 1.472337581483958e-05, + "loss": 0.589, "step": 7715 }, { - "epoch": 1.6179492556091424, - "grad_norm": 6.1380748567697525, - "learning_rate": 9.21411132854051e-06, - "loss": 0.5401, + "epoch": 1.0895227336910478, + "grad_norm": 4.227426778325474, + "learning_rate": 1.472203208010029e-05, + "loss": 0.6007, "step": 7716 }, { - "epoch": 1.6181589431746697, - "grad_norm": 7.266616727456601, - "learning_rate": 9.211854466044561e-06, - "loss": 0.6502, + "epoch": 1.0896639367410337, + "grad_norm": 3.315217980428389, + "learning_rate": 1.472068823562301e-05, + "loss": 0.6216, "step": 7717 }, { - "epoch": 1.6183686307401972, - "grad_norm": 7.306867193447994, - "learning_rate": 9.209597643942442e-06, - "loss": 0.6686, + "epoch": 1.0898051397910196, + "grad_norm": 3.305687771359706, + "learning_rate": 1.4719344281438977e-05, + "loss": 0.5749, "step": 7718 }, { - "epoch": 1.6185783183057245, - "grad_norm": 8.986654667226633, - "learning_rate": 9.207340862349812e-06, - "loss": 0.5342, + "epoch": 1.0899463428410054, + "grad_norm": 3.66761936308962, + "learning_rate": 1.471800021757942e-05, + "loss": 0.5962, "step": 7719 }, { - "epoch": 1.6187880058712518, - "grad_norm": 7.330866395700292, - "learning_rate": 9.205084121382334e-06, - "loss": 0.8546, + "epoch": 1.0900875458909913, + "grad_norm": 3.506245668525747, + "learning_rate": 1.4716656044075577e-05, + "loss": 0.4483, "step": 7720 }, { - "epoch": 1.6189976934367794, - "grad_norm": 8.72673438380192, - "learning_rate": 9.202827421155683e-06, - "loss": 0.8632, + "epoch": 1.0902287489409772, + "grad_norm": 3.6671245744582923, + "learning_rate": 1.4715311760958682e-05, + "loss": 0.6775, "step": 7721 }, { - "epoch": 1.6192073810023064, - "grad_norm": 6.702695749450438, - "learning_rate": 9.200570761785501e-06, - "loss": 0.5613, + "epoch": 1.090369951990963, + "grad_norm": 2.8927250101618287, + "learning_rate": 1.4713967368259981e-05, + "loss": 0.4471, "step": 7722 }, { - "epoch": 1.619417068567834, - "grad_norm": 6.616371381299311, - "learning_rate": 9.198314143387455e-06, - "loss": 0.6279, + "epoch": 1.090511155040949, + "grad_norm": 3.7355088319410683, + "learning_rate": 1.4712622866010709e-05, + "loss": 0.5822, "step": 7723 }, { - "epoch": 1.6196267561333613, - "grad_norm": 6.369013747765007, - "learning_rate": 9.196057566077203e-06, - "loss": 0.7452, + "epoch": 1.0906523580909349, + "grad_norm": 3.1659860512365414, + "learning_rate": 1.471127825424212e-05, + "loss": 0.5709, "step": 7724 }, { - "epoch": 1.6198364436988886, - "grad_norm": 7.613138906783497, - "learning_rate": 9.193801029970393e-06, - "loss": 0.9167, + "epoch": 1.0907935611409207, + "grad_norm": 3.400972442962781, + "learning_rate": 1.4709933532985458e-05, + "loss": 0.5153, "step": 7725 }, { - "epoch": 1.620046131264416, - "grad_norm": 9.037988270623964, - "learning_rate": 9.191544535182676e-06, - "loss": 0.8354, + "epoch": 1.0909347641909066, + "grad_norm": 3.660754240848205, + "learning_rate": 1.4708588702271978e-05, + "loss": 0.6312, "step": 7726 }, { - "epoch": 1.6202558188299434, - "grad_norm": 8.455700302451687, - "learning_rate": 9.189288081829707e-06, - "loss": 0.9112, + "epoch": 1.0910759672408923, + "grad_norm": 3.4956945888443967, + "learning_rate": 1.4707243762132927e-05, + "loss": 0.6452, "step": 7727 }, { - "epoch": 1.6204655063954707, - "grad_norm": 7.0709877123798766, - "learning_rate": 9.18703167002713e-06, - "loss": 0.82, + "epoch": 1.0912171702908782, + "grad_norm": 4.419085105925884, + "learning_rate": 1.4705898712599563e-05, + "loss": 0.8022, "step": 7728 }, { - "epoch": 1.6206751939609982, - "grad_norm": 5.833403692857144, - "learning_rate": 9.184775299890589e-06, - "loss": 0.6453, + "epoch": 1.091358373340864, + "grad_norm": 3.2879254336430064, + "learning_rate": 1.4704553553703148e-05, + "loss": 0.4783, "step": 7729 }, { - "epoch": 1.6208848815265253, - "grad_norm": 6.505698151080973, - "learning_rate": 9.18251897153573e-06, - "loss": 0.8114, + "epoch": 1.09149957639085, + "grad_norm": 3.9258437449185006, + "learning_rate": 1.4703208285474942e-05, + "loss": 0.6026, "step": 7730 }, { - "epoch": 1.6210945690920529, - "grad_norm": 7.211377931681865, - "learning_rate": 9.180262685078188e-06, - "loss": 0.7509, + "epoch": 1.0916407794408358, + "grad_norm": 2.642947171308434, + "learning_rate": 1.47018629079462e-05, + "loss": 0.4765, "step": 7731 }, { - "epoch": 1.6213042566575802, - "grad_norm": 7.229646746529025, - "learning_rate": 9.178006440633606e-06, - "loss": 0.6695, + "epoch": 1.0917819824908217, + "grad_norm": 3.8015128490651517, + "learning_rate": 1.4700517421148199e-05, + "loss": 0.6552, "step": 7732 }, { - "epoch": 1.6215139442231075, - "grad_norm": 8.54145256196707, - "learning_rate": 9.175750238317622e-06, - "loss": 1.0609, + "epoch": 1.0919231855408076, + "grad_norm": 4.1279238851345506, + "learning_rate": 1.4699171825112206e-05, + "loss": 0.6893, "step": 7733 }, { - "epoch": 1.621723631788635, - "grad_norm": 7.923460238884291, - "learning_rate": 9.173494078245866e-06, - "loss": 0.918, + "epoch": 1.0920643885907935, + "grad_norm": 3.3375311327790955, + "learning_rate": 1.4697826119869483e-05, + "loss": 0.6366, "step": 7734 }, { - "epoch": 1.6219333193541623, - "grad_norm": 6.130069673907095, - "learning_rate": 9.171237960533972e-06, - "loss": 0.5995, + "epoch": 1.0922055916407793, + "grad_norm": 3.2068403139614516, + "learning_rate": 1.4696480305451313e-05, + "loss": 0.5727, "step": 7735 }, { - "epoch": 1.6221430069196896, - "grad_norm": 6.610556106485422, - "learning_rate": 9.168981885297567e-06, - "loss": 0.7412, + "epoch": 1.0923467946907652, + "grad_norm": 3.2756519391559458, + "learning_rate": 1.4695134381888969e-05, + "loss": 0.5248, "step": 7736 }, { - "epoch": 1.6223526944852171, - "grad_norm": 6.537399381012326, - "learning_rate": 9.166725852652287e-06, - "loss": 0.6613, + "epoch": 1.0924879977407511, + "grad_norm": 3.1589507382766406, + "learning_rate": 1.4693788349213729e-05, + "loss": 0.5314, "step": 7737 }, { - "epoch": 1.6225623820507444, - "grad_norm": 6.549146352386218, - "learning_rate": 9.16446986271375e-06, - "loss": 0.6692, + "epoch": 1.092629200790737, + "grad_norm": 3.579626312188878, + "learning_rate": 1.4692442207456875e-05, + "loss": 0.603, "step": 7738 }, { - "epoch": 1.6227720696162717, - "grad_norm": 7.297369259180637, - "learning_rate": 9.162213915597582e-06, - "loss": 0.8235, + "epoch": 1.0927704038407229, + "grad_norm": 2.9995292102255044, + "learning_rate": 1.469109595664969e-05, + "loss": 0.4995, "step": 7739 }, { - "epoch": 1.6229817571817993, - "grad_norm": 8.056141968987534, - "learning_rate": 9.159958011419406e-06, - "loss": 1.0264, + "epoch": 1.0929116068907088, + "grad_norm": 3.9753316500252187, + "learning_rate": 1.468974959682346e-05, + "loss": 0.7579, "step": 7740 }, { - "epoch": 1.6231914447473264, - "grad_norm": 9.394002331696816, - "learning_rate": 9.157702150294834e-06, - "loss": 1.0481, + "epoch": 1.0930528099406946, + "grad_norm": 4.294777544309448, + "learning_rate": 1.4688403128009477e-05, + "loss": 0.6987, "step": 7741 }, { - "epoch": 1.6234011323128539, - "grad_norm": 8.525512423961626, - "learning_rate": 9.15544633233949e-06, - "loss": 0.8341, + "epoch": 1.0931940129906805, + "grad_norm": 3.6495303442917, + "learning_rate": 1.468705655023903e-05, + "loss": 0.6572, "step": 7742 }, { - "epoch": 1.6236108198783812, - "grad_norm": 7.818781009419329, - "learning_rate": 9.153190557668988e-06, - "loss": 0.8095, + "epoch": 1.0933352160406664, + "grad_norm": 2.8004883813199615, + "learning_rate": 1.4685709863543412e-05, + "loss": 0.444, "step": 7743 }, { - "epoch": 1.6238205074439085, - "grad_norm": 5.356602716697762, - "learning_rate": 9.150934826398937e-06, - "loss": 0.4621, + "epoch": 1.0934764190906523, + "grad_norm": 3.7867684773572545, + "learning_rate": 1.468436306795392e-05, + "loss": 0.7659, "step": 7744 }, { - "epoch": 1.624030195009436, - "grad_norm": 7.193194220431343, - "learning_rate": 9.148679138644947e-06, - "loss": 0.8949, + "epoch": 1.0936176221406382, + "grad_norm": 3.4306558268382763, + "learning_rate": 1.4683016163501855e-05, + "loss": 0.515, "step": 7745 }, { - "epoch": 1.6242398825749633, - "grad_norm": 7.872547340112933, - "learning_rate": 9.146423494522633e-06, - "loss": 0.9217, + "epoch": 1.093758825190624, + "grad_norm": 3.63794221100813, + "learning_rate": 1.4681669150218516e-05, + "loss": 0.5577, "step": 7746 }, { - "epoch": 1.6244495701404906, - "grad_norm": 7.276877173815217, - "learning_rate": 9.144167894147589e-06, - "loss": 0.7924, + "epoch": 1.09390002824061, + "grad_norm": 3.415415323582596, + "learning_rate": 1.4680322028135208e-05, + "loss": 0.5388, "step": 7747 }, { - "epoch": 1.6246592577060182, - "grad_norm": 6.776400272317756, - "learning_rate": 9.14191233763543e-06, - "loss": 0.6, + "epoch": 1.0940412312905958, + "grad_norm": 2.837129728461303, + "learning_rate": 1.4678974797283235e-05, + "loss": 0.5122, "step": 7748 }, { - "epoch": 1.6248689452715452, - "grad_norm": 7.008105977822801, - "learning_rate": 9.139656825101751e-06, - "loss": 0.8169, + "epoch": 1.0941824343405817, + "grad_norm": 3.4413649445199392, + "learning_rate": 1.467762745769391e-05, + "loss": 0.6312, "step": 7749 }, { - "epoch": 1.6250786328370728, - "grad_norm": 7.4436658429806295, - "learning_rate": 9.13740135666215e-06, - "loss": 0.8182, + "epoch": 1.0943236373905676, + "grad_norm": 3.9963929080664133, + "learning_rate": 1.4676280009398544e-05, + "loss": 0.5671, "step": 7750 }, { - "epoch": 1.6252883204026, - "grad_norm": 6.69991438648829, - "learning_rate": 9.135145932432223e-06, - "loss": 0.5884, + "epoch": 1.0944648404405535, + "grad_norm": 3.65024202277792, + "learning_rate": 1.4674932452428449e-05, + "loss": 0.6386, "step": 7751 }, { - "epoch": 1.6254980079681274, - "grad_norm": 7.099049344858954, - "learning_rate": 9.132890552527575e-06, - "loss": 0.7609, + "epoch": 1.0946060434905394, + "grad_norm": 4.066079033500336, + "learning_rate": 1.4673584786814943e-05, + "loss": 0.792, "step": 7752 }, { - "epoch": 1.625707695533655, - "grad_norm": 6.523643052980017, - "learning_rate": 9.130635217063783e-06, - "loss": 0.6455, + "epoch": 1.0947472465405252, + "grad_norm": 3.9657715251004264, + "learning_rate": 1.4672237012589345e-05, + "loss": 0.5664, "step": 7753 }, { - "epoch": 1.6259173830991822, - "grad_norm": 9.535756779848574, - "learning_rate": 9.128379926156446e-06, - "loss": 1.0559, + "epoch": 1.0948884495905111, + "grad_norm": 3.6803043861779283, + "learning_rate": 1.4670889129782976e-05, + "loss": 0.5699, "step": 7754 }, { - "epoch": 1.6261270706647095, - "grad_norm": 7.467107567296489, - "learning_rate": 9.126124679921153e-06, - "loss": 0.8032, + "epoch": 1.095029652640497, + "grad_norm": 4.9960771706770615, + "learning_rate": 1.4669541138427164e-05, + "loss": 0.7969, "step": 7755 }, { - "epoch": 1.626336758230237, - "grad_norm": 6.459242281077416, - "learning_rate": 9.123869478473482e-06, - "loss": 0.7315, + "epoch": 1.095170855690483, + "grad_norm": 4.903308346815577, + "learning_rate": 1.4668193038553229e-05, + "loss": 0.7935, "step": 7756 }, { - "epoch": 1.6265464457957644, - "grad_norm": 8.208146292785123, - "learning_rate": 9.121614321929018e-06, - "loss": 0.8942, + "epoch": 1.0953120587404688, + "grad_norm": 3.669656306715767, + "learning_rate": 1.4666844830192504e-05, + "loss": 0.5531, "step": 7757 }, { - "epoch": 1.6267561333612917, - "grad_norm": 5.875277495235046, - "learning_rate": 9.119359210403347e-06, - "loss": 0.7302, + "epoch": 1.0954532617904547, + "grad_norm": 3.5228433733498847, + "learning_rate": 1.466549651337632e-05, + "loss": 0.6824, "step": 7758 }, { - "epoch": 1.6269658209268192, - "grad_norm": 6.658796999545085, - "learning_rate": 9.117104144012044e-06, - "loss": 0.5731, + "epoch": 1.0955944648404405, + "grad_norm": 3.7558130818602593, + "learning_rate": 1.4664148088136015e-05, + "loss": 0.6843, "step": 7759 }, { - "epoch": 1.6271755084923463, - "grad_norm": 8.764316501335495, - "learning_rate": 9.114849122870683e-06, - "loss": 0.8873, + "epoch": 1.0957356678904264, + "grad_norm": 3.3921698572480836, + "learning_rate": 1.466279955450292e-05, + "loss": 0.5582, "step": 7760 }, { - "epoch": 1.6273851960578738, - "grad_norm": 5.514092865093092, - "learning_rate": 9.112594147094842e-06, - "loss": 0.6521, + "epoch": 1.0958768709404123, + "grad_norm": 3.520896591589031, + "learning_rate": 1.4661450912508379e-05, + "loss": 0.5666, "step": 7761 }, { - "epoch": 1.627594883623401, - "grad_norm": 7.532641461594116, - "learning_rate": 9.110339216800085e-06, - "loss": 0.9602, + "epoch": 1.0960180739903982, + "grad_norm": 3.7955380490964945, + "learning_rate": 1.4660102162183732e-05, + "loss": 0.7396, "step": 7762 }, { - "epoch": 1.6278045711889284, - "grad_norm": 6.584313082856506, - "learning_rate": 9.108084332101989e-06, - "loss": 0.7568, + "epoch": 1.096159277040384, + "grad_norm": 3.5258617402707917, + "learning_rate": 1.4658753303560322e-05, + "loss": 0.6674, "step": 7763 }, { - "epoch": 1.628014258754456, - "grad_norm": 9.240860782331735, - "learning_rate": 9.105829493116122e-06, - "loss": 0.9935, + "epoch": 1.09630048009037, + "grad_norm": 3.6918204313264997, + "learning_rate": 1.4657404336669498e-05, + "loss": 0.6779, "step": 7764 }, { - "epoch": 1.6282239463199832, - "grad_norm": 7.217072751833694, - "learning_rate": 9.10357469995804e-06, - "loss": 0.7553, + "epoch": 1.0964416831403558, + "grad_norm": 3.819468449125098, + "learning_rate": 1.4656055261542605e-05, + "loss": 0.6012, "step": 7765 }, { - "epoch": 1.6284336338855105, - "grad_norm": 7.225607625019034, - "learning_rate": 9.10131995274331e-06, - "loss": 0.7728, + "epoch": 1.0965828861903417, + "grad_norm": 3.551736959731318, + "learning_rate": 1.4654706078211003e-05, + "loss": 0.5546, "step": 7766 }, { - "epoch": 1.628643321451038, - "grad_norm": 8.019962886801872, - "learning_rate": 9.099065251587493e-06, - "loss": 0.7569, + "epoch": 1.0967240892403276, + "grad_norm": 4.18147791355012, + "learning_rate": 1.4653356786706043e-05, + "loss": 0.7529, "step": 7767 }, { - "epoch": 1.6288530090165652, - "grad_norm": 6.579211521914319, - "learning_rate": 9.096810596606142e-06, - "loss": 0.6929, + "epoch": 1.0968652922903135, + "grad_norm": 3.2017484757665198, + "learning_rate": 1.4652007387059077e-05, + "loss": 0.5326, "step": 7768 }, { - "epoch": 1.6290626965820927, - "grad_norm": 7.993381902048428, - "learning_rate": 9.094555987914814e-06, - "loss": 0.8417, + "epoch": 1.0970064953402994, + "grad_norm": 3.4245555506166356, + "learning_rate": 1.4650657879301471e-05, + "loss": 0.6441, "step": 7769 }, { - "epoch": 1.6292723841476202, - "grad_norm": 6.976837203922693, - "learning_rate": 9.092301425629066e-06, - "loss": 0.8017, + "epoch": 1.0971476983902853, + "grad_norm": 3.525466499718918, + "learning_rate": 1.4649308263464583e-05, + "loss": 0.5982, "step": 7770 }, { - "epoch": 1.6294820717131473, - "grad_norm": 6.343367025367347, - "learning_rate": 9.09004690986444e-06, - "loss": 0.7216, + "epoch": 1.0972889014402711, + "grad_norm": 2.744184608810498, + "learning_rate": 1.4647958539579779e-05, + "loss": 0.43, "step": 7771 }, { - "epoch": 1.6296917592786748, - "grad_norm": 7.341908328338591, - "learning_rate": 9.087792440736489e-06, - "loss": 0.6986, + "epoch": 1.097430104490257, + "grad_norm": 2.6201528990653893, + "learning_rate": 1.4646608707678428e-05, + "loss": 0.5038, "step": 7772 }, { - "epoch": 1.6299014468442021, - "grad_norm": 7.76041552743114, - "learning_rate": 9.085538018360762e-06, - "loss": 0.9403, + "epoch": 1.097571307540243, + "grad_norm": 3.3736036531476166, + "learning_rate": 1.4645258767791892e-05, + "loss": 0.6937, "step": 7773 }, { - "epoch": 1.6301111344097294, - "grad_norm": 7.1234580369252924, - "learning_rate": 9.08328364285279e-06, - "loss": 0.7101, + "epoch": 1.0977125105902288, + "grad_norm": 3.328475328718485, + "learning_rate": 1.4643908719951551e-05, + "loss": 0.5456, "step": 7774 }, { - "epoch": 1.630320821975257, - "grad_norm": 7.985365803806052, - "learning_rate": 9.081029314328126e-06, - "loss": 0.9688, + "epoch": 1.0978537136402147, + "grad_norm": 4.367293985370606, + "learning_rate": 1.4642558564188781e-05, + "loss": 0.6067, "step": 7775 }, { - "epoch": 1.6305305095407843, - "grad_norm": 6.228212972436947, - "learning_rate": 9.078775032902306e-06, - "loss": 0.6933, + "epoch": 1.0979949166902006, + "grad_norm": 3.767829317927306, + "learning_rate": 1.464120830053495e-05, + "loss": 0.6131, "step": 7776 }, { - "epoch": 1.6307401971063116, - "grad_norm": 7.978440310520784, - "learning_rate": 9.07652079869086e-06, - "loss": 0.6719, + "epoch": 1.0981361197401864, + "grad_norm": 3.080562223734928, + "learning_rate": 1.4639857929021441e-05, + "loss": 0.4589, "step": 7777 }, { - "epoch": 1.630949884671839, - "grad_norm": 8.266884138862261, - "learning_rate": 9.07426661180932e-06, - "loss": 0.8062, + "epoch": 1.0982773227901723, + "grad_norm": 3.2825126428306834, + "learning_rate": 1.4638507449679642e-05, + "loss": 0.5849, "step": 7778 }, { - "epoch": 1.6311595722373662, - "grad_norm": 8.028440388954458, - "learning_rate": 9.072012472373229e-06, - "loss": 0.7219, + "epoch": 1.0984185258401582, + "grad_norm": 3.395887689016264, + "learning_rate": 1.4637156862540934e-05, + "loss": 0.5328, "step": 7779 }, { - "epoch": 1.6313692598028937, - "grad_norm": 6.094528031504201, - "learning_rate": 9.069758380498106e-06, - "loss": 0.5652, + "epoch": 1.098559728890144, + "grad_norm": 3.8383745200727235, + "learning_rate": 1.4635806167636698e-05, + "loss": 0.6374, "step": 7780 }, { - "epoch": 1.631578947368421, - "grad_norm": 6.660577313405789, - "learning_rate": 9.067504336299478e-06, - "loss": 0.6145, + "epoch": 1.09870093194013, + "grad_norm": 3.5912089738821535, + "learning_rate": 1.4634455364998332e-05, + "loss": 0.6114, "step": 7781 }, { - "epoch": 1.6317886349339483, - "grad_norm": 7.12195376059287, - "learning_rate": 9.065250339892874e-06, - "loss": 0.842, + "epoch": 1.0988421349901158, + "grad_norm": 3.182133617108485, + "learning_rate": 1.4633104454657225e-05, + "loss": 0.5402, "step": 7782 }, { - "epoch": 1.6319983224994759, - "grad_norm": 7.566507908931353, - "learning_rate": 9.062996391393808e-06, - "loss": 0.9187, + "epoch": 1.0989833380401017, + "grad_norm": 3.1548877192541047, + "learning_rate": 1.4631753436644769e-05, + "loss": 0.4529, "step": 7783 }, { - "epoch": 1.6322080100650032, - "grad_norm": 7.93613470835728, - "learning_rate": 9.060742490917802e-06, - "loss": 0.7541, + "epoch": 1.0991245410900876, + "grad_norm": 3.942223905794716, + "learning_rate": 1.4630402310992367e-05, + "loss": 0.6547, "step": 7784 }, { - "epoch": 1.6324176976305305, - "grad_norm": 8.833268826509867, - "learning_rate": 9.058488638580374e-06, - "loss": 0.9715, + "epoch": 1.0992657441400735, + "grad_norm": 4.63374999452884, + "learning_rate": 1.4629051077731412e-05, + "loss": 0.6439, "step": 7785 }, { - "epoch": 1.632627385196058, - "grad_norm": 5.663605550223333, - "learning_rate": 9.056234834497035e-06, - "loss": 0.5545, + "epoch": 1.0994069471900594, + "grad_norm": 4.393550774046112, + "learning_rate": 1.4627699736893309e-05, + "loss": 0.6724, "step": 7786 }, { - "epoch": 1.632837072761585, - "grad_norm": 5.908037337664748, - "learning_rate": 9.053981078783298e-06, - "loss": 0.5953, + "epoch": 1.0995481502400453, + "grad_norm": 4.593537380361326, + "learning_rate": 1.4626348288509465e-05, + "loss": 0.7602, "step": 7787 }, { - "epoch": 1.6330467603271126, - "grad_norm": 7.200108804549128, - "learning_rate": 9.051727371554672e-06, - "loss": 0.6641, + "epoch": 1.0996893532900311, + "grad_norm": 3.258452204600365, + "learning_rate": 1.4624996732611284e-05, + "loss": 0.5972, "step": 7788 }, { - "epoch": 1.6332564478926401, - "grad_norm": 6.320811345698257, - "learning_rate": 9.04947371292666e-06, - "loss": 0.6844, + "epoch": 1.099830556340017, + "grad_norm": 3.6441556434184337, + "learning_rate": 1.4623645069230174e-05, + "loss": 0.5343, "step": 7789 }, { - "epoch": 1.6334661354581672, - "grad_norm": 8.757750205533895, - "learning_rate": 9.047220103014772e-06, - "loss": 0.9923, + "epoch": 1.099971759390003, + "grad_norm": 3.316857697131328, + "learning_rate": 1.4622293298397554e-05, + "loss": 0.5353, "step": 7790 }, { - "epoch": 1.6336758230236947, - "grad_norm": 9.12307464401014, - "learning_rate": 9.044966541934508e-06, - "loss": 0.947, + "epoch": 1.1001129624399888, + "grad_norm": 3.9569567406955803, + "learning_rate": 1.4620941420144828e-05, + "loss": 0.7519, "step": 7791 }, { - "epoch": 1.633885510589222, - "grad_norm": 6.13456740024168, - "learning_rate": 9.042713029801361e-06, - "loss": 0.6018, + "epoch": 1.1002541654899747, + "grad_norm": 3.954889100787794, + "learning_rate": 1.4619589434503426e-05, + "loss": 0.6567, "step": 7792 }, { - "epoch": 1.6340951981547494, - "grad_norm": 8.043935077832542, - "learning_rate": 9.040459566730831e-06, - "loss": 0.991, + "epoch": 1.1003953685399606, + "grad_norm": 4.374201804369765, + "learning_rate": 1.4618237341504754e-05, + "loss": 0.7825, "step": 7793 }, { - "epoch": 1.6343048857202769, - "grad_norm": 6.425912823532984, - "learning_rate": 9.038206152838419e-06, - "loss": 0.5988, + "epoch": 1.1005365715899464, + "grad_norm": 3.450342345875821, + "learning_rate": 1.4616885141180244e-05, + "loss": 0.5746, "step": 7794 }, { - "epoch": 1.6345145732858042, - "grad_norm": 5.748848278214841, - "learning_rate": 9.035952788239604e-06, - "loss": 0.4612, + "epoch": 1.1006777746399323, + "grad_norm": 2.837036113805083, + "learning_rate": 1.4615532833561317e-05, + "loss": 0.5259, "step": 7795 }, { - "epoch": 1.6347242608513315, - "grad_norm": 7.991892382924009, - "learning_rate": 9.03369947304988e-06, - "loss": 0.8539, + "epoch": 1.1008189776899182, + "grad_norm": 3.257726215233877, + "learning_rate": 1.46141804186794e-05, + "loss": 0.6069, "step": 7796 }, { - "epoch": 1.634933948416859, - "grad_norm": 6.419182170042177, - "learning_rate": 9.031446207384737e-06, - "loss": 0.6415, + "epoch": 1.100960180739904, + "grad_norm": 3.241984490455417, + "learning_rate": 1.4612827896565922e-05, + "loss": 0.594, "step": 7797 }, { - "epoch": 1.635143635982386, - "grad_norm": 6.95291576151036, - "learning_rate": 9.029192991359657e-06, - "loss": 0.671, + "epoch": 1.10110138378989, + "grad_norm": 3.0874217907495543, + "learning_rate": 1.4611475267252318e-05, + "loss": 0.6056, "step": 7798 }, { - "epoch": 1.6353533235479136, - "grad_norm": 8.695520854777458, - "learning_rate": 9.026939825090115e-06, - "loss": 0.905, + "epoch": 1.1012425868398759, + "grad_norm": 3.205784122610946, + "learning_rate": 1.461012253077002e-05, + "loss": 0.4582, "step": 7799 }, { - "epoch": 1.635563011113441, - "grad_norm": 8.150198190082445, - "learning_rate": 9.024686708691596e-06, - "loss": 0.8433, + "epoch": 1.1013837898898615, + "grad_norm": 4.483148965402953, + "learning_rate": 1.4608769687150459e-05, + "loss": 0.6732, "step": 7800 }, { - "epoch": 1.6357726986789682, - "grad_norm": 8.242038531787703, - "learning_rate": 9.022433642279579e-06, - "loss": 0.8004, + "epoch": 1.1015249929398474, + "grad_norm": 2.8650037634397996, + "learning_rate": 1.460741673642509e-05, + "loss": 0.467, "step": 7801 }, { - "epoch": 1.6359823862444958, - "grad_norm": 6.7422538807376, - "learning_rate": 9.02018062596953e-06, - "loss": 0.7693, + "epoch": 1.1016661959898333, + "grad_norm": 3.95813080443705, + "learning_rate": 1.460606367862534e-05, + "loss": 0.6976, "step": 7802 }, { - "epoch": 1.636192073810023, - "grad_norm": 7.955059268544132, - "learning_rate": 9.017927659876922e-06, - "loss": 0.7599, + "epoch": 1.1018073990398192, + "grad_norm": 3.3221092910405416, + "learning_rate": 1.460471051378266e-05, + "loss": 0.6026, "step": 7803 }, { - "epoch": 1.6364017613755504, - "grad_norm": 9.334740973055053, - "learning_rate": 9.01567474411723e-06, - "loss": 0.9984, + "epoch": 1.101948602089805, + "grad_norm": 3.872561137847206, + "learning_rate": 1.4603357241928499e-05, + "loss": 0.5976, "step": 7804 }, { - "epoch": 1.636611448941078, - "grad_norm": 6.549294965642516, - "learning_rate": 9.013421878805909e-06, - "loss": 0.7306, + "epoch": 1.102089805139791, + "grad_norm": 3.573724705465519, + "learning_rate": 1.4602003863094303e-05, + "loss": 0.5184, "step": 7805 }, { - "epoch": 1.6368211365066052, - "grad_norm": 6.551145417504215, - "learning_rate": 9.01116906405843e-06, - "loss": 0.5998, + "epoch": 1.1022310081897768, + "grad_norm": 3.5048546940005334, + "learning_rate": 1.4600650377311523e-05, + "loss": 0.5797, "step": 7806 }, { - "epoch": 1.6370308240721325, - "grad_norm": 9.172331068984027, - "learning_rate": 9.008916299990254e-06, - "loss": 0.68, + "epoch": 1.1023722112397627, + "grad_norm": 3.3487253908055004, + "learning_rate": 1.4599296784611617e-05, + "loss": 0.509, "step": 7807 }, { - "epoch": 1.63724051163766, - "grad_norm": 5.972490240531828, - "learning_rate": 9.006663586716832e-06, - "loss": 0.5711, + "epoch": 1.1025134142897486, + "grad_norm": 4.160982313432065, + "learning_rate": 1.4597943085026037e-05, + "loss": 0.6656, "step": 7808 }, { - "epoch": 1.6374501992031871, - "grad_norm": 7.842216548639763, - "learning_rate": 9.004410924353625e-06, - "loss": 0.7333, + "epoch": 1.1026546173397345, + "grad_norm": 4.183974074568474, + "learning_rate": 1.4596589278586248e-05, + "loss": 0.712, "step": 7809 }, { - "epoch": 1.6376598867687147, - "grad_norm": 7.551651092716445, - "learning_rate": 9.002158313016088e-06, - "loss": 0.8457, + "epoch": 1.1027958203897203, + "grad_norm": 3.428799258318688, + "learning_rate": 1.459523536532371e-05, + "loss": 0.5621, "step": 7810 }, { - "epoch": 1.637869574334242, - "grad_norm": 6.171420578132184, - "learning_rate": 8.999905752819667e-06, - "loss": 0.6926, + "epoch": 1.1029370234397062, + "grad_norm": 3.2063473124525905, + "learning_rate": 1.4593881345269885e-05, + "loss": 0.4692, "step": 7811 }, { - "epoch": 1.6380792618997693, - "grad_norm": 6.638668594376376, - "learning_rate": 8.99765324387981e-06, - "loss": 0.6111, + "epoch": 1.1030782264896921, + "grad_norm": 3.646138489855479, + "learning_rate": 1.459252721845624e-05, + "loss": 0.5596, "step": 7812 }, { - "epoch": 1.6382889494652968, - "grad_norm": 6.414472911385584, - "learning_rate": 8.995400786311963e-06, - "loss": 0.6862, + "epoch": 1.103219429539678, + "grad_norm": 3.2485433122195793, + "learning_rate": 1.4591172984914248e-05, + "loss": 0.6639, "step": 7813 }, { - "epoch": 1.638498637030824, - "grad_norm": 7.556447301478495, - "learning_rate": 8.993148380231567e-06, - "loss": 0.8061, + "epoch": 1.1033606325896639, + "grad_norm": 3.8247334888845494, + "learning_rate": 1.4589818644675378e-05, + "loss": 0.581, "step": 7814 }, { - "epoch": 1.6387083245963514, - "grad_norm": 6.4943448568081354, - "learning_rate": 8.990896025754062e-06, - "loss": 0.7759, + "epoch": 1.1035018356396498, + "grad_norm": 4.413777695502822, + "learning_rate": 1.4588464197771106e-05, + "loss": 0.7267, "step": 7815 }, { - "epoch": 1.638918012161879, - "grad_norm": 7.040814841287184, - "learning_rate": 8.988643722994889e-06, - "loss": 0.7806, + "epoch": 1.1036430386896356, + "grad_norm": 3.250930815663188, + "learning_rate": 1.4587109644232906e-05, + "loss": 0.5478, "step": 7816 }, { - "epoch": 1.639127699727406, - "grad_norm": 5.721088550447536, - "learning_rate": 8.986391472069477e-06, - "loss": 0.5134, + "epoch": 1.1037842417396215, + "grad_norm": 3.1614385866756836, + "learning_rate": 1.4585754984092257e-05, + "loss": 0.601, "step": 7817 }, { - "epoch": 1.6393373872929335, - "grad_norm": 6.803899185579097, - "learning_rate": 8.98413927309326e-06, - "loss": 0.5648, + "epoch": 1.1039254447896074, + "grad_norm": 3.8377236357632, + "learning_rate": 1.4584400217380647e-05, + "loss": 0.6595, "step": 7818 }, { - "epoch": 1.6395470748584609, - "grad_norm": 6.7845898536229186, - "learning_rate": 8.98188712618167e-06, - "loss": 0.5402, + "epoch": 1.1040666478395933, + "grad_norm": 3.0901686672976214, + "learning_rate": 1.4583045344129556e-05, + "loss": 0.4225, "step": 7819 }, { - "epoch": 1.6397567624239882, - "grad_norm": 7.477447277540528, - "learning_rate": 8.979635031450127e-06, - "loss": 0.8508, + "epoch": 1.1042078508895792, + "grad_norm": 3.3799772139983215, + "learning_rate": 1.4581690364370466e-05, + "loss": 0.6498, "step": 7820 }, { - "epoch": 1.6399664499895157, - "grad_norm": 6.224998123392957, - "learning_rate": 8.977382989014059e-06, - "loss": 0.5612, + "epoch": 1.104349053939565, + "grad_norm": 2.6768283998672073, + "learning_rate": 1.4580335278134873e-05, + "loss": 0.4169, "step": 7821 }, { - "epoch": 1.640176137555043, - "grad_norm": 7.121249784422646, - "learning_rate": 8.97513099898889e-06, - "loss": 0.8514, + "epoch": 1.104490256989551, + "grad_norm": 4.164338851058762, + "learning_rate": 1.4578980085454268e-05, + "loss": 0.7127, "step": 7822 }, { - "epoch": 1.6403858251205703, - "grad_norm": 8.125016724587613, - "learning_rate": 8.972879061490033e-06, - "loss": 1.0115, + "epoch": 1.1046314600395368, + "grad_norm": 4.188348294921656, + "learning_rate": 1.4577624786360141e-05, + "loss": 0.6602, "step": 7823 }, { - "epoch": 1.6405955126860978, - "grad_norm": 6.954146328934536, - "learning_rate": 8.970627176632901e-06, - "loss": 0.6946, + "epoch": 1.1047726630895227, + "grad_norm": 3.5550282058570644, + "learning_rate": 1.4576269380883992e-05, + "loss": 0.605, "step": 7824 }, { - "epoch": 1.6408052002516251, - "grad_norm": 7.361509274333507, - "learning_rate": 8.96837534453292e-06, - "loss": 0.7038, + "epoch": 1.1049138661395086, + "grad_norm": 3.588329475121107, + "learning_rate": 1.4574913869057319e-05, + "loss": 0.6488, "step": 7825 }, { - "epoch": 1.6410148878171524, - "grad_norm": 5.9429767418405595, - "learning_rate": 8.966123565305484e-06, - "loss": 0.6317, + "epoch": 1.1050550691894945, + "grad_norm": 2.885477416623634, + "learning_rate": 1.4573558250911624e-05, + "loss": 0.5544, "step": 7826 }, { - "epoch": 1.64122457538268, - "grad_norm": 8.631053737205175, - "learning_rate": 8.963871839066013e-06, - "loss": 0.8529, + "epoch": 1.1051962722394804, + "grad_norm": 4.131872877766003, + "learning_rate": 1.457220252647841e-05, + "loss": 0.7971, "step": 7827 }, { - "epoch": 1.641434262948207, - "grad_norm": 6.809005778731341, - "learning_rate": 8.961620165929908e-06, - "loss": 0.7271, + "epoch": 1.1053374752894662, + "grad_norm": 4.0065120227389315, + "learning_rate": 1.457084669578918e-05, + "loss": 0.5347, "step": 7828 }, { - "epoch": 1.6416439505137346, - "grad_norm": 6.470193564204594, - "learning_rate": 8.959368546012568e-06, - "loss": 0.5662, + "epoch": 1.1054786783394521, + "grad_norm": 3.343720472098241, + "learning_rate": 1.4569490758875451e-05, + "loss": 0.5742, "step": 7829 }, { - "epoch": 1.6418536380792619, - "grad_norm": 5.865134245963868, - "learning_rate": 8.957116979429392e-06, - "loss": 0.6353, + "epoch": 1.105619881389438, + "grad_norm": 3.3412583197230674, + "learning_rate": 1.4568134715768727e-05, + "loss": 0.58, "step": 7830 }, { - "epoch": 1.6420633256447892, - "grad_norm": 8.82369511174877, - "learning_rate": 8.954865466295788e-06, - "loss": 0.8361, + "epoch": 1.105761084439424, + "grad_norm": 3.0735209976000526, + "learning_rate": 1.4566778566500528e-05, + "loss": 0.546, "step": 7831 }, { - "epoch": 1.6422730132103167, - "grad_norm": 8.316480211840165, - "learning_rate": 8.952614006727134e-06, - "loss": 1.0116, + "epoch": 1.1059022874894098, + "grad_norm": 3.75119416276313, + "learning_rate": 1.4565422311102367e-05, + "loss": 0.7095, "step": 7832 }, { - "epoch": 1.642482700775844, - "grad_norm": 5.954328749446922, - "learning_rate": 8.95036260083883e-06, - "loss": 0.6046, + "epoch": 1.1060434905393957, + "grad_norm": 3.9449037858382305, + "learning_rate": 1.4564065949605763e-05, + "loss": 0.5789, "step": 7833 }, { - "epoch": 1.6426923883413713, - "grad_norm": 6.9716871060910215, - "learning_rate": 8.948111248746267e-06, - "loss": 0.6077, + "epoch": 1.1061846935893815, + "grad_norm": 4.300112711832745, + "learning_rate": 1.4562709482042237e-05, + "loss": 0.6504, "step": 7834 }, { - "epoch": 1.6429020759068989, - "grad_norm": 7.462563305615036, - "learning_rate": 8.945859950564824e-06, - "loss": 0.7996, + "epoch": 1.1063258966393674, + "grad_norm": 3.3098782902314756, + "learning_rate": 1.4561352908443313e-05, + "loss": 0.5767, "step": 7835 }, { - "epoch": 1.643111763472426, - "grad_norm": 7.119581058729402, - "learning_rate": 8.943608706409885e-06, - "loss": 0.7473, + "epoch": 1.1064670996893533, + "grad_norm": 2.9959811622569035, + "learning_rate": 1.4559996228840518e-05, + "loss": 0.5827, "step": 7836 }, { - "epoch": 1.6433214510379535, - "grad_norm": 9.038190989241881, - "learning_rate": 8.941357516396836e-06, - "loss": 0.8691, + "epoch": 1.1066083027393392, + "grad_norm": 3.5476613947421116, + "learning_rate": 1.4558639443265379e-05, + "loss": 0.5903, "step": 7837 }, { - "epoch": 1.6435311386034808, - "grad_norm": 7.31329374559849, - "learning_rate": 8.939106380641048e-06, - "loss": 0.8647, + "epoch": 1.106749505789325, + "grad_norm": 2.96902744974858, + "learning_rate": 1.4557282551749428e-05, + "loss": 0.4487, "step": 7838 }, { - "epoch": 1.643740826169008, - "grad_norm": 7.319239433420918, - "learning_rate": 8.936855299257898e-06, - "loss": 0.8125, + "epoch": 1.106890708839311, + "grad_norm": 3.232105054044569, + "learning_rate": 1.45559255543242e-05, + "loss": 0.569, "step": 7839 }, { - "epoch": 1.6439505137345356, - "grad_norm": 7.906977798819099, - "learning_rate": 8.934604272362762e-06, - "loss": 0.8577, + "epoch": 1.1070319118892968, + "grad_norm": 4.107841451718482, + "learning_rate": 1.455456845102123e-05, + "loss": 0.681, "step": 7840 }, { - "epoch": 1.644160201300063, - "grad_norm": 8.097594850024912, - "learning_rate": 8.932353300070999e-06, - "loss": 0.8985, + "epoch": 1.1071731149392827, + "grad_norm": 3.508393237420716, + "learning_rate": 1.4553211241872054e-05, + "loss": 0.6274, "step": 7841 }, { - "epoch": 1.6443698888655902, - "grad_norm": 7.569917875226165, - "learning_rate": 8.930102382497983e-06, - "loss": 0.8273, + "epoch": 1.1073143179892686, + "grad_norm": 3.5155265034841285, + "learning_rate": 1.4551853926908215e-05, + "loss": 0.6972, "step": 7842 }, { - "epoch": 1.6445795764311177, - "grad_norm": 8.37187211832073, - "learning_rate": 8.927851519759078e-06, - "loss": 0.876, + "epoch": 1.1074555210392545, + "grad_norm": 3.3399668811061, + "learning_rate": 1.4550496506161258e-05, + "loss": 0.5138, "step": 7843 }, { - "epoch": 1.644789263996645, - "grad_norm": 7.03920992343352, - "learning_rate": 8.92560071196964e-06, - "loss": 0.8471, + "epoch": 1.1075967240892404, + "grad_norm": 4.012000541125126, + "learning_rate": 1.454913897966273e-05, + "loss": 0.728, "step": 7844 }, { - "epoch": 1.6449989515621724, - "grad_norm": 7.064214331672824, - "learning_rate": 8.923349959245028e-06, - "loss": 0.6916, + "epoch": 1.1077379271392263, + "grad_norm": 3.634471627724593, + "learning_rate": 1.454778134744417e-05, + "loss": 0.6325, "step": 7845 }, { - "epoch": 1.6452086391276999, - "grad_norm": 5.956189127791883, - "learning_rate": 8.921099261700601e-06, - "loss": 0.8166, + "epoch": 1.1078791301892121, + "grad_norm": 3.7984707366269386, + "learning_rate": 1.454642360953714e-05, + "loss": 0.6501, "step": 7846 }, { - "epoch": 1.645418326693227, - "grad_norm": 7.408802102290926, - "learning_rate": 8.918848619451704e-06, - "loss": 0.7926, + "epoch": 1.108020333239198, + "grad_norm": 3.1373729854239922, + "learning_rate": 1.454506576597319e-05, + "loss": 0.5193, "step": 7847 }, { - "epoch": 1.6456280142587545, - "grad_norm": 5.948108974255877, - "learning_rate": 8.916598032613692e-06, - "loss": 0.6077, + "epoch": 1.108161536289184, + "grad_norm": 3.753705835873736, + "learning_rate": 1.454370781678387e-05, + "loss": 0.6931, "step": 7848 }, { - "epoch": 1.6458377018242818, - "grad_norm": 6.374324558714478, - "learning_rate": 8.914347501301912e-06, - "loss": 0.6134, + "epoch": 1.1083027393391698, + "grad_norm": 3.869339341721548, + "learning_rate": 1.4542349762000747e-05, + "loss": 0.7346, "step": 7849 }, { - "epoch": 1.646047389389809, - "grad_norm": 7.76905898909541, - "learning_rate": 8.912097025631704e-06, - "loss": 0.8627, + "epoch": 1.1084439423891557, + "grad_norm": 3.623323755066261, + "learning_rate": 1.4540991601655374e-05, + "loss": 0.7027, "step": 7850 }, { - "epoch": 1.6462570769553366, - "grad_norm": 6.180386828779047, - "learning_rate": 8.909846605718409e-06, - "loss": 0.5631, + "epoch": 1.1085851454391416, + "grad_norm": 3.233144135909736, + "learning_rate": 1.4539633335779318e-05, + "loss": 0.536, "step": 7851 }, { - "epoch": 1.646466764520864, - "grad_norm": 7.69337411880533, - "learning_rate": 8.907596241677372e-06, - "loss": 0.8367, + "epoch": 1.1087263484891274, + "grad_norm": 3.8755719844220744, + "learning_rate": 1.4538274964404146e-05, + "loss": 0.5846, "step": 7852 }, { - "epoch": 1.6466764520863912, - "grad_norm": 7.013424175565773, - "learning_rate": 8.905345933623915e-06, - "loss": 0.7733, + "epoch": 1.1088675515391133, + "grad_norm": 2.795064000339568, + "learning_rate": 1.4536916487561423e-05, + "loss": 0.4815, "step": 7853 }, { - "epoch": 1.6468861396519188, - "grad_norm": 7.323874911566226, - "learning_rate": 8.903095681673379e-06, - "loss": 0.8333, + "epoch": 1.1090087545890992, + "grad_norm": 2.8778644159903095, + "learning_rate": 1.4535557905282716e-05, + "loss": 0.48, "step": 7854 }, { - "epoch": 1.6470958272174459, - "grad_norm": 7.066074133945021, - "learning_rate": 8.900845485941095e-06, - "loss": 0.833, + "epoch": 1.109149957639085, + "grad_norm": 3.8060882304694115, + "learning_rate": 1.4534199217599608e-05, + "loss": 0.6187, "step": 7855 }, { - "epoch": 1.6473055147829734, - "grad_norm": 6.9880044649366075, - "learning_rate": 8.898595346542384e-06, - "loss": 0.9081, + "epoch": 1.109291160689071, + "grad_norm": 3.8226041953001153, + "learning_rate": 1.4532840424543664e-05, + "loss": 0.6253, "step": 7856 }, { - "epoch": 1.6475152023485007, - "grad_norm": 6.7151457572736275, - "learning_rate": 8.89634526359257e-06, - "loss": 0.6958, + "epoch": 1.1094323637390568, + "grad_norm": 3.2786791425761947, + "learning_rate": 1.453148152614647e-05, + "loss": 0.6214, "step": 7857 }, { - "epoch": 1.647724889914028, - "grad_norm": 6.103046094916113, - "learning_rate": 8.894095237206977e-06, - "loss": 0.5631, + "epoch": 1.1095735667890427, + "grad_norm": 4.371592262850529, + "learning_rate": 1.4530122522439598e-05, + "loss": 0.6048, "step": 7858 }, { - "epoch": 1.6479345774795555, - "grad_norm": 6.198228014444002, - "learning_rate": 8.891845267500924e-06, - "loss": 0.5738, + "epoch": 1.1097147698390286, + "grad_norm": 2.7640635834712053, + "learning_rate": 1.4528763413454638e-05, + "loss": 0.4284, "step": 7859 }, { - "epoch": 1.6481442650450828, - "grad_norm": 5.692642421466629, - "learning_rate": 8.88959535458972e-06, - "loss": 0.654, + "epoch": 1.1098559728890145, + "grad_norm": 3.474302012448272, + "learning_rate": 1.4527404199223173e-05, + "loss": 0.646, "step": 7860 }, { - "epoch": 1.6483539526106101, - "grad_norm": 7.795451330499722, - "learning_rate": 8.88734549858868e-06, - "loss": 0.9011, + "epoch": 1.1099971759390004, + "grad_norm": 3.789964134041785, + "learning_rate": 1.4526044879776788e-05, + "loss": 0.708, "step": 7861 }, { - "epoch": 1.6485636401761377, - "grad_norm": 6.438274814043322, - "learning_rate": 8.885095699613118e-06, - "loss": 0.6424, + "epoch": 1.1101383789889863, + "grad_norm": 3.3456972625337857, + "learning_rate": 1.4524685455147071e-05, + "loss": 0.6955, "step": 7862 }, { - "epoch": 1.648773327741665, - "grad_norm": 6.772463335780769, - "learning_rate": 8.882845957778328e-06, - "loss": 0.8589, + "epoch": 1.110279582038972, + "grad_norm": 3.5345505032357742, + "learning_rate": 1.4523325925365623e-05, + "loss": 0.5629, "step": 7863 }, { - "epoch": 1.6489830153071923, - "grad_norm": 6.779361235484254, - "learning_rate": 8.880596273199622e-06, - "loss": 0.6972, + "epoch": 1.1104207850889578, + "grad_norm": 3.433202813113254, + "learning_rate": 1.4521966290464033e-05, + "loss": 0.582, "step": 7864 }, { - "epoch": 1.6491927028727198, - "grad_norm": 5.502511403079243, - "learning_rate": 8.878346645992302e-06, - "loss": 0.5442, + "epoch": 1.1105619881389437, + "grad_norm": 3.8992703100265147, + "learning_rate": 1.45206065504739e-05, + "loss": 0.6374, "step": 7865 }, { - "epoch": 1.6494023904382469, - "grad_norm": 7.044055865927619, - "learning_rate": 8.876097076271659e-06, - "loss": 0.6622, + "epoch": 1.1107031911889296, + "grad_norm": 3.3194222320322506, + "learning_rate": 1.4519246705426822e-05, + "loss": 0.5824, "step": 7866 }, { - "epoch": 1.6496120780037744, - "grad_norm": 6.417535907442338, - "learning_rate": 8.87384756415299e-06, - "loss": 0.6642, + "epoch": 1.1108443942389155, + "grad_norm": 3.644766909108725, + "learning_rate": 1.4517886755354403e-05, + "loss": 0.6885, "step": 7867 }, { - "epoch": 1.6498217655693017, - "grad_norm": 7.832673470524373, - "learning_rate": 8.871598109751588e-06, - "loss": 0.7935, + "epoch": 1.1109855972889013, + "grad_norm": 3.8811833698450986, + "learning_rate": 1.4516526700288243e-05, + "loss": 0.6531, "step": 7868 }, { - "epoch": 1.650031453134829, - "grad_norm": 6.5190577643641365, - "learning_rate": 8.869348713182739e-06, - "loss": 0.7521, + "epoch": 1.1111268003388872, + "grad_norm": 3.4941044355056015, + "learning_rate": 1.4515166540259957e-05, + "loss": 0.5669, "step": 7869 }, { - "epoch": 1.6502411407003565, - "grad_norm": 8.338501964790682, - "learning_rate": 8.86709937456173e-06, - "loss": 0.9217, + "epoch": 1.111268003388873, + "grad_norm": 3.97471037815537, + "learning_rate": 1.451380627530115e-05, + "loss": 0.7076, "step": 7870 }, { - "epoch": 1.6504508282658839, - "grad_norm": 7.140592566652326, - "learning_rate": 8.864850094003842e-06, - "loss": 0.8156, + "epoch": 1.111409206438859, + "grad_norm": 3.330975150699957, + "learning_rate": 1.4512445905443433e-05, + "loss": 0.5384, "step": 7871 }, { - "epoch": 1.6506605158314112, - "grad_norm": 6.760609089181651, - "learning_rate": 8.862600871624356e-06, - "loss": 0.741, + "epoch": 1.1115504094888449, + "grad_norm": 3.775330749671015, + "learning_rate": 1.4511085430718423e-05, + "loss": 0.5937, "step": 7872 }, { - "epoch": 1.6508702033969387, - "grad_norm": 6.394354328295808, - "learning_rate": 8.860351707538547e-06, - "loss": 0.5869, + "epoch": 1.1116916125388308, + "grad_norm": 3.322317146681728, + "learning_rate": 1.450972485115774e-05, + "loss": 0.4898, "step": 7873 }, { - "epoch": 1.6510798909624658, - "grad_norm": 6.4446379516652765, - "learning_rate": 8.858102601861693e-06, - "loss": 0.7013, + "epoch": 1.1118328155888166, + "grad_norm": 3.3695921498495927, + "learning_rate": 1.4508364166792993e-05, + "loss": 0.6871, "step": 7874 }, { - "epoch": 1.6512895785279933, - "grad_norm": 6.310371926910119, - "learning_rate": 8.855853554709059e-06, - "loss": 0.7111, + "epoch": 1.1119740186388025, + "grad_norm": 3.475986923491055, + "learning_rate": 1.4507003377655813e-05, + "loss": 0.581, "step": 7875 }, { - "epoch": 1.6514992660935206, - "grad_norm": 9.498116192142136, - "learning_rate": 8.853604566195913e-06, - "loss": 0.9019, + "epoch": 1.1121152216887884, + "grad_norm": 3.8631442375011202, + "learning_rate": 1.450564248377782e-05, + "loss": 0.6962, "step": 7876 }, { - "epoch": 1.651708953659048, - "grad_norm": 7.251788175737595, - "learning_rate": 8.851355636437526e-06, - "loss": 0.7464, + "epoch": 1.1122564247387743, + "grad_norm": 3.1832464689064746, + "learning_rate": 1.4504281485190639e-05, + "loss": 0.4989, "step": 7877 }, { - "epoch": 1.6519186412245754, - "grad_norm": 7.409117399119542, - "learning_rate": 8.84910676554915e-06, - "loss": 0.9153, + "epoch": 1.1123976277887602, + "grad_norm": 3.595818630366584, + "learning_rate": 1.4502920381925905e-05, + "loss": 0.5927, "step": 7878 }, { - "epoch": 1.6521283287901027, - "grad_norm": 7.88757298620162, - "learning_rate": 8.84685795364605e-06, - "loss": 0.7746, + "epoch": 1.112538830838746, + "grad_norm": 3.5225167747688184, + "learning_rate": 1.4501559174015245e-05, + "loss": 0.5516, "step": 7879 }, { - "epoch": 1.65233801635563, - "grad_norm": 7.325593811656621, - "learning_rate": 8.844609200843482e-06, - "loss": 0.7974, + "epoch": 1.112680033888732, + "grad_norm": 3.6428752986088786, + "learning_rate": 1.4500197861490293e-05, + "loss": 0.7149, "step": 7880 }, { - "epoch": 1.6525477039211576, - "grad_norm": 8.29674548135122, - "learning_rate": 8.842360507256693e-06, - "loss": 0.8739, + "epoch": 1.1128212369387178, + "grad_norm": 3.4019754795215404, + "learning_rate": 1.4498836444382689e-05, + "loss": 0.6277, "step": 7881 }, { - "epoch": 1.6527573914866849, - "grad_norm": 5.277429836228957, - "learning_rate": 8.840111873000935e-06, - "loss": 0.4822, + "epoch": 1.1129624399887037, + "grad_norm": 3.282833009947888, + "learning_rate": 1.4497474922724064e-05, + "loss": 0.5469, "step": 7882 }, { - "epoch": 1.6529670790522122, - "grad_norm": 6.434260100921237, - "learning_rate": 8.837863298191462e-06, - "loss": 0.6273, + "epoch": 1.1131036430386896, + "grad_norm": 3.9334948964230683, + "learning_rate": 1.4496113296546068e-05, + "loss": 0.7585, "step": 7883 }, { - "epoch": 1.6531767666177397, - "grad_norm": 7.405569585497389, - "learning_rate": 8.835614782943504e-06, - "loss": 0.8478, + "epoch": 1.1132448460886755, + "grad_norm": 2.9847821444186597, + "learning_rate": 1.4494751565880338e-05, + "loss": 0.4719, "step": 7884 }, { - "epoch": 1.6533864541832668, - "grad_norm": 6.809530668500229, - "learning_rate": 8.83336632737231e-06, - "loss": 0.6153, + "epoch": 1.1133860491386613, + "grad_norm": 2.742510621037806, + "learning_rate": 1.4493389730758525e-05, + "loss": 0.475, "step": 7885 }, { - "epoch": 1.6535961417487943, - "grad_norm": 7.1272301049012174, - "learning_rate": 8.831117931593116e-06, - "loss": 0.7542, + "epoch": 1.1135272521886472, + "grad_norm": 2.9885406855049736, + "learning_rate": 1.4492027791212275e-05, + "loss": 0.5707, "step": 7886 }, { - "epoch": 1.6538058293143216, - "grad_norm": 7.816309694049327, - "learning_rate": 8.828869595721155e-06, - "loss": 0.7419, + "epoch": 1.1136684552386331, + "grad_norm": 3.634929104410084, + "learning_rate": 1.4490665747273238e-05, + "loss": 0.638, "step": 7887 }, { - "epoch": 1.654015516879849, - "grad_norm": 6.8610751167557416, - "learning_rate": 8.826621319871656e-06, - "loss": 0.7819, + "epoch": 1.113809658288619, + "grad_norm": 3.872864363694069, + "learning_rate": 1.4489303598973067e-05, + "loss": 0.6473, "step": 7888 }, { - "epoch": 1.6542252044453765, - "grad_norm": 6.377532304688891, - "learning_rate": 8.824373104159852e-06, - "loss": 0.6748, + "epoch": 1.1139508613386049, + "grad_norm": 3.737025603394471, + "learning_rate": 1.4487941346343423e-05, + "loss": 0.7725, "step": 7889 }, { - "epoch": 1.6544348920109038, - "grad_norm": 6.063294558958377, - "learning_rate": 8.822124948700966e-06, - "loss": 0.5882, + "epoch": 1.1140920643885908, + "grad_norm": 3.3132694666127147, + "learning_rate": 1.448657898941596e-05, + "loss": 0.5655, "step": 7890 }, { - "epoch": 1.654644579576431, - "grad_norm": 5.341939592459933, - "learning_rate": 8.819876853610217e-06, - "loss": 0.5559, + "epoch": 1.1142332674385766, + "grad_norm": 3.4368769639969665, + "learning_rate": 1.4485216528222336e-05, + "loss": 0.5794, "step": 7891 }, { - "epoch": 1.6548542671419586, - "grad_norm": 6.361732821406228, - "learning_rate": 8.81762881900283e-06, - "loss": 0.6792, + "epoch": 1.1143744704885625, + "grad_norm": 3.403826365906247, + "learning_rate": 1.4483853962794218e-05, + "loss": 0.6242, "step": 7892 }, { - "epoch": 1.6550639547074857, - "grad_norm": 5.8629369787132, - "learning_rate": 8.815380844994013e-06, - "loss": 0.5539, + "epoch": 1.1145156735385484, + "grad_norm": 3.4006330416533155, + "learning_rate": 1.4482491293163273e-05, + "loss": 0.4992, "step": 7893 }, { - "epoch": 1.6552736422730132, - "grad_norm": 6.5251112158498685, - "learning_rate": 8.81313293169898e-06, - "loss": 0.8682, + "epoch": 1.1146568765885343, + "grad_norm": 2.928934174469643, + "learning_rate": 1.4481128519361163e-05, + "loss": 0.5215, "step": 7894 }, { - "epoch": 1.6554833298385407, - "grad_norm": 8.40042396561854, - "learning_rate": 8.810885079232949e-06, - "loss": 0.9727, + "epoch": 1.1147980796385202, + "grad_norm": 3.7987835059519264, + "learning_rate": 1.4479765641419561e-05, + "loss": 0.6305, "step": 7895 }, { - "epoch": 1.6556930174040678, - "grad_norm": 7.516003102280768, - "learning_rate": 8.808637287711117e-06, - "loss": 0.8874, + "epoch": 1.114939282688506, + "grad_norm": 3.344805030712181, + "learning_rate": 1.4478402659370144e-05, + "loss": 0.5601, "step": 7896 }, { - "epoch": 1.6559027049695954, - "grad_norm": 7.050681047044563, - "learning_rate": 8.80638955724869e-06, - "loss": 0.7266, + "epoch": 1.115080485738492, + "grad_norm": 4.2006141075272385, + "learning_rate": 1.4477039573244577e-05, + "loss": 0.7168, "step": 7897 }, { - "epoch": 1.6561123925351227, - "grad_norm": 7.833052915303682, - "learning_rate": 8.804141887960871e-06, - "loss": 0.9493, + "epoch": 1.1152216887884778, + "grad_norm": 3.7491839616705884, + "learning_rate": 1.447567638307455e-05, + "loss": 0.7401, "step": 7898 }, { - "epoch": 1.65632208010065, - "grad_norm": 7.195135434102008, - "learning_rate": 8.801894279962849e-06, - "loss": 0.7243, + "epoch": 1.1153628918384637, + "grad_norm": 4.776126246322276, + "learning_rate": 1.4474313088891734e-05, + "loss": 0.7319, "step": 7899 }, { - "epoch": 1.6565317676661775, - "grad_norm": 5.41166646842517, - "learning_rate": 8.799646733369827e-06, - "loss": 0.5467, + "epoch": 1.1155040948884496, + "grad_norm": 3.1544416331419365, + "learning_rate": 1.4472949690727813e-05, + "loss": 0.5832, "step": 7900 }, { - "epoch": 1.6567414552317048, - "grad_norm": 5.9291125246206695, - "learning_rate": 8.797399248296992e-06, - "loss": 0.7297, + "epoch": 1.1156452979384355, + "grad_norm": 4.608978134426898, + "learning_rate": 1.4471586188614475e-05, + "loss": 0.8741, "step": 7901 }, { - "epoch": 1.656951142797232, - "grad_norm": 6.208039422140017, - "learning_rate": 8.795151824859532e-06, - "loss": 0.6777, + "epoch": 1.1157865009884214, + "grad_norm": 3.0758037812421106, + "learning_rate": 1.4470222582583404e-05, + "loss": 0.5281, "step": 7902 }, { - "epoch": 1.6571608303627596, - "grad_norm": 7.577800940929109, - "learning_rate": 8.792904463172626e-06, - "loss": 0.9739, + "epoch": 1.1159277040384072, + "grad_norm": 3.56456740530919, + "learning_rate": 1.4468858872666292e-05, + "loss": 0.523, "step": 7903 }, { - "epoch": 1.6573705179282867, - "grad_norm": 5.202438717515364, - "learning_rate": 8.790657163351467e-06, - "loss": 0.5284, + "epoch": 1.1160689070883931, + "grad_norm": 3.9068822992952565, + "learning_rate": 1.4467495058894829e-05, + "loss": 0.6262, "step": 7904 }, { - "epoch": 1.6575802054938142, - "grad_norm": 6.611731298738552, - "learning_rate": 8.788409925511219e-06, - "loss": 0.6986, + "epoch": 1.116210110138379, + "grad_norm": 3.5907728510669803, + "learning_rate": 1.4466131141300708e-05, + "loss": 0.6103, "step": 7905 }, { - "epoch": 1.6577898930593415, - "grad_norm": 6.1960149729945195, - "learning_rate": 8.786162749767068e-06, - "loss": 0.6538, + "epoch": 1.116351313188365, + "grad_norm": 4.010818038853137, + "learning_rate": 1.446476711991563e-05, + "loss": 0.8251, "step": 7906 }, { - "epoch": 1.6579995806248689, - "grad_norm": 6.093209579388984, - "learning_rate": 8.783915636234182e-06, - "loss": 0.6627, + "epoch": 1.1164925162383508, + "grad_norm": 4.970793172637347, + "learning_rate": 1.4463402994771295e-05, + "loss": 0.8508, "step": 7907 }, { - "epoch": 1.6582092681903964, - "grad_norm": 6.654488490519788, - "learning_rate": 8.781668585027727e-06, - "loss": 0.6589, + "epoch": 1.1166337192883367, + "grad_norm": 2.7989165483081897, + "learning_rate": 1.4462038765899397e-05, + "loss": 0.5315, "step": 7908 }, { - "epoch": 1.6584189557559237, - "grad_norm": 8.942024539762086, - "learning_rate": 8.779421596262867e-06, - "loss": 0.941, + "epoch": 1.1167749223383225, + "grad_norm": 3.705768698215458, + "learning_rate": 1.4460674433331647e-05, + "loss": 0.7603, "step": 7909 }, { - "epoch": 1.658628643321451, - "grad_norm": 6.604483984739474, - "learning_rate": 8.777174670054773e-06, - "loss": 0.7595, + "epoch": 1.1169161253883084, + "grad_norm": 3.0329925131468527, + "learning_rate": 1.445930999709975e-05, + "loss": 0.5263, "step": 7910 }, { - "epoch": 1.6588383308869785, - "grad_norm": 6.480259971865652, - "learning_rate": 8.774927806518594e-06, - "loss": 0.6982, + "epoch": 1.1170573284382943, + "grad_norm": 2.9226169271081814, + "learning_rate": 1.4457945457235416e-05, + "loss": 0.5062, "step": 7911 }, { - "epoch": 1.6590480184525056, - "grad_norm": 7.291289020903068, - "learning_rate": 8.77268100576949e-06, - "loss": 0.8235, + "epoch": 1.1171985314882802, + "grad_norm": 2.7144345965121945, + "learning_rate": 1.4456580813770353e-05, + "loss": 0.573, "step": 7912 }, { - "epoch": 1.6592577060180331, - "grad_norm": 7.916026620836054, - "learning_rate": 8.770434267922616e-06, - "loss": 0.8941, + "epoch": 1.117339734538266, + "grad_norm": 3.13229739693429, + "learning_rate": 1.4455216066736278e-05, + "loss": 0.5061, "step": 7913 }, { - "epoch": 1.6594673935835607, - "grad_norm": 6.96395612257874, - "learning_rate": 8.768187593093116e-06, - "loss": 0.5578, + "epoch": 1.117480937588252, + "grad_norm": 2.9767180097400336, + "learning_rate": 1.4453851216164902e-05, + "loss": 0.5517, "step": 7914 }, { - "epoch": 1.6596770811490877, - "grad_norm": 7.690320321652676, - "learning_rate": 8.765940981396137e-06, - "loss": 0.7181, + "epoch": 1.1176221406382378, + "grad_norm": 2.8957543255816645, + "learning_rate": 1.4452486262087953e-05, + "loss": 0.5066, "step": 7915 }, { - "epoch": 1.6598867687146153, - "grad_norm": 6.978207996729425, - "learning_rate": 8.763694432946826e-06, - "loss": 0.7441, + "epoch": 1.1177633436882237, + "grad_norm": 3.6619997337740426, + "learning_rate": 1.4451121204537144e-05, + "loss": 0.6799, "step": 7916 }, { - "epoch": 1.6600964562801426, - "grad_norm": 7.342379795369436, - "learning_rate": 8.761447947860318e-06, - "loss": 0.8486, + "epoch": 1.1179045467382096, + "grad_norm": 3.36245773607999, + "learning_rate": 1.4449756043544197e-05, + "loss": 0.6311, "step": 7917 }, { - "epoch": 1.6603061438456699, - "grad_norm": 8.185879480307776, - "learning_rate": 8.759201526251751e-06, - "loss": 1.0746, + "epoch": 1.1180457497881955, + "grad_norm": 3.476816275683861, + "learning_rate": 1.4448390779140844e-05, + "loss": 0.6594, "step": 7918 }, { - "epoch": 1.6605158314111974, - "grad_norm": 7.043380014140624, - "learning_rate": 8.756955168236257e-06, - "loss": 0.6542, + "epoch": 1.1181869528381814, + "grad_norm": 3.902083070889247, + "learning_rate": 1.444702541135881e-05, + "loss": 0.81, "step": 7919 }, { - "epoch": 1.6607255189767247, - "grad_norm": 8.837546697426431, - "learning_rate": 8.754708873928969e-06, - "loss": 0.8123, + "epoch": 1.1183281558881673, + "grad_norm": 3.184969466696758, + "learning_rate": 1.4445659940229827e-05, + "loss": 0.5088, "step": 7920 }, { - "epoch": 1.660935206542252, - "grad_norm": 6.402749627382423, - "learning_rate": 8.75246264344501e-06, - "loss": 0.6905, + "epoch": 1.1184693589381531, + "grad_norm": 3.3340183550948614, + "learning_rate": 1.4444294365785627e-05, + "loss": 0.5875, "step": 7921 }, { - "epoch": 1.6611448941077795, - "grad_norm": 6.29802610229713, - "learning_rate": 8.750216476899502e-06, - "loss": 0.6896, + "epoch": 1.118610561988139, + "grad_norm": 4.639294049035789, + "learning_rate": 1.4442928688057945e-05, + "loss": 0.8231, "step": 7922 }, { - "epoch": 1.6613545816733066, - "grad_norm": 7.59738095478557, - "learning_rate": 8.747970374407571e-06, - "loss": 0.8244, + "epoch": 1.118751765038125, + "grad_norm": 3.6958372337249155, + "learning_rate": 1.4441562907078515e-05, + "loss": 0.6819, "step": 7923 }, { - "epoch": 1.6615642692388342, - "grad_norm": 7.4601749187730855, - "learning_rate": 8.745724336084327e-06, - "loss": 0.8671, + "epoch": 1.1188929680881108, + "grad_norm": 3.8052564827946473, + "learning_rate": 1.4440197022879088e-05, + "loss": 0.6306, "step": 7924 }, { - "epoch": 1.6617739568043615, - "grad_norm": 7.30553266498635, - "learning_rate": 8.743478362044885e-06, - "loss": 0.7412, + "epoch": 1.1190341711380967, + "grad_norm": 3.5808524087844447, + "learning_rate": 1.4438831035491392e-05, + "loss": 0.577, "step": 7925 }, { - "epoch": 1.6619836443698888, - "grad_norm": 7.285038067828108, - "learning_rate": 8.74123245240436e-06, - "loss": 0.5466, + "epoch": 1.1191753741880826, + "grad_norm": 3.9595287617811064, + "learning_rate": 1.4437464944947186e-05, + "loss": 0.5724, "step": 7926 }, { - "epoch": 1.6621933319354163, - "grad_norm": 7.193093442804558, - "learning_rate": 8.738986607277855e-06, - "loss": 0.89, + "epoch": 1.1193165772380684, + "grad_norm": 3.769728453735003, + "learning_rate": 1.4436098751278209e-05, + "loss": 0.73, "step": 7927 }, { - "epoch": 1.6624030195009436, - "grad_norm": 7.06242895499402, - "learning_rate": 8.73674082678047e-06, - "loss": 0.6801, + "epoch": 1.1194577802880543, + "grad_norm": 3.8000047314658465, + "learning_rate": 1.4434732454516214e-05, + "loss": 0.6584, "step": 7928 }, { - "epoch": 1.662612707066471, - "grad_norm": 8.133991032846687, - "learning_rate": 8.734495111027314e-06, - "loss": 0.9388, + "epoch": 1.1195989833380402, + "grad_norm": 3.442824658082659, + "learning_rate": 1.443336605469295e-05, + "loss": 0.6977, "step": 7929 }, { - "epoch": 1.6628223946319984, - "grad_norm": 6.684325985452376, - "learning_rate": 8.732249460133472e-06, - "loss": 0.5345, + "epoch": 1.119740186388026, + "grad_norm": 3.4098277596310584, + "learning_rate": 1.4431999551840175e-05, + "loss": 0.5824, "step": 7930 }, { - "epoch": 1.6630320821975257, - "grad_norm": 6.997096318411566, - "learning_rate": 8.730003874214047e-06, - "loss": 0.6719, + "epoch": 1.119881389438012, + "grad_norm": 3.508687191325764, + "learning_rate": 1.4430632945989643e-05, + "loss": 0.6234, "step": 7931 }, { - "epoch": 1.663241769763053, - "grad_norm": 6.503134897674551, - "learning_rate": 8.727758353384127e-06, - "loss": 0.6121, + "epoch": 1.1200225924879978, + "grad_norm": 3.0475893400491856, + "learning_rate": 1.4429266237173116e-05, + "loss": 0.5084, "step": 7932 }, { - "epoch": 1.6634514573285806, - "grad_norm": 6.640523560113847, - "learning_rate": 8.725512897758799e-06, - "loss": 0.6475, + "epoch": 1.1201637955379837, + "grad_norm": 4.239280685901594, + "learning_rate": 1.4427899425422354e-05, + "loss": 0.6969, "step": 7933 }, { - "epoch": 1.6636611448941077, - "grad_norm": 7.127556023431062, - "learning_rate": 8.72326750745314e-06, - "loss": 0.7375, + "epoch": 1.1203049985879696, + "grad_norm": 3.5199850270247652, + "learning_rate": 1.442653251076912e-05, + "loss": 0.6244, "step": 7934 }, { - "epoch": 1.6638708324596352, - "grad_norm": 8.384738109462614, - "learning_rate": 8.721022182582243e-06, - "loss": 0.9261, + "epoch": 1.1204462016379555, + "grad_norm": 3.6987858934057654, + "learning_rate": 1.4425165493245183e-05, + "loss": 0.5922, "step": 7935 }, { - "epoch": 1.6640805200251625, - "grad_norm": 7.685506103557741, - "learning_rate": 8.718776923261172e-06, - "loss": 0.8266, + "epoch": 1.1205874046879412, + "grad_norm": 3.4517869791055675, + "learning_rate": 1.4423798372882315e-05, + "loss": 0.5901, "step": 7936 }, { - "epoch": 1.6642902075906898, - "grad_norm": 7.400069930778782, - "learning_rate": 8.716531729605007e-06, - "loss": 0.8713, + "epoch": 1.120728607737927, + "grad_norm": 2.877165692422827, + "learning_rate": 1.442243114971228e-05, + "loss": 0.5053, "step": 7937 }, { - "epoch": 1.6644998951562173, - "grad_norm": 7.050206434113921, - "learning_rate": 8.71428660172882e-06, - "loss": 0.6644, + "epoch": 1.120869810787913, + "grad_norm": 3.840989322478067, + "learning_rate": 1.4421063823766855e-05, + "loss": 0.6688, "step": 7938 }, { - "epoch": 1.6647095827217446, - "grad_norm": 6.728694507009341, - "learning_rate": 8.712041539747672e-06, - "loss": 0.7154, + "epoch": 1.1210110138378988, + "grad_norm": 2.9147339201257236, + "learning_rate": 1.4419696395077816e-05, + "loss": 0.5499, "step": 7939 }, { - "epoch": 1.664919270287272, - "grad_norm": 5.800972377152287, - "learning_rate": 8.709796543776626e-06, - "loss": 0.6231, + "epoch": 1.1211522168878847, + "grad_norm": 3.855218325141458, + "learning_rate": 1.441832886367694e-05, + "loss": 0.6966, "step": 7940 }, { - "epoch": 1.6651289578527995, - "grad_norm": 7.135230790133888, - "learning_rate": 8.707551613930753e-06, - "loss": 0.7436, + "epoch": 1.1212934199378706, + "grad_norm": 3.616300304653319, + "learning_rate": 1.4416961229596013e-05, + "loss": 0.6364, "step": 7941 }, { - "epoch": 1.6653386454183265, - "grad_norm": 7.279366435575045, - "learning_rate": 8.705306750325093e-06, - "loss": 0.8324, + "epoch": 1.1214346229878565, + "grad_norm": 3.994384697940939, + "learning_rate": 1.441559349286681e-05, + "loss": 0.6097, "step": 7942 }, { - "epoch": 1.665548332983854, - "grad_norm": 6.87544505970825, - "learning_rate": 8.703061953074711e-06, - "loss": 0.8227, + "epoch": 1.1215758260378423, + "grad_norm": 3.5763703165175356, + "learning_rate": 1.4414225653521125e-05, + "loss": 0.6927, "step": 7943 }, { - "epoch": 1.6657580205493814, - "grad_norm": 8.186348512899643, - "learning_rate": 8.700817222294655e-06, - "loss": 0.8456, + "epoch": 1.1217170290878282, + "grad_norm": 3.1831095652791, + "learning_rate": 1.4412857711590742e-05, + "loss": 0.5492, "step": 7944 }, { - "epoch": 1.6659677081149087, - "grad_norm": 8.495462602433276, - "learning_rate": 8.698572558099968e-06, - "loss": 0.7951, + "epoch": 1.121858232137814, + "grad_norm": 4.196942667327182, + "learning_rate": 1.441148966710745e-05, + "loss": 0.7177, "step": 7945 }, { - "epoch": 1.6661773956804362, - "grad_norm": 7.315423616878592, - "learning_rate": 8.696327960605692e-06, - "loss": 0.8457, + "epoch": 1.1219994351878, + "grad_norm": 4.237336048233413, + "learning_rate": 1.4410121520103045e-05, + "loss": 0.6281, "step": 7946 }, { - "epoch": 1.6663870832459635, - "grad_norm": 6.937869589563069, - "learning_rate": 8.694083429926874e-06, - "loss": 0.8152, + "epoch": 1.1221406382377859, + "grad_norm": 3.3103023495428046, + "learning_rate": 1.4408753270609318e-05, + "loss": 0.5448, "step": 7947 }, { - "epoch": 1.6665967708114908, - "grad_norm": 6.44761682817487, - "learning_rate": 8.691838966178542e-06, - "loss": 0.555, + "epoch": 1.1222818412877718, + "grad_norm": 3.2081056077624037, + "learning_rate": 1.440738491865807e-05, + "loss": 0.5696, "step": 7948 }, { - "epoch": 1.6668064583770184, - "grad_norm": 8.031872900507722, - "learning_rate": 8.689594569475732e-06, - "loss": 0.6499, + "epoch": 1.1224230443377576, + "grad_norm": 3.5149348505291873, + "learning_rate": 1.44060164642811e-05, + "loss": 0.648, "step": 7949 }, { - "epoch": 1.6670161459425457, - "grad_norm": 8.077811337552681, - "learning_rate": 8.687350239933476e-06, - "loss": 0.8433, + "epoch": 1.1225642473877435, + "grad_norm": 4.092007143277288, + "learning_rate": 1.4404647907510212e-05, + "loss": 0.6178, "step": 7950 }, { - "epoch": 1.667225833508073, - "grad_norm": 6.540601450125432, - "learning_rate": 8.685105977666793e-06, - "loss": 0.6988, + "epoch": 1.1227054504377294, + "grad_norm": 4.103672124058433, + "learning_rate": 1.4403279248377207e-05, + "loss": 0.6801, "step": 7951 }, { - "epoch": 1.6674355210736005, - "grad_norm": 5.925902040233703, - "learning_rate": 8.68286178279071e-06, - "loss": 0.7093, + "epoch": 1.1228466534877153, + "grad_norm": 3.72210190572086, + "learning_rate": 1.4401910486913892e-05, + "loss": 0.7458, "step": 7952 }, { - "epoch": 1.6676452086391276, - "grad_norm": 7.539617421179522, - "learning_rate": 8.680617655420248e-06, - "loss": 0.7996, + "epoch": 1.1229878565377012, + "grad_norm": 3.4625328086633274, + "learning_rate": 1.4400541623152084e-05, + "loss": 0.6479, "step": 7953 }, { - "epoch": 1.667854896204655, - "grad_norm": 9.000955619924227, - "learning_rate": 8.678373595670417e-06, - "loss": 1.0206, + "epoch": 1.123129059587687, + "grad_norm": 3.9122006971772265, + "learning_rate": 1.4399172657123584e-05, + "loss": 0.6577, "step": 7954 }, { - "epoch": 1.6680645837701824, - "grad_norm": 5.7631604169046495, - "learning_rate": 8.676129603656232e-06, - "loss": 0.5531, + "epoch": 1.123270262637673, + "grad_norm": 3.220392244991836, + "learning_rate": 1.4397803588860213e-05, + "loss": 0.5251, "step": 7955 }, { - "epoch": 1.6682742713357097, - "grad_norm": 6.64776660053362, - "learning_rate": 8.673885679492702e-06, - "loss": 0.7323, + "epoch": 1.1234114656876588, + "grad_norm": 3.497784611356537, + "learning_rate": 1.4396434418393786e-05, + "loss": 0.6971, "step": 7956 }, { - "epoch": 1.6684839589012372, - "grad_norm": 6.363107573233188, - "learning_rate": 8.67164182329483e-06, - "loss": 0.7072, + "epoch": 1.1235526687376447, + "grad_norm": 3.831570551997078, + "learning_rate": 1.4395065145756118e-05, + "loss": 0.6927, "step": 7957 }, { - "epoch": 1.6686936464667645, - "grad_norm": 7.7207073421066275, - "learning_rate": 8.669398035177617e-06, - "loss": 0.9101, + "epoch": 1.1236938717876306, + "grad_norm": 3.317478727476699, + "learning_rate": 1.4393695770979038e-05, + "loss": 0.5294, "step": 7958 }, { - "epoch": 1.6689033340322919, - "grad_norm": 5.865041843001931, - "learning_rate": 8.667154315256066e-06, - "loss": 0.64, + "epoch": 1.1238350748376165, + "grad_norm": 3.413107251677628, + "learning_rate": 1.4392326294094365e-05, + "loss": 0.6854, "step": 7959 }, { - "epoch": 1.6691130215978194, - "grad_norm": 6.759500038707333, - "learning_rate": 8.664910663645166e-06, - "loss": 0.6285, + "epoch": 1.1239762778876023, + "grad_norm": 3.0376805576092005, + "learning_rate": 1.4390956715133928e-05, + "loss": 0.5363, "step": 7960 }, { - "epoch": 1.6693227091633465, - "grad_norm": 8.33527194595454, - "learning_rate": 8.662667080459906e-06, - "loss": 1.1407, + "epoch": 1.1241174809375882, + "grad_norm": 3.6833449997947074, + "learning_rate": 1.438958703412955e-05, + "loss": 0.5677, "step": 7961 }, { - "epoch": 1.669532396728874, - "grad_norm": 6.72025731500222, - "learning_rate": 8.660423565815285e-06, - "loss": 0.821, + "epoch": 1.1242586839875741, + "grad_norm": 3.1442189866414827, + "learning_rate": 1.4388217251113068e-05, + "loss": 0.514, "step": 7962 }, { - "epoch": 1.6697420842944013, - "grad_norm": 6.974269472223837, - "learning_rate": 8.658180119826275e-06, - "loss": 0.6479, + "epoch": 1.12439988703756, + "grad_norm": 3.4723783741248346, + "learning_rate": 1.4386847366116313e-05, + "loss": 0.5331, "step": 7963 }, { - "epoch": 1.6699517718599286, - "grad_norm": 6.2427910447622015, - "learning_rate": 8.655936742607862e-06, - "loss": 0.7162, + "epoch": 1.1245410900875459, + "grad_norm": 2.999395593588539, + "learning_rate": 1.4385477379171116e-05, + "loss": 0.5202, "step": 7964 }, { - "epoch": 1.6701614594254561, - "grad_norm": 6.257374494710077, - "learning_rate": 8.653693434275024e-06, - "loss": 0.575, + "epoch": 1.1246822931375318, + "grad_norm": 3.756574244699541, + "learning_rate": 1.4384107290309317e-05, + "loss": 0.706, "step": 7965 }, { - "epoch": 1.6703711469909834, - "grad_norm": 7.222025461798093, - "learning_rate": 8.651450194942732e-06, - "loss": 0.7048, + "epoch": 1.1248234961875176, + "grad_norm": 2.943707967649355, + "learning_rate": 1.4382737099562765e-05, + "loss": 0.61, "step": 7966 }, { - "epoch": 1.6705808345565107, - "grad_norm": 6.783236509834643, - "learning_rate": 8.649207024725951e-06, - "loss": 0.6906, + "epoch": 1.1249646992375035, + "grad_norm": 3.295947557751666, + "learning_rate": 1.4381366806963291e-05, + "loss": 0.5894, "step": 7967 }, { - "epoch": 1.6707905221220383, - "grad_norm": 8.073130144813996, - "learning_rate": 8.64696392373966e-06, - "loss": 0.9674, + "epoch": 1.1251059022874894, + "grad_norm": 3.9318882015101493, + "learning_rate": 1.4379996412542742e-05, + "loss": 0.5938, "step": 7968 }, { - "epoch": 1.6710002096875656, - "grad_norm": 7.533899201903426, - "learning_rate": 8.644720892098812e-06, - "loss": 0.7443, + "epoch": 1.1252471053374753, + "grad_norm": 3.2779360004570157, + "learning_rate": 1.4378625916332971e-05, + "loss": 0.6174, "step": 7969 }, { - "epoch": 1.6712098972530929, - "grad_norm": 7.1771266238803735, - "learning_rate": 8.64247792991837e-06, - "loss": 0.7647, + "epoch": 1.1253883083874612, + "grad_norm": 3.5026178708860396, + "learning_rate": 1.4377255318365828e-05, + "loss": 0.6425, "step": 7970 }, { - "epoch": 1.6714195848186204, - "grad_norm": 6.81172204387807, - "learning_rate": 8.640235037313289e-06, - "loss": 0.5971, + "epoch": 1.125529511437447, + "grad_norm": 3.933042695927975, + "learning_rate": 1.4375884618673156e-05, + "loss": 0.798, "step": 7971 }, { - "epoch": 1.6716292723841475, - "grad_norm": 8.73713069557914, - "learning_rate": 8.637992214398519e-06, - "loss": 0.9854, + "epoch": 1.125670714487433, + "grad_norm": 3.625369035897644, + "learning_rate": 1.4374513817286819e-05, + "loss": 0.5973, "step": 7972 }, { - "epoch": 1.671838959949675, - "grad_norm": 5.940784803064217, - "learning_rate": 8.635749461289007e-06, - "loss": 0.5894, + "epoch": 1.1258119175374188, + "grad_norm": 3.1590705228041998, + "learning_rate": 1.4373142914238669e-05, + "loss": 0.5434, "step": 7973 }, { - "epoch": 1.6720486475152023, - "grad_norm": 8.179758079792682, - "learning_rate": 8.633506778099708e-06, - "loss": 0.8216, + "epoch": 1.1259531205874047, + "grad_norm": 4.38318676199993, + "learning_rate": 1.4371771909560566e-05, + "loss": 0.7327, "step": 7974 }, { - "epoch": 1.6722583350807296, - "grad_norm": 8.146931451699574, - "learning_rate": 8.631264164945551e-06, - "loss": 0.8369, + "epoch": 1.1260943236373906, + "grad_norm": 3.6657412804290535, + "learning_rate": 1.4370400803284374e-05, + "loss": 0.6319, "step": 7975 }, { - "epoch": 1.6724680226462572, - "grad_norm": 8.496235430840583, - "learning_rate": 8.629021621941482e-06, - "loss": 0.9198, + "epoch": 1.1262355266873765, + "grad_norm": 2.7155885274217315, + "learning_rate": 1.4369029595441953e-05, + "loss": 0.557, "step": 7976 }, { - "epoch": 1.6726777102117845, - "grad_norm": 6.521546106626131, - "learning_rate": 8.626779149202436e-06, - "loss": 0.7298, + "epoch": 1.1263767297373624, + "grad_norm": 3.6045340765390037, + "learning_rate": 1.4367658286065168e-05, + "loss": 0.6744, "step": 7977 }, { - "epoch": 1.6728873977773118, - "grad_norm": 5.430366371788389, - "learning_rate": 8.624536746843334e-06, - "loss": 0.4953, + "epoch": 1.1265179327873482, + "grad_norm": 3.8483966727508845, + "learning_rate": 1.4366286875185895e-05, + "loss": 0.6875, "step": 7978 }, { - "epoch": 1.6730970853428393, - "grad_norm": 9.3905806556264, - "learning_rate": 8.62229441497911e-06, - "loss": 0.8371, + "epoch": 1.1266591358373341, + "grad_norm": 3.619813848703416, + "learning_rate": 1.4364915362835999e-05, + "loss": 0.6724, "step": 7979 }, { - "epoch": 1.6733067729083664, - "grad_norm": 6.917407619280939, - "learning_rate": 8.62005215372469e-06, - "loss": 0.7659, + "epoch": 1.12680033888732, + "grad_norm": 3.5649810468131324, + "learning_rate": 1.4363543749047354e-05, + "loss": 0.6184, "step": 7980 }, { - "epoch": 1.673516460473894, - "grad_norm": 7.608628742628341, - "learning_rate": 8.617809963194992e-06, - "loss": 0.8316, + "epoch": 1.126941541937306, + "grad_norm": 4.372705927484826, + "learning_rate": 1.436217203385184e-05, + "loss": 0.7917, "step": 7981 }, { - "epoch": 1.6737261480394212, - "grad_norm": 6.708058395228349, - "learning_rate": 8.615567843504925e-06, - "loss": 0.7325, + "epoch": 1.1270827449872918, + "grad_norm": 3.204333960688123, + "learning_rate": 1.436080021728133e-05, + "loss": 0.5321, "step": 7982 }, { - "epoch": 1.6739358356049485, - "grad_norm": 7.804047468555352, - "learning_rate": 8.613325794769411e-06, - "loss": 0.792, + "epoch": 1.1272239480372777, + "grad_norm": 3.081117257736331, + "learning_rate": 1.4359428299367707e-05, + "loss": 0.5845, "step": 7983 }, { - "epoch": 1.674145523170476, - "grad_norm": 6.997780450640977, - "learning_rate": 8.611083817103356e-06, - "loss": 0.724, + "epoch": 1.1273651510872635, + "grad_norm": 2.3727841395344753, + "learning_rate": 1.4358056280142851e-05, + "loss": 0.4592, "step": 7984 }, { - "epoch": 1.6743552107360034, - "grad_norm": 6.9139058076484705, - "learning_rate": 8.608841910621662e-06, - "loss": 0.813, + "epoch": 1.1275063541372494, + "grad_norm": 3.7718101687526215, + "learning_rate": 1.4356684159638647e-05, + "loss": 0.7019, "step": 7985 }, { - "epoch": 1.6745648983015307, - "grad_norm": 7.99163256020912, - "learning_rate": 8.606600075439234e-06, - "loss": 0.9029, + "epoch": 1.1276475571872353, + "grad_norm": 4.401860308044719, + "learning_rate": 1.4355311937886988e-05, + "loss": 0.8887, "step": 7986 }, { - "epoch": 1.6747745858670582, - "grad_norm": 8.114803515769998, - "learning_rate": 8.60435831167097e-06, - "loss": 0.779, + "epoch": 1.1277887602372212, + "grad_norm": 3.7985635063426937, + "learning_rate": 1.4353939614919763e-05, + "loss": 0.5451, "step": 7987 }, { - "epoch": 1.6749842734325855, - "grad_norm": 8.015682362584512, - "learning_rate": 8.60211661943176e-06, - "loss": 0.8846, + "epoch": 1.127929963287207, + "grad_norm": 3.5271589522757716, + "learning_rate": 1.4352567190768859e-05, + "loss": 0.5388, "step": 7988 }, { - "epoch": 1.6751939609981128, - "grad_norm": 6.843713158650937, - "learning_rate": 8.599874998836499e-06, - "loss": 0.7526, + "epoch": 1.128071166337193, + "grad_norm": 4.0730218919175005, + "learning_rate": 1.4351194665466173e-05, + "loss": 0.6806, "step": 7989 }, { - "epoch": 1.6754036485636403, - "grad_norm": 8.072510770494533, - "learning_rate": 8.597633450000076e-06, - "loss": 0.9538, + "epoch": 1.1282123693871788, + "grad_norm": 3.3661866266448643, + "learning_rate": 1.4349822039043602e-05, + "loss": 0.5363, "step": 7990 }, { - "epoch": 1.6756133361291674, - "grad_norm": 6.692364131144934, - "learning_rate": 8.595391973037368e-06, - "loss": 0.7711, + "epoch": 1.1283535724371647, + "grad_norm": 3.580187011081078, + "learning_rate": 1.4348449311533043e-05, + "loss": 0.5436, "step": 7991 }, { - "epoch": 1.675823023694695, - "grad_norm": 6.818287099161792, - "learning_rate": 8.593150568063255e-06, - "loss": 0.7255, + "epoch": 1.1284947754871506, + "grad_norm": 3.0649271113437626, + "learning_rate": 1.4347076482966404e-05, + "loss": 0.4958, "step": 7992 }, { - "epoch": 1.6760327112602222, - "grad_norm": 5.995940372667456, - "learning_rate": 8.590909235192622e-06, - "loss": 0.5283, + "epoch": 1.1286359785371365, + "grad_norm": 3.109031077366945, + "learning_rate": 1.4345703553375585e-05, + "loss": 0.5205, "step": 7993 }, { - "epoch": 1.6762423988257495, - "grad_norm": 8.206040461476935, - "learning_rate": 8.58866797454033e-06, - "loss": 0.7554, + "epoch": 1.1287771815871224, + "grad_norm": 3.9196086108273427, + "learning_rate": 1.4344330522792489e-05, + "loss": 0.7327, "step": 7994 }, { - "epoch": 1.676452086391277, - "grad_norm": 5.896634995020528, - "learning_rate": 8.586426786221251e-06, - "loss": 0.6032, + "epoch": 1.1289183846371083, + "grad_norm": 4.007752223044708, + "learning_rate": 1.434295739124903e-05, + "loss": 0.6488, "step": 7995 }, { - "epoch": 1.6766617739568044, - "grad_norm": 5.49976187378121, - "learning_rate": 8.584185670350256e-06, - "loss": 0.6236, + "epoch": 1.1290595876870941, + "grad_norm": 3.2037687643685695, + "learning_rate": 1.4341584158777115e-05, + "loss": 0.5453, "step": 7996 }, { - "epoch": 1.6768714615223317, - "grad_norm": 7.888853783740242, - "learning_rate": 8.581944627042198e-06, - "loss": 0.8424, + "epoch": 1.12920079073708, + "grad_norm": 4.8321674792305265, + "learning_rate": 1.4340210825408663e-05, + "loss": 0.7488, "step": 7997 }, { - "epoch": 1.6770811490878592, - "grad_norm": 5.6785441124012515, - "learning_rate": 8.579703656411933e-06, - "loss": 0.6689, + "epoch": 1.1293419937870657, + "grad_norm": 3.1158658712236322, + "learning_rate": 1.4338837391175582e-05, + "loss": 0.5715, "step": 7998 }, { - "epoch": 1.6772908366533863, - "grad_norm": 6.4909018131165075, - "learning_rate": 8.577462758574327e-06, - "loss": 0.5982, + "epoch": 1.1294831968370516, + "grad_norm": 4.384202269682889, + "learning_rate": 1.4337463856109796e-05, + "loss": 0.5558, "step": 7999 }, { - "epoch": 1.6775005242189138, - "grad_norm": 6.80960107317987, - "learning_rate": 8.575221933644217e-06, - "loss": 0.7077, + "epoch": 1.1296243998870374, + "grad_norm": 3.459322116799393, + "learning_rate": 1.4336090220243222e-05, + "loss": 0.5719, "step": 8000 }, { - "epoch": 1.6777102117844411, - "grad_norm": 6.663749593843585, - "learning_rate": 8.572981181736455e-06, - "loss": 0.6556, + "epoch": 1.1297656029370233, + "grad_norm": 4.088307199936969, + "learning_rate": 1.4334716483607785e-05, + "loss": 0.7876, "step": 8001 }, { - "epoch": 1.6779198993499684, - "grad_norm": 6.371710797541373, - "learning_rate": 8.570740502965884e-06, - "loss": 0.5609, + "epoch": 1.1299068059870092, + "grad_norm": 4.929629642313057, + "learning_rate": 1.4333342646235407e-05, + "loss": 0.8954, "step": 8002 }, { - "epoch": 1.678129586915496, - "grad_norm": 5.893576674590227, - "learning_rate": 8.56849989744734e-06, - "loss": 0.6534, + "epoch": 1.130048009036995, + "grad_norm": 3.6197237824251167, + "learning_rate": 1.433196870815802e-05, + "loss": 0.6106, "step": 8003 }, { - "epoch": 1.6783392744810233, - "grad_norm": 7.46259514234426, - "learning_rate": 8.566259365295656e-06, - "loss": 0.7833, + "epoch": 1.130189212086981, + "grad_norm": 4.036390912956681, + "learning_rate": 1.433059466940755e-05, + "loss": 0.6077, "step": 8004 }, { - "epoch": 1.6785489620465506, - "grad_norm": 5.492003604128599, - "learning_rate": 8.564018906625671e-06, - "loss": 0.5244, + "epoch": 1.1303304151369669, + "grad_norm": 3.464229162365807, + "learning_rate": 1.4329220530015928e-05, + "loss": 0.5948, "step": 8005 }, { - "epoch": 1.678758649612078, - "grad_norm": 5.807446988057185, - "learning_rate": 8.561778521552206e-06, - "loss": 0.6379, + "epoch": 1.1304716181869527, + "grad_norm": 3.2297970336805366, + "learning_rate": 1.4327846290015094e-05, + "loss": 0.5848, "step": 8006 }, { - "epoch": 1.6789683371776054, - "grad_norm": 6.620500762795703, - "learning_rate": 8.559538210190086e-06, - "loss": 0.6097, + "epoch": 1.1306128212369386, + "grad_norm": 3.1033205424633983, + "learning_rate": 1.4326471949436977e-05, + "loss": 0.5241, "step": 8007 }, { - "epoch": 1.6791780247431327, - "grad_norm": 7.835773203155217, - "learning_rate": 8.557297972654132e-06, - "loss": 0.8796, + "epoch": 1.1307540242869245, + "grad_norm": 3.563990061420415, + "learning_rate": 1.4325097508313524e-05, + "loss": 0.5485, "step": 8008 }, { - "epoch": 1.6793877123086602, - "grad_norm": 7.909440111544509, - "learning_rate": 8.555057809059157e-06, - "loss": 0.8656, + "epoch": 1.1308952273369104, + "grad_norm": 3.2317305462116876, + "learning_rate": 1.4323722966676671e-05, + "loss": 0.6201, "step": 8009 }, { - "epoch": 1.6795973998741873, - "grad_norm": 6.48791835471679, - "learning_rate": 8.552817719519974e-06, - "loss": 0.7394, + "epoch": 1.1310364303868963, + "grad_norm": 3.4773652501691275, + "learning_rate": 1.4322348324558366e-05, + "loss": 0.5111, "step": 8010 }, { - "epoch": 1.6798070874397149, - "grad_norm": 7.500786048208998, - "learning_rate": 8.550577704151399e-06, - "loss": 0.8444, + "epoch": 1.1311776334368822, + "grad_norm": 2.9880192659032, + "learning_rate": 1.4320973581990548e-05, + "loss": 0.5407, "step": 8011 }, { - "epoch": 1.6800167750052422, - "grad_norm": 5.251224581490754, - "learning_rate": 8.548337763068227e-06, - "loss": 0.5368, + "epoch": 1.131318836486868, + "grad_norm": 3.7692026447828604, + "learning_rate": 1.4319598739005174e-05, + "loss": 0.643, "step": 8012 }, { - "epoch": 1.6802264625707695, - "grad_norm": 6.95714058500969, - "learning_rate": 8.546097896385259e-06, - "loss": 0.8292, + "epoch": 1.131460039536854, + "grad_norm": 3.6703571986758794, + "learning_rate": 1.431822379563419e-05, + "loss": 0.7077, "step": 8013 }, { - "epoch": 1.680436150136297, - "grad_norm": 6.009253704792945, - "learning_rate": 8.543858104217304e-06, - "loss": 0.5852, + "epoch": 1.1316012425868398, + "grad_norm": 3.3797312949930096, + "learning_rate": 1.431684875190955e-05, + "loss": 0.5443, "step": 8014 }, { - "epoch": 1.6806458377018243, - "grad_norm": 8.533104719606097, - "learning_rate": 8.541618386679138e-06, - "loss": 0.9196, + "epoch": 1.1317424456368257, + "grad_norm": 3.3223370247385913, + "learning_rate": 1.4315473607863206e-05, + "loss": 0.4807, "step": 8015 }, { - "epoch": 1.6808555252673516, - "grad_norm": 5.700445457075318, - "learning_rate": 8.539378743885564e-06, - "loss": 0.6881, + "epoch": 1.1318836486868116, + "grad_norm": 4.5717501029329926, + "learning_rate": 1.4314098363527122e-05, + "loss": 0.6158, "step": 8016 }, { - "epoch": 1.6810652128328791, - "grad_norm": 5.359108099220697, - "learning_rate": 8.537139175951362e-06, - "loss": 0.5455, + "epoch": 1.1320248517367975, + "grad_norm": 4.420681644562101, + "learning_rate": 1.4312723018933257e-05, + "loss": 0.7762, "step": 8017 }, { - "epoch": 1.6812749003984062, - "grad_norm": 6.452091949045865, - "learning_rate": 8.534899682991315e-06, - "loss": 0.6682, + "epoch": 1.1321660547867833, + "grad_norm": 3.5560078947528018, + "learning_rate": 1.431134757411357e-05, + "loss": 0.5924, "step": 8018 }, { - "epoch": 1.6814845879639337, - "grad_norm": 5.767348710092066, - "learning_rate": 8.532660265120195e-06, - "loss": 0.6773, + "epoch": 1.1323072578367692, + "grad_norm": 3.326806329440733, + "learning_rate": 1.4309972029100026e-05, + "loss": 0.5385, "step": 8019 }, { - "epoch": 1.6816942755294613, - "grad_norm": 7.390001888198283, - "learning_rate": 8.530420922452792e-06, - "loss": 0.86, + "epoch": 1.132448460886755, + "grad_norm": 3.0965945327170585, + "learning_rate": 1.4308596383924593e-05, + "loss": 0.5705, "step": 8020 }, { - "epoch": 1.6819039630949884, - "grad_norm": 9.175739609780958, - "learning_rate": 8.528181655103856e-06, - "loss": 0.8896, + "epoch": 1.132589663936741, + "grad_norm": 4.532091990120125, + "learning_rate": 1.4307220638619244e-05, + "loss": 0.8211, "step": 8021 }, { - "epoch": 1.6821136506605159, - "grad_norm": 7.130144749929262, - "learning_rate": 8.525942463188165e-06, - "loss": 0.8211, + "epoch": 1.1327308669867269, + "grad_norm": 3.3676096115847067, + "learning_rate": 1.4305844793215943e-05, + "loss": 0.6195, "step": 8022 }, { - "epoch": 1.6823233382260432, - "grad_norm": 6.707265513893993, - "learning_rate": 8.523703346820482e-06, - "loss": 0.7068, + "epoch": 1.1328720700367128, + "grad_norm": 4.505915134112574, + "learning_rate": 1.4304468847746673e-05, + "loss": 0.7486, "step": 8023 }, { - "epoch": 1.6825330257915705, - "grad_norm": 8.058055461919542, - "learning_rate": 8.521464306115562e-06, - "loss": 0.993, + "epoch": 1.1330132730866986, + "grad_norm": 3.9072039099521194, + "learning_rate": 1.4303092802243404e-05, + "loss": 0.6677, "step": 8024 }, { - "epoch": 1.682742713357098, - "grad_norm": 6.913546534591349, - "learning_rate": 8.519225341188157e-06, - "loss": 0.7853, + "epoch": 1.1331544761366845, + "grad_norm": 3.371072509361814, + "learning_rate": 1.4301716656738116e-05, + "loss": 0.5629, "step": 8025 }, { - "epoch": 1.6829524009226253, - "grad_norm": 7.280290277923408, - "learning_rate": 8.516986452153026e-06, - "loss": 0.6362, + "epoch": 1.1332956791866704, + "grad_norm": 3.319081543617124, + "learning_rate": 1.4300340411262792e-05, + "loss": 0.6208, "step": 8026 }, { - "epoch": 1.6831620884881526, - "grad_norm": 6.784915773492927, - "learning_rate": 8.514747639124912e-06, - "loss": 0.7801, + "epoch": 1.1334368822366563, + "grad_norm": 3.33618824226898, + "learning_rate": 1.4298964065849412e-05, + "loss": 0.552, "step": 8027 }, { - "epoch": 1.6833717760536802, - "grad_norm": 5.865843341449276, - "learning_rate": 8.512508902218555e-06, - "loss": 0.619, + "epoch": 1.1335780852866422, + "grad_norm": 4.134918301489257, + "learning_rate": 1.4297587620529965e-05, + "loss": 0.7376, "step": 8028 }, { - "epoch": 1.6835814636192072, - "grad_norm": 7.540393017189529, - "learning_rate": 8.510270241548699e-06, - "loss": 0.8786, + "epoch": 1.133719288336628, + "grad_norm": 3.417682290141868, + "learning_rate": 1.4296211075336437e-05, + "loss": 0.6469, "step": 8029 }, { - "epoch": 1.6837911511847348, - "grad_norm": 7.777346796871491, - "learning_rate": 8.508031657230072e-06, - "loss": 0.7917, + "epoch": 1.133860491386614, + "grad_norm": 3.942233335081193, + "learning_rate": 1.4294834430300822e-05, + "loss": 0.6173, "step": 8030 }, { - "epoch": 1.684000838750262, - "grad_norm": 6.188350220507667, - "learning_rate": 8.505793149377415e-06, - "loss": 0.6743, + "epoch": 1.1340016944365998, + "grad_norm": 3.3919699016721148, + "learning_rate": 1.4293457685455106e-05, + "loss": 0.7197, "step": 8031 }, { - "epoch": 1.6842105263157894, - "grad_norm": 5.722682114044694, - "learning_rate": 8.50355471810545e-06, - "loss": 0.7378, + "epoch": 1.1341428974865857, + "grad_norm": 3.262675953380986, + "learning_rate": 1.4292080840831289e-05, + "loss": 0.5181, "step": 8032 }, { - "epoch": 1.684420213881317, - "grad_norm": 6.69646675371456, - "learning_rate": 8.5013163635289e-06, - "loss": 0.6308, + "epoch": 1.1342841005365716, + "grad_norm": 3.669778902693423, + "learning_rate": 1.4290703896461367e-05, + "loss": 0.6159, "step": 8033 }, { - "epoch": 1.6846299014468442, - "grad_norm": 6.7043084921855955, - "learning_rate": 8.499078085762487e-06, - "loss": 0.8286, + "epoch": 1.1344253035865575, + "grad_norm": 3.272941509852461, + "learning_rate": 1.4289326852377339e-05, + "loss": 0.642, "step": 8034 }, { - "epoch": 1.6848395890123715, - "grad_norm": 5.571066289421345, - "learning_rate": 8.496839884920926e-06, - "loss": 0.4789, + "epoch": 1.1345665066365433, + "grad_norm": 3.950748063640626, + "learning_rate": 1.428794970861121e-05, + "loss": 0.6363, "step": 8035 }, { - "epoch": 1.685049276577899, - "grad_norm": 5.77492340856858, - "learning_rate": 8.494601761118923e-06, - "loss": 0.5743, + "epoch": 1.1347077096865292, + "grad_norm": 3.0700171775654463, + "learning_rate": 1.4286572465194977e-05, + "loss": 0.5542, "step": 8036 }, { - "epoch": 1.6852589641434261, - "grad_norm": 6.59637512418809, - "learning_rate": 8.492363714471194e-06, - "loss": 0.628, + "epoch": 1.1348489127365151, + "grad_norm": 3.59033521870062, + "learning_rate": 1.4285195122160652e-05, + "loss": 0.7387, "step": 8037 }, { - "epoch": 1.6854686517089537, - "grad_norm": 6.301564052694155, - "learning_rate": 8.490125745092443e-06, - "loss": 0.7673, + "epoch": 1.134990115786501, + "grad_norm": 4.415996621530364, + "learning_rate": 1.4283817679540246e-05, + "loss": 0.8377, "step": 8038 }, { - "epoch": 1.6856783392744812, - "grad_norm": 6.05285412749015, - "learning_rate": 8.487887853097363e-06, - "loss": 0.6635, + "epoch": 1.1351313188364869, + "grad_norm": 3.1221454208875237, + "learning_rate": 1.4282440137365762e-05, + "loss": 0.6553, "step": 8039 }, { - "epoch": 1.6858880268400083, - "grad_norm": 6.835660195856901, - "learning_rate": 8.485650038600649e-06, - "loss": 0.8073, + "epoch": 1.1352725218864728, + "grad_norm": 2.588264556454677, + "learning_rate": 1.4281062495669224e-05, + "loss": 0.471, "step": 8040 }, { - "epoch": 1.6860977144055358, - "grad_norm": 7.414508280602449, - "learning_rate": 8.483412301717002e-06, - "loss": 0.9519, + "epoch": 1.1354137249364586, + "grad_norm": 3.5446844992314044, + "learning_rate": 1.427968475448264e-05, + "loss": 0.5623, "step": 8041 }, { - "epoch": 1.686307401971063, - "grad_norm": 6.194657060471599, - "learning_rate": 8.481174642561105e-06, - "loss": 0.5978, + "epoch": 1.1355549279864445, + "grad_norm": 3.0308874592429778, + "learning_rate": 1.4278306913838034e-05, + "loss": 0.6026, "step": 8042 }, { - "epoch": 1.6865170895365904, - "grad_norm": 5.684880897112377, - "learning_rate": 8.47893706124764e-06, - "loss": 0.6504, + "epoch": 1.1356961310364304, + "grad_norm": 3.887352159434565, + "learning_rate": 1.4276928973767418e-05, + "loss": 0.6056, "step": 8043 }, { - "epoch": 1.686726777102118, - "grad_norm": 8.617961456899883, - "learning_rate": 8.476699557891288e-06, - "loss": 0.9196, + "epoch": 1.1358373340864163, + "grad_norm": 4.405600416081371, + "learning_rate": 1.4275550934302822e-05, + "loss": 0.789, "step": 8044 }, { - "epoch": 1.6869364646676452, - "grad_norm": 6.747033439753016, - "learning_rate": 8.47446213260673e-06, - "loss": 0.6528, + "epoch": 1.1359785371364022, + "grad_norm": 3.3246968497931495, + "learning_rate": 1.4274172795476267e-05, + "loss": 0.6166, "step": 8045 }, { - "epoch": 1.6871461522331725, - "grad_norm": 7.674984968295738, - "learning_rate": 8.47222478550863e-06, - "loss": 0.765, + "epoch": 1.136119740186388, + "grad_norm": 3.726343988740029, + "learning_rate": 1.4272794557319785e-05, + "loss": 0.6553, "step": 8046 }, { - "epoch": 1.6873558397987, - "grad_norm": 7.049999230413108, - "learning_rate": 8.469987516711661e-06, - "loss": 0.7092, + "epoch": 1.136260943236374, + "grad_norm": 3.5068900025639045, + "learning_rate": 1.4271416219865403e-05, + "loss": 0.5735, "step": 8047 }, { - "epoch": 1.6875655273642272, - "grad_norm": 7.657758874030204, - "learning_rate": 8.467750326330487e-06, - "loss": 0.6388, + "epoch": 1.1364021462863598, + "grad_norm": 3.866590982362031, + "learning_rate": 1.427003778314515e-05, + "loss": 0.6142, "step": 8048 }, { - "epoch": 1.6877752149297547, - "grad_norm": 5.256210060462586, - "learning_rate": 8.465513214479763e-06, - "loss": 0.4683, + "epoch": 1.1365433493363457, + "grad_norm": 3.499273493846996, + "learning_rate": 1.4268659247191066e-05, + "loss": 0.6598, "step": 8049 }, { - "epoch": 1.687984902495282, - "grad_norm": 7.4660354131694735, - "learning_rate": 8.463276181274146e-06, - "loss": 0.7944, + "epoch": 1.1366845523863316, + "grad_norm": 3.4072538372772736, + "learning_rate": 1.426728061203518e-05, + "loss": 0.4915, "step": 8050 }, { - "epoch": 1.6881945900608093, - "grad_norm": 7.0439093485868325, - "learning_rate": 8.461039226828299e-06, - "loss": 0.6515, + "epoch": 1.1368257554363175, + "grad_norm": 3.529738661978145, + "learning_rate": 1.4265901877709541e-05, + "loss": 0.5996, "step": 8051 }, { - "epoch": 1.6884042776263368, - "grad_norm": 8.192802278177494, - "learning_rate": 8.458802351256853e-06, - "loss": 0.9139, + "epoch": 1.1369669584863034, + "grad_norm": 3.968659093978716, + "learning_rate": 1.426452304424618e-05, + "loss": 0.7222, "step": 8052 }, { - "epoch": 1.6886139651918641, - "grad_norm": 6.061250995956041, - "learning_rate": 8.45656555467446e-06, - "loss": 0.4878, + "epoch": 1.1371081615362892, + "grad_norm": 3.776310659022357, + "learning_rate": 1.4263144111677148e-05, + "loss": 0.6406, "step": 8053 }, { - "epoch": 1.6888236527573914, - "grad_norm": 6.245641532759829, - "learning_rate": 8.454328837195764e-06, - "loss": 0.6864, + "epoch": 1.1372493645862751, + "grad_norm": 3.690271177317174, + "learning_rate": 1.4261765080034487e-05, + "loss": 0.5826, "step": 8054 }, { - "epoch": 1.689033340322919, - "grad_norm": 9.770464898491316, - "learning_rate": 8.452092198935393e-06, - "loss": 1.0368, + "epoch": 1.137390567636261, + "grad_norm": 4.458457864471298, + "learning_rate": 1.4260385949350247e-05, + "loss": 0.7813, "step": 8055 }, { - "epoch": 1.6892430278884463, - "grad_norm": 7.477597614411314, - "learning_rate": 8.449855640007978e-06, - "loss": 0.6894, + "epoch": 1.137531770686247, + "grad_norm": 3.848854527383235, + "learning_rate": 1.4259006719656476e-05, + "loss": 0.6369, "step": 8056 }, { - "epoch": 1.6894527154539736, - "grad_norm": 7.503843832476244, - "learning_rate": 8.447619160528157e-06, - "loss": 0.6995, + "epoch": 1.1376729737362328, + "grad_norm": 3.3458802726988686, + "learning_rate": 1.4257627390985227e-05, + "loss": 0.5097, "step": 8057 }, { - "epoch": 1.689662403019501, - "grad_norm": 7.699534111686624, - "learning_rate": 8.445382760610543e-06, - "loss": 0.77, + "epoch": 1.1378141767862187, + "grad_norm": 3.630745009448528, + "learning_rate": 1.425624796336856e-05, + "loss": 0.707, "step": 8058 }, { - "epoch": 1.6898720905850282, - "grad_norm": 6.987653156604058, - "learning_rate": 8.443146440369759e-06, - "loss": 0.8355, + "epoch": 1.1379553798362045, + "grad_norm": 3.7527264943354566, + "learning_rate": 1.4254868436838525e-05, + "loss": 0.6749, "step": 8059 }, { - "epoch": 1.6900817781505557, - "grad_norm": 6.038029520529165, - "learning_rate": 8.440910199920421e-06, - "loss": 0.6061, + "epoch": 1.1380965828861904, + "grad_norm": 3.584071361580181, + "learning_rate": 1.4253488811427188e-05, + "loss": 0.6799, "step": 8060 }, { - "epoch": 1.690291465716083, - "grad_norm": 5.481871602360362, - "learning_rate": 8.438674039377137e-06, - "loss": 0.596, + "epoch": 1.1382377859361763, + "grad_norm": 3.543826670830915, + "learning_rate": 1.4252109087166605e-05, + "loss": 0.5211, "step": 8061 }, { - "epoch": 1.6905011532816103, - "grad_norm": 6.977803674676321, - "learning_rate": 8.436437958854517e-06, - "loss": 0.7972, + "epoch": 1.1383789889861622, + "grad_norm": 3.3088601323032742, + "learning_rate": 1.4250729264088845e-05, + "loss": 0.5936, "step": 8062 }, { - "epoch": 1.6907108408471379, - "grad_norm": 6.20600554704157, - "learning_rate": 8.434201958467166e-06, - "loss": 0.6285, + "epoch": 1.138520192036148, + "grad_norm": 3.407415926900606, + "learning_rate": 1.4249349342225971e-05, + "loss": 0.6009, "step": 8063 }, { - "epoch": 1.6909205284126652, - "grad_norm": 4.880448201168151, - "learning_rate": 8.43196603832968e-06, - "loss": 0.5071, + "epoch": 1.138661395086134, + "grad_norm": 3.528219698526353, + "learning_rate": 1.4247969321610055e-05, + "loss": 0.5294, "step": 8064 }, { - "epoch": 1.6911302159781925, - "grad_norm": 6.700645796645517, - "learning_rate": 8.429730198556651e-06, - "loss": 0.7009, + "epoch": 1.1388025981361198, + "grad_norm": 3.238406072603319, + "learning_rate": 1.4246589202273167e-05, + "loss": 0.5156, "step": 8065 }, { - "epoch": 1.69133990354372, - "grad_norm": 6.31705939585013, - "learning_rate": 8.427494439262678e-06, - "loss": 0.6697, + "epoch": 1.1389438011861057, + "grad_norm": 3.2240887520519483, + "learning_rate": 1.4245208984247378e-05, + "loss": 0.5919, "step": 8066 }, { - "epoch": 1.691549591109247, - "grad_norm": 6.072743926745923, - "learning_rate": 8.425258760562336e-06, - "loss": 0.7793, + "epoch": 1.1390850042360916, + "grad_norm": 3.569261097170348, + "learning_rate": 1.4243828667564767e-05, + "loss": 0.6271, "step": 8067 }, { - "epoch": 1.6917592786747746, - "grad_norm": 6.5431877333021955, - "learning_rate": 8.423023162570217e-06, - "loss": 0.8613, + "epoch": 1.1392262072860775, + "grad_norm": 3.442920369538943, + "learning_rate": 1.424244825225741e-05, + "loss": 0.6958, "step": 8068 }, { - "epoch": 1.691968966240302, - "grad_norm": 8.262423609252041, - "learning_rate": 8.420787645400898e-06, - "loss": 0.8091, + "epoch": 1.1393674103360634, + "grad_norm": 3.019791690585976, + "learning_rate": 1.4241067738357388e-05, + "loss": 0.5936, "step": 8069 }, { - "epoch": 1.6921786538058292, - "grad_norm": 6.918021527136952, - "learning_rate": 8.418552209168948e-06, - "loss": 0.9488, + "epoch": 1.1395086133860493, + "grad_norm": 3.995494347722344, + "learning_rate": 1.4239687125896783e-05, + "loss": 0.5098, "step": 8070 }, { - "epoch": 1.6923883413713567, - "grad_norm": 6.482965702566805, - "learning_rate": 8.41631685398894e-06, - "loss": 0.6523, + "epoch": 1.1396498164360351, + "grad_norm": 3.3361149793804152, + "learning_rate": 1.4238306414907678e-05, + "loss": 0.6189, "step": 8071 }, { - "epoch": 1.692598028936884, - "grad_norm": 7.941002900960024, - "learning_rate": 8.414081579975446e-06, - "loss": 0.6936, + "epoch": 1.139791019486021, + "grad_norm": 3.5075362882396846, + "learning_rate": 1.423692560542217e-05, + "loss": 0.5747, "step": 8072 }, { - "epoch": 1.6928077165024114, - "grad_norm": 6.19876960841199, - "learning_rate": 8.411846387243013e-06, - "loss": 0.6662, + "epoch": 1.139932222536007, + "grad_norm": 3.322814817587759, + "learning_rate": 1.4235544697472335e-05, + "loss": 0.5696, "step": 8073 }, { - "epoch": 1.6930174040679389, - "grad_norm": 7.233879723289626, - "learning_rate": 8.40961127590621e-06, - "loss": 0.7804, + "epoch": 1.1400734255859926, + "grad_norm": 4.3845571669174435, + "learning_rate": 1.4234163691090269e-05, + "loss": 0.6784, "step": 8074 }, { - "epoch": 1.6932270916334662, - "grad_norm": 6.347237426292081, - "learning_rate": 8.407376246079591e-06, - "loss": 0.6534, + "epoch": 1.1402146286359784, + "grad_norm": 3.307965748163738, + "learning_rate": 1.4232782586308069e-05, + "loss": 0.6278, "step": 8075 }, { - "epoch": 1.6934367791989935, - "grad_norm": 6.679985133310644, - "learning_rate": 8.405141297877698e-06, - "loss": 0.6696, + "epoch": 1.1403558316859643, + "grad_norm": 3.5317557617565427, + "learning_rate": 1.423140138315783e-05, + "loss": 0.5998, "step": 8076 }, { - "epoch": 1.693646466764521, - "grad_norm": 5.767368427162944, - "learning_rate": 8.40290643141508e-06, - "loss": 0.7408, + "epoch": 1.1404970347359502, + "grad_norm": 3.0436316704137756, + "learning_rate": 1.4230020081671651e-05, + "loss": 0.5646, "step": 8077 }, { - "epoch": 1.693856154330048, - "grad_norm": 5.945581495864111, - "learning_rate": 8.40067164680628e-06, - "loss": 0.61, + "epoch": 1.140638237785936, + "grad_norm": 3.7729434338464216, + "learning_rate": 1.4228638681881633e-05, + "loss": 0.6073, "step": 8078 }, { - "epoch": 1.6940658418955756, - "grad_norm": 6.993805198967216, - "learning_rate": 8.398436944165831e-06, - "loss": 0.6648, + "epoch": 1.140779440835922, + "grad_norm": 3.512487627709849, + "learning_rate": 1.4227257183819876e-05, + "loss": 0.6482, "step": 8079 }, { - "epoch": 1.694275529461103, - "grad_norm": 6.019221520052035, - "learning_rate": 8.396202323608265e-06, - "loss": 0.6475, + "epoch": 1.1409206438859079, + "grad_norm": 3.0292147493064947, + "learning_rate": 1.4225875587518485e-05, + "loss": 0.5401, "step": 8080 }, { - "epoch": 1.6944852170266302, - "grad_norm": 10.342347916573448, - "learning_rate": 8.393967785248114e-06, - "loss": 1.0226, + "epoch": 1.1410618469358937, + "grad_norm": 3.463318237144269, + "learning_rate": 1.4224493893009577e-05, + "loss": 0.5587, "step": 8081 }, { - "epoch": 1.6946949045921578, - "grad_norm": 7.331218085012214, - "learning_rate": 8.391733329199897e-06, - "loss": 0.8291, + "epoch": 1.1412030499858796, + "grad_norm": 4.09600474933207, + "learning_rate": 1.422311210032525e-05, + "loss": 0.5597, "step": 8082 }, { - "epoch": 1.694904592157685, - "grad_norm": 8.712958023710193, - "learning_rate": 8.389498955578135e-06, - "loss": 1.0205, + "epoch": 1.1413442530358655, + "grad_norm": 3.525488485618092, + "learning_rate": 1.4221730209497625e-05, + "loss": 0.5314, "step": 8083 }, { - "epoch": 1.6951142797232124, - "grad_norm": 8.40866670117006, - "learning_rate": 8.387264664497349e-06, - "loss": 0.9455, + "epoch": 1.1414854560858514, + "grad_norm": 2.6917574992551256, + "learning_rate": 1.422034822055881e-05, + "loss": 0.4601, "step": 8084 }, { - "epoch": 1.69532396728874, - "grad_norm": 6.971151785869194, - "learning_rate": 8.385030456072043e-06, - "loss": 0.7345, + "epoch": 1.1416266591358373, + "grad_norm": 3.11161014729673, + "learning_rate": 1.4218966133540928e-05, + "loss": 0.5073, "step": 8085 }, { - "epoch": 1.695533654854267, - "grad_norm": 6.580461537753359, - "learning_rate": 8.382796330416729e-06, - "loss": 0.6863, + "epoch": 1.1417678621858232, + "grad_norm": 2.9471924638202216, + "learning_rate": 1.4217583948476094e-05, + "loss": 0.4656, "step": 8086 }, { - "epoch": 1.6957433424197945, - "grad_norm": 5.130742848978635, - "learning_rate": 8.380562287645907e-06, - "loss": 0.5778, + "epoch": 1.141909065235809, + "grad_norm": 4.400216782181998, + "learning_rate": 1.4216201665396429e-05, + "loss": 0.5948, "step": 8087 }, { - "epoch": 1.6959530299853218, - "grad_norm": 5.535010613704116, - "learning_rate": 8.378328327874074e-06, - "loss": 0.4527, + "epoch": 1.142050268285795, + "grad_norm": 3.6545419479568952, + "learning_rate": 1.4214819284334058e-05, + "loss": 0.697, "step": 8088 }, { - "epoch": 1.6961627175508491, - "grad_norm": 6.695417726617908, - "learning_rate": 8.376094451215731e-06, - "loss": 0.6171, + "epoch": 1.1421914713357808, + "grad_norm": 3.3098133376144876, + "learning_rate": 1.421343680532111e-05, + "loss": 0.5342, "step": 8089 }, { - "epoch": 1.6963724051163767, - "grad_norm": 6.692135483844437, - "learning_rate": 8.373860657785363e-06, - "loss": 0.7069, + "epoch": 1.1423326743857667, + "grad_norm": 4.303964373610726, + "learning_rate": 1.4212054228389712e-05, + "loss": 0.6712, "step": 8090 }, { - "epoch": 1.696582092681904, - "grad_norm": 6.143929887689125, - "learning_rate": 8.371626947697457e-06, - "loss": 0.5504, + "epoch": 1.1424738774357526, + "grad_norm": 3.172856528258202, + "learning_rate": 1.4210671553571986e-05, + "loss": 0.5141, "step": 8091 }, { - "epoch": 1.6967917802474313, - "grad_norm": 5.952562922628196, - "learning_rate": 8.369393321066491e-06, - "loss": 0.5548, + "epoch": 1.1426150804857385, + "grad_norm": 3.897681278890643, + "learning_rate": 1.4209288780900074e-05, + "loss": 0.668, "step": 8092 }, { - "epoch": 1.6970014678129588, - "grad_norm": 9.527228428806245, - "learning_rate": 8.36715977800695e-06, - "loss": 0.9963, + "epoch": 1.1427562835357243, + "grad_norm": 3.382455034506384, + "learning_rate": 1.4207905910406113e-05, + "loss": 0.6563, "step": 8093 }, { - "epoch": 1.697211155378486, - "grad_norm": 5.945155094502981, - "learning_rate": 8.364926318633298e-06, - "loss": 0.6632, + "epoch": 1.1428974865857102, + "grad_norm": 3.560304051930665, + "learning_rate": 1.4206522942122232e-05, + "loss": 0.6969, "step": 8094 }, { - "epoch": 1.6974208429440134, - "grad_norm": 8.982557083600318, - "learning_rate": 8.36269294306001e-06, - "loss": 0.7392, + "epoch": 1.143038689635696, + "grad_norm": 3.479739387146171, + "learning_rate": 1.4205139876080574e-05, + "loss": 0.6168, "step": 8095 }, { - "epoch": 1.697630530509541, - "grad_norm": 7.58690850666072, - "learning_rate": 8.36045965140155e-06, - "loss": 0.7448, + "epoch": 1.143179892685682, + "grad_norm": 3.048107947315489, + "learning_rate": 1.4203756712313284e-05, + "loss": 0.5107, "step": 8096 }, { - "epoch": 1.697840218075068, - "grad_norm": 6.450839161702425, - "learning_rate": 8.358226443772376e-06, - "loss": 0.776, + "epoch": 1.1433210957356679, + "grad_norm": 3.771851907862194, + "learning_rate": 1.42023734508525e-05, + "loss": 0.6714, "step": 8097 }, { - "epoch": 1.6980499056405955, - "grad_norm": 8.272157740066207, - "learning_rate": 8.35599332028694e-06, - "loss": 0.7227, + "epoch": 1.1434622987856538, + "grad_norm": 2.7350997776684087, + "learning_rate": 1.4200990091730376e-05, + "loss": 0.4566, "step": 8098 }, { - "epoch": 1.6982595932061229, - "grad_norm": 8.34221366224279, - "learning_rate": 8.353760281059705e-06, - "loss": 0.8269, + "epoch": 1.1436035018356396, + "grad_norm": 4.271817291250578, + "learning_rate": 1.4199606634979054e-05, + "loss": 0.7686, "step": 8099 }, { - "epoch": 1.6984692807716502, - "grad_norm": 8.064196321986154, - "learning_rate": 8.351527326205104e-06, - "loss": 0.9194, + "epoch": 1.1437447048856255, + "grad_norm": 3.096084920790781, + "learning_rate": 1.4198223080630686e-05, + "loss": 0.5297, "step": 8100 }, { - "epoch": 1.6986789683371777, - "grad_norm": 7.122059137776797, - "learning_rate": 8.349294455837589e-06, - "loss": 0.6501, + "epoch": 1.1438859079356114, + "grad_norm": 3.4597248211942895, + "learning_rate": 1.4196839428717428e-05, + "loss": 0.5803, "step": 8101 }, { - "epoch": 1.698888655902705, - "grad_norm": 6.2678046415889, - "learning_rate": 8.347061670071598e-06, - "loss": 0.5761, + "epoch": 1.1440271109855973, + "grad_norm": 3.385262987980981, + "learning_rate": 1.4195455679271435e-05, + "loss": 0.5413, "step": 8102 }, { - "epoch": 1.6990983434682323, - "grad_norm": 8.477230873653662, - "learning_rate": 8.344828969021563e-06, - "loss": 0.7552, + "epoch": 1.1441683140355832, + "grad_norm": 3.745718931335684, + "learning_rate": 1.4194071832324863e-05, + "loss": 0.6381, "step": 8103 }, { - "epoch": 1.6993080310337598, - "grad_norm": 6.366475115603839, - "learning_rate": 8.34259635280191e-06, - "loss": 0.7131, + "epoch": 1.144309517085569, + "grad_norm": 4.609173837585445, + "learning_rate": 1.4192687887909875e-05, + "loss": 0.6194, "step": 8104 }, { - "epoch": 1.699517718599287, - "grad_norm": 6.732181579367404, - "learning_rate": 8.340363821527072e-06, - "loss": 0.7129, + "epoch": 1.144450720135555, + "grad_norm": 4.347929334789232, + "learning_rate": 1.4191303846058628e-05, + "loss": 0.6238, "step": 8105 }, { - "epoch": 1.6997274061648144, - "grad_norm": 7.086881562592512, - "learning_rate": 8.338131375311468e-06, - "loss": 0.7122, + "epoch": 1.1445919231855408, + "grad_norm": 3.722237735465618, + "learning_rate": 1.4189919706803292e-05, + "loss": 0.6349, "step": 8106 }, { - "epoch": 1.6999370937303417, - "grad_norm": 8.21091843624606, - "learning_rate": 8.335899014269512e-06, - "loss": 0.7338, + "epoch": 1.1447331262355267, + "grad_norm": 3.390300131956305, + "learning_rate": 1.4188535470176033e-05, + "loss": 0.6367, "step": 8107 }, { - "epoch": 1.700146781295869, - "grad_norm": 6.414818131162577, - "learning_rate": 8.333666738515614e-06, - "loss": 0.5577, + "epoch": 1.1448743292855126, + "grad_norm": 3.584121022673777, + "learning_rate": 1.4187151136209015e-05, + "loss": 0.622, "step": 8108 }, { - "epoch": 1.7003564688613966, - "grad_norm": 8.218206128148912, - "learning_rate": 8.331434548164193e-06, - "loss": 0.8518, + "epoch": 1.1450155323354985, + "grad_norm": 4.256336235160171, + "learning_rate": 1.4185766704934411e-05, + "loss": 0.6814, "step": 8109 }, { - "epoch": 1.7005661564269239, - "grad_norm": 7.091346840563901, - "learning_rate": 8.32920244332964e-06, - "loss": 0.7208, + "epoch": 1.1451567353854843, + "grad_norm": 3.4519615154053307, + "learning_rate": 1.4184382176384403e-05, + "loss": 0.5797, "step": 8110 }, { - "epoch": 1.7007758439924512, - "grad_norm": 7.456917798361744, - "learning_rate": 8.32697042412636e-06, - "loss": 0.7951, + "epoch": 1.1452979384354702, + "grad_norm": 4.425484809236835, + "learning_rate": 1.4182997550591155e-05, + "loss": 0.6675, "step": 8111 }, { - "epoch": 1.7009855315579787, - "grad_norm": 7.007658690685822, - "learning_rate": 8.324738490668752e-06, - "loss": 0.9643, + "epoch": 1.1454391414854561, + "grad_norm": 4.1076512916247605, + "learning_rate": 1.4181612827586854e-05, + "loss": 0.6802, "step": 8112 }, { - "epoch": 1.701195219123506, - "grad_norm": 8.779258400787134, - "learning_rate": 8.322506643071198e-06, - "loss": 0.9878, + "epoch": 1.145580344535442, + "grad_norm": 3.50289977568371, + "learning_rate": 1.4180228007403676e-05, + "loss": 0.5075, "step": 8113 }, { - "epoch": 1.7014049066890333, - "grad_norm": 6.19855312425762, - "learning_rate": 8.320274881448087e-06, - "loss": 0.6184, + "epoch": 1.1457215475854279, + "grad_norm": 4.148552016269906, + "learning_rate": 1.4178843090073802e-05, + "loss": 0.6524, "step": 8114 }, { - "epoch": 1.7016145942545609, - "grad_norm": 8.37459822414262, - "learning_rate": 8.318043205913804e-06, - "loss": 0.933, + "epoch": 1.1458627506354138, + "grad_norm": 3.377993296146897, + "learning_rate": 1.4177458075629422e-05, + "loss": 0.5914, "step": 8115 }, { - "epoch": 1.701824281820088, - "grad_norm": 7.215723416654507, - "learning_rate": 8.315811616582724e-06, - "loss": 0.6966, + "epoch": 1.1460039536853996, + "grad_norm": 3.8656728075330564, + "learning_rate": 1.4176072964102719e-05, + "loss": 0.7054, "step": 8116 }, { - "epoch": 1.7020339693856155, - "grad_norm": 8.40425324456381, - "learning_rate": 8.313580113569217e-06, - "loss": 1.0779, + "epoch": 1.1461451567353855, + "grad_norm": 3.0973466588485508, + "learning_rate": 1.4174687755525882e-05, + "loss": 0.5151, "step": 8117 }, { - "epoch": 1.7022436569511428, - "grad_norm": 7.154046177409504, - "learning_rate": 8.311348696987658e-06, - "loss": 0.7226, + "epoch": 1.1462863597853714, + "grad_norm": 4.913310481951282, + "learning_rate": 1.4173302449931107e-05, + "loss": 0.8259, "step": 8118 }, { - "epoch": 1.70245334451667, - "grad_norm": 6.621859149944271, - "learning_rate": 8.309117366952402e-06, - "loss": 0.7242, + "epoch": 1.1464275628353573, + "grad_norm": 3.3256874468735, + "learning_rate": 1.4171917047350586e-05, + "loss": 0.5903, "step": 8119 }, { - "epoch": 1.7026630320821976, - "grad_norm": 6.535446153009052, - "learning_rate": 8.306886123577818e-06, - "loss": 0.8294, + "epoch": 1.1465687658853432, + "grad_norm": 4.173796831917979, + "learning_rate": 1.4170531547816513e-05, + "loss": 0.7197, "step": 8120 }, { - "epoch": 1.702872719647725, - "grad_norm": 7.997105941339061, - "learning_rate": 8.304654966978257e-06, - "loss": 0.8766, + "epoch": 1.146709968935329, + "grad_norm": 2.7219784884728053, + "learning_rate": 1.416914595136109e-05, + "loss": 0.4772, "step": 8121 }, { - "epoch": 1.7030824072132522, - "grad_norm": 9.554788797922344, - "learning_rate": 8.302423897268067e-06, - "loss": 1.1145, + "epoch": 1.146851171985315, + "grad_norm": 3.5822018814537167, + "learning_rate": 1.4167760258016514e-05, + "loss": 0.7387, "step": 8122 }, { - "epoch": 1.7032920947787797, - "grad_norm": 6.7664162292072145, - "learning_rate": 8.300192914561596e-06, - "loss": 0.7203, + "epoch": 1.1469923750353008, + "grad_norm": 3.4049410457458102, + "learning_rate": 1.416637446781499e-05, + "loss": 0.5843, "step": 8123 }, { - "epoch": 1.7035017823443068, - "grad_norm": 6.436267864927759, - "learning_rate": 8.297962018973191e-06, - "loss": 0.7449, + "epoch": 1.1471335780852867, + "grad_norm": 3.497128157445856, + "learning_rate": 1.4164988580788723e-05, + "loss": 0.6104, "step": 8124 }, { - "epoch": 1.7037114699098344, - "grad_norm": 6.331257824372788, - "learning_rate": 8.295731210617179e-06, - "loss": 0.5993, + "epoch": 1.1472747811352726, + "grad_norm": 4.218975688142539, + "learning_rate": 1.4163602596969917e-05, + "loss": 0.6136, "step": 8125 }, { - "epoch": 1.7039211574753617, - "grad_norm": 5.517213105812385, - "learning_rate": 8.2935004896079e-06, - "loss": 0.6601, + "epoch": 1.1474159841852585, + "grad_norm": 3.3338549557085395, + "learning_rate": 1.4162216516390787e-05, + "loss": 0.6635, "step": 8126 }, { - "epoch": 1.704130845040889, - "grad_norm": 7.7416230803384405, - "learning_rate": 8.291269856059682e-06, - "loss": 0.777, + "epoch": 1.1475571872352444, + "grad_norm": 3.2847238133362837, + "learning_rate": 1.4160830339083544e-05, + "loss": 0.6439, "step": 8127 }, { - "epoch": 1.7043405326064165, - "grad_norm": 8.734991295828738, - "learning_rate": 8.289039310086849e-06, - "loss": 0.6993, + "epoch": 1.1476983902852302, + "grad_norm": 2.9964596840582503, + "learning_rate": 1.4159444065080398e-05, + "loss": 0.5001, "step": 8128 }, { - "epoch": 1.7045502201719438, - "grad_norm": 6.777897873825169, - "learning_rate": 8.286808851803713e-06, - "loss": 0.653, + "epoch": 1.1478395933352161, + "grad_norm": 3.9432677693612135, + "learning_rate": 1.4158057694413572e-05, + "loss": 0.6083, "step": 8129 }, { - "epoch": 1.704759907737471, - "grad_norm": 8.277010616846532, - "learning_rate": 8.284578481324602e-06, - "loss": 1.177, + "epoch": 1.147980796385202, + "grad_norm": 3.6007401494381934, + "learning_rate": 1.4156671227115278e-05, + "loss": 0.6174, "step": 8130 }, { - "epoch": 1.7049695953029986, - "grad_norm": 7.592793693392803, - "learning_rate": 8.282348198763813e-06, - "loss": 0.8078, + "epoch": 1.148121999435188, + "grad_norm": 2.6909012564739156, + "learning_rate": 1.415528466321774e-05, + "loss": 0.4757, "step": 8131 }, { - "epoch": 1.705179282868526, - "grad_norm": 6.966209191661069, - "learning_rate": 8.280118004235661e-06, - "loss": 0.6955, + "epoch": 1.1482632024851738, + "grad_norm": 3.640285119891178, + "learning_rate": 1.4153898002753184e-05, + "loss": 0.661, "step": 8132 }, { - "epoch": 1.7053889704340532, - "grad_norm": 7.5989210912196254, - "learning_rate": 8.277887897854445e-06, - "loss": 0.9461, + "epoch": 1.1484044055351597, + "grad_norm": 3.620044653348565, + "learning_rate": 1.4152511245753828e-05, + "loss": 0.6509, "step": 8133 }, { - "epoch": 1.7055986579995808, - "grad_norm": 7.206408451535142, - "learning_rate": 8.27565787973446e-06, - "loss": 0.624, + "epoch": 1.1485456085851453, + "grad_norm": 2.8746923393774253, + "learning_rate": 1.4151124392251905e-05, + "loss": 0.4544, "step": 8134 }, { - "epoch": 1.7058083455651079, - "grad_norm": 7.383036104731705, - "learning_rate": 8.273427949989997e-06, - "loss": 0.7754, + "epoch": 1.1486868116351312, + "grad_norm": 3.004189653586562, + "learning_rate": 1.4149737442279645e-05, + "loss": 0.5559, "step": 8135 }, { - "epoch": 1.7060180331306354, - "grad_norm": 6.371152439583857, - "learning_rate": 8.27119810873535e-06, - "loss": 0.5685, + "epoch": 1.148828014685117, + "grad_norm": 3.821614570217079, + "learning_rate": 1.4148350395869279e-05, + "loss": 0.5988, "step": 8136 }, { - "epoch": 1.7062277206961627, - "grad_norm": 6.657877386757704, - "learning_rate": 8.268968356084796e-06, - "loss": 0.6465, + "epoch": 1.148969217735103, + "grad_norm": 3.790070055383577, + "learning_rate": 1.414696325305304e-05, + "loss": 0.65, "step": 8137 }, { - "epoch": 1.70643740826169, - "grad_norm": 8.939307188093435, - "learning_rate": 8.266738692152616e-06, - "loss": 0.9293, + "epoch": 1.1491104207850888, + "grad_norm": 3.0046950491815836, + "learning_rate": 1.414557601386317e-05, + "loss": 0.4735, "step": 8138 }, { - "epoch": 1.7066470958272175, - "grad_norm": 6.303317512184404, - "learning_rate": 8.264509117053087e-06, - "loss": 0.5717, + "epoch": 1.1492516238350747, + "grad_norm": 3.369014208563364, + "learning_rate": 1.41441886783319e-05, + "loss": 0.6184, "step": 8139 }, { - "epoch": 1.7068567833927448, - "grad_norm": 7.035454710502382, - "learning_rate": 8.262279630900471e-06, - "loss": 0.9377, + "epoch": 1.1493928268850606, + "grad_norm": 4.459355891006322, + "learning_rate": 1.4142801246491476e-05, + "loss": 0.7221, "step": 8140 }, { - "epoch": 1.7070664709582721, - "grad_norm": 6.9056178319841, - "learning_rate": 8.260050233809041e-06, - "loss": 0.7148, + "epoch": 1.1495340299350465, + "grad_norm": 3.775783415908075, + "learning_rate": 1.414141371837414e-05, + "loss": 0.6911, "step": 8141 }, { - "epoch": 1.7072761585237997, - "grad_norm": 6.948421428376395, - "learning_rate": 8.257820925893057e-06, - "loss": 0.8333, + "epoch": 1.1496752329850324, + "grad_norm": 3.507550696299888, + "learning_rate": 1.4140026094012136e-05, + "loss": 0.6513, "step": 8142 }, { - "epoch": 1.7074858460893267, - "grad_norm": 9.419293130135118, - "learning_rate": 8.255591707266768e-06, - "loss": 0.9382, + "epoch": 1.1498164360350183, + "grad_norm": 4.029857857277068, + "learning_rate": 1.4138638373437713e-05, + "loss": 0.7478, "step": 8143 }, { - "epoch": 1.7076955336548543, - "grad_norm": 7.973645708642495, - "learning_rate": 8.25336257804443e-06, - "loss": 0.8281, + "epoch": 1.1499576390850041, + "grad_norm": 3.132778643999065, + "learning_rate": 1.4137250556683129e-05, + "loss": 0.5326, "step": 8144 }, { - "epoch": 1.7079052212203818, - "grad_norm": 7.243946153051919, - "learning_rate": 8.251133538340292e-06, - "loss": 0.8125, + "epoch": 1.15009884213499, + "grad_norm": 4.346210884243398, + "learning_rate": 1.413586264378062e-05, + "loss": 0.7506, "step": 8145 }, { - "epoch": 1.7081149087859089, - "grad_norm": 7.524324331670648, - "learning_rate": 8.248904588268589e-06, - "loss": 1.016, + "epoch": 1.150240045184976, + "grad_norm": 4.50166634545284, + "learning_rate": 1.4134474634762454e-05, + "loss": 0.7582, "step": 8146 }, { - "epoch": 1.7083245963514364, - "grad_norm": 7.6843625250358665, - "learning_rate": 8.246675727943566e-06, - "loss": 0.9363, + "epoch": 1.1503812482349618, + "grad_norm": 4.356499918691893, + "learning_rate": 1.4133086529660881e-05, + "loss": 0.6666, "step": 8147 }, { - "epoch": 1.7085342839169637, - "grad_norm": 7.174605121851173, - "learning_rate": 8.244446957479453e-06, - "loss": 0.6751, + "epoch": 1.1505224512849477, + "grad_norm": 3.579998590848396, + "learning_rate": 1.4131698328508163e-05, + "loss": 0.6416, "step": 8148 }, { - "epoch": 1.708743971482491, - "grad_norm": 5.679304368425895, - "learning_rate": 8.242218276990477e-06, - "loss": 0.5927, + "epoch": 1.1506636543349336, + "grad_norm": 2.810639898316791, + "learning_rate": 1.413031003133656e-05, + "loss": 0.4253, "step": 8149 }, { - "epoch": 1.7089536590480185, - "grad_norm": 6.803803237682092, - "learning_rate": 8.23998968659086e-06, - "loss": 0.6211, + "epoch": 1.1508048573849194, + "grad_norm": 2.8006785903372, + "learning_rate": 1.4128921638178336e-05, + "loss": 0.5341, "step": 8150 }, { - "epoch": 1.7091633466135459, - "grad_norm": 5.493763154146191, - "learning_rate": 8.237761186394832e-06, - "loss": 0.6388, + "epoch": 1.1509460604349053, + "grad_norm": 3.2209723933840624, + "learning_rate": 1.4127533149065756e-05, + "loss": 0.5517, "step": 8151 }, { - "epoch": 1.7093730341790732, - "grad_norm": 8.32410857997258, - "learning_rate": 8.235532776516594e-06, - "loss": 0.8094, + "epoch": 1.1510872634848912, + "grad_norm": 3.3408849974865054, + "learning_rate": 1.4126144564031085e-05, + "loss": 0.5214, "step": 8152 }, { - "epoch": 1.7095827217446007, - "grad_norm": 5.994065547148515, - "learning_rate": 8.233304457070363e-06, - "loss": 0.7588, + "epoch": 1.151228466534877, + "grad_norm": 3.2681881536602817, + "learning_rate": 1.4124755883106602e-05, + "loss": 0.5476, "step": 8153 }, { - "epoch": 1.7097924093101278, - "grad_norm": 6.12817375365778, - "learning_rate": 8.231076228170343e-06, - "loss": 0.4896, + "epoch": 1.151369669584863, + "grad_norm": 3.6744226824858064, + "learning_rate": 1.412336710632457e-05, + "loss": 0.5762, "step": 8154 }, { - "epoch": 1.7100020968756553, - "grad_norm": 6.115681697380893, - "learning_rate": 8.228848089930734e-06, - "loss": 0.527, + "epoch": 1.1515108726348489, + "grad_norm": 2.893841612491844, + "learning_rate": 1.4121978233717269e-05, + "loss": 0.4545, "step": 8155 }, { - "epoch": 1.7102117844411826, - "grad_norm": 7.440460060068651, - "learning_rate": 8.22662004246573e-06, - "loss": 0.7173, + "epoch": 1.1516520756848347, + "grad_norm": 3.4228677521409887, + "learning_rate": 1.4120589265316974e-05, + "loss": 0.5936, "step": 8156 }, { - "epoch": 1.71042147200671, - "grad_norm": 6.8054841025024615, - "learning_rate": 8.22439208588953e-06, - "loss": 0.8208, + "epoch": 1.1517932787348206, + "grad_norm": 3.1816121874173215, + "learning_rate": 1.4119200201155963e-05, + "loss": 0.6313, "step": 8157 }, { - "epoch": 1.7106311595722374, - "grad_norm": 7.56088015674912, - "learning_rate": 8.222164220316311e-06, - "loss": 0.7341, + "epoch": 1.1519344817848065, + "grad_norm": 3.4269762736471745, + "learning_rate": 1.4117811041266518e-05, + "loss": 0.6248, "step": 8158 }, { - "epoch": 1.7108408471377647, - "grad_norm": 8.725436202836754, - "learning_rate": 8.21993644586026e-06, - "loss": 1.0063, + "epoch": 1.1520756848347924, + "grad_norm": 3.7703587482541274, + "learning_rate": 1.4116421785680923e-05, + "loss": 0.5943, "step": 8159 }, { - "epoch": 1.711050534703292, - "grad_norm": 6.133455701981651, - "learning_rate": 8.217708762635556e-06, - "loss": 0.5859, + "epoch": 1.1522168878847783, + "grad_norm": 3.1524325754643026, + "learning_rate": 1.4115032434431461e-05, + "loss": 0.6141, "step": 8160 }, { - "epoch": 1.7112602222688196, - "grad_norm": 7.3899053133477, - "learning_rate": 8.215481170756366e-06, - "loss": 0.896, + "epoch": 1.1523580909347642, + "grad_norm": 3.5406119279976993, + "learning_rate": 1.411364298755043e-05, + "loss": 0.6164, "step": 8161 }, { - "epoch": 1.7114699098343467, - "grad_norm": 6.244954221894262, - "learning_rate": 8.21325367033686e-06, - "loss": 0.6577, + "epoch": 1.15249929398475, + "grad_norm": 3.8682901272025343, + "learning_rate": 1.4112253445070103e-05, + "loss": 0.6345, "step": 8162 }, { - "epoch": 1.7116795973998742, - "grad_norm": 5.8680274880511245, - "learning_rate": 8.211026261491206e-06, - "loss": 0.7622, + "epoch": 1.152640497034736, + "grad_norm": 3.16344294373622, + "learning_rate": 1.4110863807022788e-05, + "loss": 0.5797, "step": 8163 }, { - "epoch": 1.7118892849654017, - "grad_norm": 6.138147885366866, - "learning_rate": 8.208798944333556e-06, - "loss": 0.6731, + "epoch": 1.1527817000847218, + "grad_norm": 3.119799345128174, + "learning_rate": 1.410947407344077e-05, + "loss": 0.5673, "step": 8164 }, { - "epoch": 1.7120989725309288, - "grad_norm": 6.348901416135137, - "learning_rate": 8.206571718978069e-06, - "loss": 0.6636, + "epoch": 1.1529229031347077, + "grad_norm": 3.901952948524144, + "learning_rate": 1.4108084244356353e-05, + "loss": 0.7494, "step": 8165 }, { - "epoch": 1.7123086600964563, - "grad_norm": 7.284893736187619, - "learning_rate": 8.204344585538888e-06, - "loss": 0.8092, + "epoch": 1.1530641061846936, + "grad_norm": 2.698194817560956, + "learning_rate": 1.410669431980183e-05, + "loss": 0.5093, "step": 8166 }, { - "epoch": 1.7125183476619836, - "grad_norm": 6.449792419083231, - "learning_rate": 8.202117544130168e-06, - "loss": 0.6986, + "epoch": 1.1532053092346795, + "grad_norm": 3.8951359228476368, + "learning_rate": 1.4105304299809505e-05, + "loss": 0.6469, "step": 8167 }, { - "epoch": 1.712728035227511, - "grad_norm": 6.627422944022606, - "learning_rate": 8.199890594866038e-06, - "loss": 0.7195, + "epoch": 1.1533465122846653, + "grad_norm": 3.6001222701312257, + "learning_rate": 1.410391418441168e-05, + "loss": 0.6413, "step": 8168 }, { - "epoch": 1.7129377227930385, - "grad_norm": 7.449214401703932, - "learning_rate": 8.197663737860638e-06, - "loss": 0.9217, + "epoch": 1.1534877153346512, + "grad_norm": 3.728120905409748, + "learning_rate": 1.4102523973640665e-05, + "loss": 0.5315, "step": 8169 }, { - "epoch": 1.7131474103585658, - "grad_norm": 7.430350347788243, - "learning_rate": 8.195436973228098e-06, - "loss": 0.8214, + "epoch": 1.153628918384637, + "grad_norm": 3.617784104953617, + "learning_rate": 1.4101133667528761e-05, + "loss": 0.6133, "step": 8170 }, { - "epoch": 1.713357097924093, - "grad_norm": 6.325582353141731, - "learning_rate": 8.193210301082539e-06, - "loss": 0.635, + "epoch": 1.153770121434623, + "grad_norm": 4.267306386602099, + "learning_rate": 1.4099743266108281e-05, + "loss": 0.6904, "step": 8171 }, { - "epoch": 1.7135667854896206, - "grad_norm": 8.111513429633575, - "learning_rate": 8.190983721538088e-06, - "loss": 0.7857, + "epoch": 1.1539113244846089, + "grad_norm": 4.192866002205385, + "learning_rate": 1.409835276941154e-05, + "loss": 0.6971, "step": 8172 }, { - "epoch": 1.7137764730551477, - "grad_norm": 7.221307433607994, - "learning_rate": 8.188757234708863e-06, - "loss": 0.655, + "epoch": 1.1540525275345948, + "grad_norm": 3.073096575355924, + "learning_rate": 1.4096962177470851e-05, + "loss": 0.4921, "step": 8173 }, { - "epoch": 1.7139861606206752, - "grad_norm": 6.304837902808786, - "learning_rate": 8.186530840708968e-06, - "loss": 0.6685, + "epoch": 1.1541937305845806, + "grad_norm": 3.331287449235198, + "learning_rate": 1.4095571490318532e-05, + "loss": 0.5664, "step": 8174 }, { - "epoch": 1.7141958481862025, - "grad_norm": 6.126215867899674, - "learning_rate": 8.184304539652512e-06, - "loss": 0.6496, + "epoch": 1.1543349336345665, + "grad_norm": 3.176471881744591, + "learning_rate": 1.4094180707986897e-05, + "loss": 0.5177, "step": 8175 }, { - "epoch": 1.7144055357517298, - "grad_norm": 7.586864382644645, - "learning_rate": 8.1820783316536e-06, - "loss": 0.8729, + "epoch": 1.1544761366845524, + "grad_norm": 3.2591693074042047, + "learning_rate": 1.4092789830508271e-05, + "loss": 0.5757, "step": 8176 }, { - "epoch": 1.7146152233172574, - "grad_norm": 5.807782788030051, - "learning_rate": 8.179852216826321e-06, - "loss": 0.6608, + "epoch": 1.1546173397345383, + "grad_norm": 3.1566431196366382, + "learning_rate": 1.4091398857914978e-05, + "loss": 0.5607, "step": 8177 }, { - "epoch": 1.7148249108827847, - "grad_norm": 5.998664136034808, - "learning_rate": 8.177626195284776e-06, - "loss": 0.5823, + "epoch": 1.1547585427845242, + "grad_norm": 3.4869177774061355, + "learning_rate": 1.4090007790239344e-05, + "loss": 0.556, "step": 8178 }, { - "epoch": 1.715034598448312, - "grad_norm": 8.617672837822257, - "learning_rate": 8.175400267143053e-06, - "loss": 0.9128, + "epoch": 1.15489974583451, + "grad_norm": 3.852770474018618, + "learning_rate": 1.4088616627513691e-05, + "loss": 0.6287, "step": 8179 }, { - "epoch": 1.7152442860138395, - "grad_norm": 6.285258396491024, - "learning_rate": 8.173174432515227e-06, - "loss": 0.6837, + "epoch": 1.155040948884496, + "grad_norm": 3.909684332050486, + "learning_rate": 1.4087225369770356e-05, + "loss": 0.5752, "step": 8180 }, { - "epoch": 1.7154539735793668, - "grad_norm": 7.085117029459445, - "learning_rate": 8.170948691515379e-06, - "loss": 0.6301, + "epoch": 1.1551821519344818, + "grad_norm": 3.2785300018096626, + "learning_rate": 1.4085834017041669e-05, + "loss": 0.5948, "step": 8181 }, { - "epoch": 1.715663661144894, - "grad_norm": 8.861707047417154, - "learning_rate": 8.168723044257587e-06, - "loss": 0.8713, + "epoch": 1.1553233549844677, + "grad_norm": 3.6689428787647045, + "learning_rate": 1.4084442569359964e-05, + "loss": 0.7646, "step": 8182 }, { - "epoch": 1.7158733487104216, - "grad_norm": 10.404968662891305, - "learning_rate": 8.16649749085591e-06, - "loss": 1.0908, + "epoch": 1.1554645580344536, + "grad_norm": 3.5636362488466005, + "learning_rate": 1.4083051026757578e-05, + "loss": 0.6114, "step": 8183 }, { - "epoch": 1.7160830362759487, - "grad_norm": 7.802812799100203, - "learning_rate": 8.164272031424418e-06, - "loss": 0.7555, + "epoch": 1.1556057610844395, + "grad_norm": 4.159618274870332, + "learning_rate": 1.4081659389266846e-05, + "loss": 0.751, "step": 8184 }, { - "epoch": 1.7162927238414762, - "grad_norm": 5.549380862511597, - "learning_rate": 8.162046666077172e-06, - "loss": 0.6243, + "epoch": 1.1557469641344253, + "grad_norm": 3.234703171706695, + "learning_rate": 1.4080267656920112e-05, + "loss": 0.5834, "step": 8185 }, { - "epoch": 1.7165024114070035, - "grad_norm": 6.895903588544072, - "learning_rate": 8.15982139492822e-06, - "loss": 0.6109, + "epoch": 1.1558881671844112, + "grad_norm": 3.407195289164041, + "learning_rate": 1.4078875829749724e-05, + "loss": 0.6389, "step": 8186 }, { - "epoch": 1.7167120989725309, - "grad_norm": 6.02676419301265, - "learning_rate": 8.157596218091611e-06, - "loss": 0.6135, + "epoch": 1.1560293702343971, + "grad_norm": 3.4099677016712397, + "learning_rate": 1.4077483907788026e-05, + "loss": 0.6008, "step": 8187 }, { - "epoch": 1.7169217865380584, - "grad_norm": 6.151604225126431, - "learning_rate": 8.155371135681394e-06, - "loss": 0.6399, + "epoch": 1.156170573284383, + "grad_norm": 3.6868236007994852, + "learning_rate": 1.4076091891067355e-05, + "loss": 0.6065, "step": 8188 }, { - "epoch": 1.7171314741035857, - "grad_norm": 8.367767517571766, - "learning_rate": 8.153146147811604e-06, - "loss": 0.8615, + "epoch": 1.1563117763343689, + "grad_norm": 3.6144039531676664, + "learning_rate": 1.4074699779620073e-05, + "loss": 0.7386, "step": 8189 }, { - "epoch": 1.717341161669113, - "grad_norm": 7.191671002030148, - "learning_rate": 8.150921254596279e-06, - "loss": 0.9028, + "epoch": 1.1564529793843548, + "grad_norm": 3.270569326735332, + "learning_rate": 1.4073307573478528e-05, + "loss": 0.5111, "step": 8190 }, { - "epoch": 1.7175508492346405, - "grad_norm": 8.010597007260875, - "learning_rate": 8.148696456149446e-06, - "loss": 0.825, + "epoch": 1.1565941824343406, + "grad_norm": 2.9213847594137974, + "learning_rate": 1.4071915272675074e-05, + "loss": 0.467, "step": 8191 }, { - "epoch": 1.7177605368001676, - "grad_norm": 6.678832605098482, - "learning_rate": 8.14647175258513e-06, - "loss": 0.5557, + "epoch": 1.1567353854843265, + "grad_norm": 2.842145383359047, + "learning_rate": 1.4070522877242068e-05, + "loss": 0.4333, "step": 8192 }, { - "epoch": 1.7179702243656951, - "grad_norm": 8.134515799250186, - "learning_rate": 8.144247144017349e-06, - "loss": 0.7169, + "epoch": 1.1568765885343124, + "grad_norm": 4.6565940521982725, + "learning_rate": 1.4069130387211867e-05, + "loss": 0.6103, "step": 8193 }, { - "epoch": 1.7181799119312224, - "grad_norm": 7.247201982357617, - "learning_rate": 8.142022630560125e-06, - "loss": 0.815, + "epoch": 1.1570177915842983, + "grad_norm": 4.012139964648908, + "learning_rate": 1.4067737802616832e-05, + "loss": 0.668, "step": 8194 }, { - "epoch": 1.7183895994967497, - "grad_norm": 6.682812000129259, - "learning_rate": 8.13979821232746e-06, - "loss": 0.6736, + "epoch": 1.1571589946342842, + "grad_norm": 3.6080963740655254, + "learning_rate": 1.406634512348933e-05, + "loss": 0.6923, "step": 8195 }, { - "epoch": 1.7185992870622773, - "grad_norm": 6.322571647763374, - "learning_rate": 8.137573889433366e-06, - "loss": 0.6438, + "epoch": 1.15730019768427, + "grad_norm": 3.202099071341995, + "learning_rate": 1.406495234986172e-05, + "loss": 0.5884, "step": 8196 }, { - "epoch": 1.7188089746278046, - "grad_norm": 7.62503480365893, - "learning_rate": 8.135349661991841e-06, - "loss": 0.7344, + "epoch": 1.157441400734256, + "grad_norm": 2.6115673601981837, + "learning_rate": 1.4063559481766375e-05, + "loss": 0.4009, "step": 8197 }, { - "epoch": 1.7190186621933319, - "grad_norm": 5.639005474413709, - "learning_rate": 8.133125530116875e-06, - "loss": 0.6475, + "epoch": 1.1575826037842418, + "grad_norm": 3.2237665247432212, + "learning_rate": 1.4062166519235665e-05, + "loss": 0.5402, "step": 8198 }, { - "epoch": 1.7192283497588594, - "grad_norm": 5.57458800454167, - "learning_rate": 8.130901493922467e-06, - "loss": 0.5328, + "epoch": 1.1577238068342277, + "grad_norm": 3.9092128260486354, + "learning_rate": 1.4060773462301955e-05, + "loss": 0.6783, "step": 8199 }, { - "epoch": 1.7194380373243867, - "grad_norm": 7.614181660668505, - "learning_rate": 8.1286775535226e-06, - "loss": 0.7696, + "epoch": 1.1578650098842136, + "grad_norm": 2.8350704980508246, + "learning_rate": 1.4059380310997626e-05, + "loss": 0.4654, "step": 8200 }, { - "epoch": 1.719647724889914, - "grad_norm": 8.524163589792032, - "learning_rate": 8.126453709031252e-06, - "loss": 0.8641, + "epoch": 1.1580062129341995, + "grad_norm": 3.6829213796043256, + "learning_rate": 1.4057987065355052e-05, + "loss": 0.7002, "step": 8201 }, { - "epoch": 1.7198574124554415, - "grad_norm": 7.369601194228133, - "learning_rate": 8.1242299605624e-06, - "loss": 0.9053, + "epoch": 1.1581474159841854, + "grad_norm": 3.571936527405481, + "learning_rate": 1.4056593725406608e-05, + "loss": 0.6275, "step": 8202 }, { - "epoch": 1.7200671000209686, - "grad_norm": 6.542954888542638, - "learning_rate": 8.12200630823002e-06, - "loss": 0.6714, + "epoch": 1.1582886190341712, + "grad_norm": 3.739708326509807, + "learning_rate": 1.4055200291184683e-05, + "loss": 0.5984, "step": 8203 }, { - "epoch": 1.7202767875864962, - "grad_norm": 7.10957047605065, - "learning_rate": 8.11978275214807e-06, - "loss": 0.6898, + "epoch": 1.1584298220841571, + "grad_norm": 3.850302344740976, + "learning_rate": 1.4053806762721652e-05, + "loss": 0.5563, "step": 8204 }, { - "epoch": 1.7204864751520235, - "grad_norm": 5.81061884923644, - "learning_rate": 8.117559292430516e-06, - "loss": 0.5302, + "epoch": 1.158571025134143, + "grad_norm": 3.518784388758248, + "learning_rate": 1.4052413140049898e-05, + "loss": 0.5231, "step": 8205 }, { - "epoch": 1.7206961627175508, - "grad_norm": 7.28781818100964, - "learning_rate": 8.115335929191315e-06, - "loss": 0.7449, + "epoch": 1.158712228184129, + "grad_norm": 3.607101718310263, + "learning_rate": 1.405101942320182e-05, + "loss": 0.6024, "step": 8206 }, { - "epoch": 1.7209058502830783, - "grad_norm": 5.916880409329518, - "learning_rate": 8.113112662544416e-06, - "loss": 0.5273, + "epoch": 1.1588534312341148, + "grad_norm": 3.7631804541130554, + "learning_rate": 1.4049625612209797e-05, + "loss": 0.5965, "step": 8207 }, { - "epoch": 1.7211155378486056, - "grad_norm": 5.7786705332591035, - "learning_rate": 8.110889492603761e-06, - "loss": 0.6454, + "epoch": 1.1589946342841007, + "grad_norm": 4.784680716999602, + "learning_rate": 1.4048231707106222e-05, + "loss": 0.7696, "step": 8208 }, { - "epoch": 1.721325225414133, - "grad_norm": 5.786587315184699, - "learning_rate": 8.108666419483304e-06, - "loss": 0.6075, + "epoch": 1.1591358373340865, + "grad_norm": 3.3307427819564515, + "learning_rate": 1.4046837707923493e-05, + "loss": 0.5908, "step": 8209 }, { - "epoch": 1.7215349129796604, - "grad_norm": 8.346565556440467, - "learning_rate": 8.106443443296968e-06, - "loss": 0.7603, + "epoch": 1.1592770403840722, + "grad_norm": 3.919756688641482, + "learning_rate": 1.4045443614694004e-05, + "loss": 0.6526, "step": 8210 }, { - "epoch": 1.7217446005451875, - "grad_norm": 9.860244573205568, - "learning_rate": 8.104220564158689e-06, - "loss": 0.832, + "epoch": 1.159418243434058, + "grad_norm": 3.298925983746374, + "learning_rate": 1.4044049427450146e-05, + "loss": 0.5117, "step": 8211 }, { - "epoch": 1.721954288110715, - "grad_norm": 7.480555248009403, - "learning_rate": 8.101997782182399e-06, - "loss": 0.7123, + "epoch": 1.159559446484044, + "grad_norm": 3.028725467685604, + "learning_rate": 1.4042655146224333e-05, + "loss": 0.5214, "step": 8212 }, { - "epoch": 1.7221639756762424, - "grad_norm": 7.180737630710306, - "learning_rate": 8.099775097482011e-06, - "loss": 0.8537, + "epoch": 1.1597006495340298, + "grad_norm": 3.685974020426489, + "learning_rate": 1.4041260771048958e-05, + "loss": 0.6354, "step": 8213 }, { - "epoch": 1.7223736632417697, - "grad_norm": 7.95985565892812, - "learning_rate": 8.097552510171443e-06, - "loss": 0.8298, + "epoch": 1.1598418525840157, + "grad_norm": 3.913447815072083, + "learning_rate": 1.4039866301956427e-05, + "loss": 0.7092, "step": 8214 }, { - "epoch": 1.7225833508072972, - "grad_norm": 7.272196253291843, - "learning_rate": 8.095330020364614e-06, - "loss": 0.7509, + "epoch": 1.1599830556340016, + "grad_norm": 2.926529419536438, + "learning_rate": 1.403847173897915e-05, + "loss": 0.5114, "step": 8215 }, { - "epoch": 1.7227930383728245, - "grad_norm": 5.468905308425477, - "learning_rate": 8.09310762817542e-06, - "loss": 0.425, + "epoch": 1.1601242586839875, + "grad_norm": 3.89838089151196, + "learning_rate": 1.4037077082149533e-05, + "loss": 0.638, "step": 8216 }, { - "epoch": 1.7230027259383518, - "grad_norm": 8.797484365560411, - "learning_rate": 8.090885333717769e-06, - "loss": 0.916, + "epoch": 1.1602654617339734, + "grad_norm": 3.505469362493737, + "learning_rate": 1.4035682331499987e-05, + "loss": 0.5861, "step": 8217 }, { - "epoch": 1.7232124135038793, - "grad_norm": 6.589387539124437, - "learning_rate": 8.088663137105559e-06, - "loss": 0.6779, + "epoch": 1.1604066647839593, + "grad_norm": 3.649312170653838, + "learning_rate": 1.4034287487062928e-05, + "loss": 0.5812, "step": 8218 }, { - "epoch": 1.7234221010694066, - "grad_norm": 9.934658094397228, - "learning_rate": 8.086441038452673e-06, - "loss": 1.254, + "epoch": 1.1605478678339451, + "grad_norm": 3.3418896701069323, + "learning_rate": 1.4032892548870769e-05, + "loss": 0.552, "step": 8219 }, { - "epoch": 1.723631788634934, - "grad_norm": 8.003808458455985, - "learning_rate": 8.084219037873005e-06, - "loss": 0.8663, + "epoch": 1.160689070883931, + "grad_norm": 4.645062352186488, + "learning_rate": 1.403149751695593e-05, + "loss": 0.8188, "step": 8220 }, { - "epoch": 1.7238414762004615, - "grad_norm": 6.405511111005282, - "learning_rate": 8.081997135480436e-06, - "loss": 0.7108, + "epoch": 1.160830273933917, + "grad_norm": 3.38680125481749, + "learning_rate": 1.4030102391350828e-05, + "loss": 0.5751, "step": 8221 }, { - "epoch": 1.7240511637659885, - "grad_norm": 6.5854792333656995, - "learning_rate": 8.079775331388839e-06, - "loss": 0.7394, + "epoch": 1.1609714769839028, + "grad_norm": 3.9927982301599596, + "learning_rate": 1.4028707172087885e-05, + "loss": 0.6813, "step": 8222 }, { - "epoch": 1.724260851331516, - "grad_norm": 6.735144110736056, - "learning_rate": 8.077553625712086e-06, - "loss": 0.7217, + "epoch": 1.1611126800338887, + "grad_norm": 3.4371551226942754, + "learning_rate": 1.402731185919953e-05, + "loss": 0.5199, "step": 8223 }, { - "epoch": 1.7244705388970434, - "grad_norm": 6.845757268998984, - "learning_rate": 8.075332018564046e-06, - "loss": 0.6625, + "epoch": 1.1612538830838746, + "grad_norm": 4.592460265089685, + "learning_rate": 1.402591645271819e-05, + "loss": 0.7916, "step": 8224 }, { - "epoch": 1.7246802264625707, - "grad_norm": 6.672039650844115, - "learning_rate": 8.073110510058573e-06, - "loss": 0.7037, + "epoch": 1.1613950861338604, + "grad_norm": 3.235586539467011, + "learning_rate": 1.4024520952676286e-05, + "loss": 0.6107, "step": 8225 }, { - "epoch": 1.7248899140280982, - "grad_norm": 6.856333196103953, - "learning_rate": 8.070889100309532e-06, - "loss": 0.7383, + "epoch": 1.1615362891838463, + "grad_norm": 3.4564133148555287, + "learning_rate": 1.4023125359106253e-05, + "loss": 0.5892, "step": 8226 }, { - "epoch": 1.7250996015936255, - "grad_norm": 9.615850524176137, - "learning_rate": 8.06866778943077e-06, - "loss": 1.1337, + "epoch": 1.1616774922338322, + "grad_norm": 4.743146425001028, + "learning_rate": 1.4021729672040527e-05, + "loss": 0.8198, "step": 8227 }, { - "epoch": 1.7253092891591528, - "grad_norm": 8.583647409102799, - "learning_rate": 8.066446577536137e-06, - "loss": 0.8598, + "epoch": 1.161818695283818, + "grad_norm": 3.562759063869971, + "learning_rate": 1.4020333891511536e-05, + "loss": 0.7455, "step": 8228 }, { - "epoch": 1.7255189767246804, - "grad_norm": 4.539921241678876, - "learning_rate": 8.064225464739465e-06, - "loss": 0.504, + "epoch": 1.161959898333804, + "grad_norm": 3.6012549564426606, + "learning_rate": 1.4018938017551727e-05, + "loss": 0.6397, "step": 8229 }, { - "epoch": 1.7257286642902074, - "grad_norm": 7.949320314025649, - "learning_rate": 8.062004451154597e-06, - "loss": 0.871, + "epoch": 1.1621011013837899, + "grad_norm": 3.571457410026541, + "learning_rate": 1.4017542050193532e-05, + "loss": 0.5837, "step": 8230 }, { - "epoch": 1.725938351855735, - "grad_norm": 6.1514283000213466, - "learning_rate": 8.059783536895365e-06, - "loss": 0.6817, + "epoch": 1.1622423044337757, + "grad_norm": 2.8428310801874965, + "learning_rate": 1.401614598946939e-05, + "loss": 0.4914, "step": 8231 }, { - "epoch": 1.7261480394212623, - "grad_norm": 6.3662705175084415, - "learning_rate": 8.057562722075589e-06, - "loss": 0.7264, + "epoch": 1.1623835074837616, + "grad_norm": 3.439728449933566, + "learning_rate": 1.4014749835411754e-05, + "loss": 0.5854, "step": 8232 }, { - "epoch": 1.7263577269867896, - "grad_norm": 7.4341833140923095, - "learning_rate": 8.055342006809092e-06, - "loss": 0.6921, + "epoch": 1.1625247105337475, + "grad_norm": 3.5435094927846196, + "learning_rate": 1.4013353588053064e-05, + "loss": 0.6096, "step": 8233 }, { - "epoch": 1.726567414552317, - "grad_norm": 7.01816945301242, - "learning_rate": 8.053121391209695e-06, - "loss": 0.8566, + "epoch": 1.1626659135837334, + "grad_norm": 3.411961448348514, + "learning_rate": 1.401195724742577e-05, + "loss": 0.6951, "step": 8234 }, { - "epoch": 1.7267771021178444, - "grad_norm": 5.755900544814434, - "learning_rate": 8.050900875391196e-06, - "loss": 0.5714, + "epoch": 1.1628071166337193, + "grad_norm": 3.6003960524102663, + "learning_rate": 1.4010560813562327e-05, + "loss": 0.558, "step": 8235 }, { - "epoch": 1.7269867896833717, - "grad_norm": 7.229493471604383, - "learning_rate": 8.048680459467413e-06, - "loss": 0.8644, + "epoch": 1.1629483196837052, + "grad_norm": 3.5515967227176346, + "learning_rate": 1.4009164286495179e-05, + "loss": 0.6125, "step": 8236 }, { - "epoch": 1.7271964772488992, - "grad_norm": 5.782282972841823, - "learning_rate": 8.046460143552141e-06, - "loss": 0.6117, + "epoch": 1.163089522733691, + "grad_norm": 3.096296461027036, + "learning_rate": 1.4007767666256783e-05, + "loss": 0.4314, "step": 8237 }, { - "epoch": 1.7274061648144265, - "grad_norm": 6.481540888584115, - "learning_rate": 8.044239927759172e-06, - "loss": 0.8677, + "epoch": 1.163230725783677, + "grad_norm": 3.350280970181078, + "learning_rate": 1.40063709528796e-05, + "loss": 0.5526, "step": 8238 }, { - "epoch": 1.7276158523799539, - "grad_norm": 7.404074673348606, - "learning_rate": 8.042019812202297e-06, - "loss": 0.7362, + "epoch": 1.1633719288336628, + "grad_norm": 3.634435620366391, + "learning_rate": 1.4004974146396082e-05, + "loss": 0.6088, "step": 8239 }, { - "epoch": 1.7278255399454814, - "grad_norm": 7.867787277197765, - "learning_rate": 8.03979979699531e-06, - "loss": 0.7477, + "epoch": 1.1635131318836487, + "grad_norm": 3.431110944127314, + "learning_rate": 1.40035772468387e-05, + "loss": 0.6168, "step": 8240 }, { - "epoch": 1.7280352275110085, - "grad_norm": 6.2950031051578845, - "learning_rate": 8.037579882251977e-06, - "loss": 0.6074, + "epoch": 1.1636543349336346, + "grad_norm": 5.358322257686108, + "learning_rate": 1.4002180254239907e-05, + "loss": 0.9033, "step": 8241 }, { - "epoch": 1.728244915076536, - "grad_norm": 5.675253449097033, - "learning_rate": 8.03536006808608e-06, - "loss": 0.4734, + "epoch": 1.1637955379836205, + "grad_norm": 3.253048633261453, + "learning_rate": 1.4000783168632178e-05, + "loss": 0.494, "step": 8242 }, { - "epoch": 1.7284546026420633, - "grad_norm": 6.166922317284259, - "learning_rate": 8.033140354611389e-06, - "loss": 0.7166, + "epoch": 1.1639367410336063, + "grad_norm": 3.3078880252937957, + "learning_rate": 1.3999385990047971e-05, + "loss": 0.6062, "step": 8243 }, { - "epoch": 1.7286642902075906, - "grad_norm": 7.680997787423135, - "learning_rate": 8.030920741941665e-06, - "loss": 0.8547, + "epoch": 1.1640779440835922, + "grad_norm": 3.1605970448116856, + "learning_rate": 1.3997988718519766e-05, + "loss": 0.581, "step": 8244 }, { - "epoch": 1.7288739777731181, - "grad_norm": 7.982062802826913, - "learning_rate": 8.028701230190664e-06, - "loss": 0.8488, + "epoch": 1.164219147133578, + "grad_norm": 4.040110025344713, + "learning_rate": 1.3996591354080024e-05, + "loss": 0.6096, "step": 8245 }, { - "epoch": 1.7290836653386454, - "grad_norm": 5.451241781681055, - "learning_rate": 8.02648181947215e-06, - "loss": 0.5516, + "epoch": 1.164360350183564, + "grad_norm": 3.2546788620734532, + "learning_rate": 1.399519389676123e-05, + "loss": 0.566, "step": 8246 }, { - "epoch": 1.7292933529041727, - "grad_norm": 7.202149454924948, - "learning_rate": 8.024262509899863e-06, - "loss": 0.7099, + "epoch": 1.1645015532335499, + "grad_norm": 3.2712374807478715, + "learning_rate": 1.3993796346595854e-05, + "loss": 0.5439, "step": 8247 }, { - "epoch": 1.7295030404697003, - "grad_norm": 6.2974388870488776, - "learning_rate": 8.02204330158755e-06, - "loss": 0.7021, + "epoch": 1.1646427562835358, + "grad_norm": 3.667470317953565, + "learning_rate": 1.3992398703616372e-05, + "loss": 0.6948, "step": 8248 }, { - "epoch": 1.7297127280352274, - "grad_norm": 7.5498901851334255, - "learning_rate": 8.019824194648949e-06, - "loss": 0.8218, + "epoch": 1.1647839593335216, + "grad_norm": 4.168812369375702, + "learning_rate": 1.3991000967855272e-05, + "loss": 0.7419, "step": 8249 }, { - "epoch": 1.7299224156007549, - "grad_norm": 6.581751741224917, - "learning_rate": 8.017605189197788e-06, - "loss": 0.7693, + "epoch": 1.1649251623835075, + "grad_norm": 3.884926643780467, + "learning_rate": 1.3989603139345037e-05, + "loss": 0.7391, "step": 8250 }, { - "epoch": 1.7301321031662822, - "grad_norm": 6.309746109822411, - "learning_rate": 8.015386285347803e-06, - "loss": 0.6963, + "epoch": 1.1650663654334934, + "grad_norm": 4.317894937824882, + "learning_rate": 1.3988205218118141e-05, + "loss": 0.692, "step": 8251 }, { - "epoch": 1.7303417907318095, - "grad_norm": 6.303736070129266, - "learning_rate": 8.013167483212714e-06, - "loss": 0.8515, + "epoch": 1.1652075684834793, + "grad_norm": 3.777476044770958, + "learning_rate": 1.3986807204207077e-05, + "loss": 0.6687, "step": 8252 }, { - "epoch": 1.730551478297337, - "grad_norm": 5.350309776995385, - "learning_rate": 8.010948782906237e-06, - "loss": 0.4598, + "epoch": 1.1653487715334652, + "grad_norm": 3.7104032857127898, + "learning_rate": 1.398540909764434e-05, + "loss": 0.6621, "step": 8253 }, { - "epoch": 1.7307611658628643, - "grad_norm": 6.022426848843186, - "learning_rate": 8.008730184542084e-06, - "loss": 0.6121, + "epoch": 1.165489974583451, + "grad_norm": 3.28607080658845, + "learning_rate": 1.3984010898462417e-05, + "loss": 0.4879, "step": 8254 }, { - "epoch": 1.7309708534283916, - "grad_norm": 7.191221966783318, - "learning_rate": 8.006511688233964e-06, - "loss": 0.5704, + "epoch": 1.165631177633437, + "grad_norm": 3.4411351596177093, + "learning_rate": 1.39826126066938e-05, + "loss": 0.5212, "step": 8255 }, { - "epoch": 1.7311805409939192, - "grad_norm": 5.224210817696967, - "learning_rate": 8.004293294095573e-06, - "loss": 0.5177, + "epoch": 1.1657723806834228, + "grad_norm": 3.66304061963536, + "learning_rate": 1.3981214222370985e-05, + "loss": 0.5571, "step": 8256 }, { - "epoch": 1.7313902285594465, - "grad_norm": 7.402630925206769, - "learning_rate": 8.002075002240616e-06, - "loss": 0.7629, + "epoch": 1.1659135837334087, + "grad_norm": 3.577863090664369, + "learning_rate": 1.3979815745526471e-05, + "loss": 0.6686, "step": 8257 }, { - "epoch": 1.7315999161249738, - "grad_norm": 5.454537964849812, - "learning_rate": 7.99985681278278e-06, - "loss": 0.4808, + "epoch": 1.1660547867833946, + "grad_norm": 3.503490475286253, + "learning_rate": 1.397841717619276e-05, + "loss": 0.5411, "step": 8258 }, { - "epoch": 1.7318096036905013, - "grad_norm": 7.15322158328722, - "learning_rate": 7.997638725835751e-06, - "loss": 0.8472, + "epoch": 1.1661959898333805, + "grad_norm": 3.67508426476766, + "learning_rate": 1.397701851440235e-05, + "loss": 0.7675, "step": 8259 }, { - "epoch": 1.7320192912560284, - "grad_norm": 7.728920583018578, - "learning_rate": 7.995420741513207e-06, - "loss": 0.8911, + "epoch": 1.1663371928833663, + "grad_norm": 3.153221490778555, + "learning_rate": 1.3975619760187746e-05, + "loss": 0.5166, "step": 8260 }, { - "epoch": 1.732228978821556, - "grad_norm": 5.789871582707629, - "learning_rate": 7.993202859928835e-06, - "loss": 0.6883, + "epoch": 1.1664783959333522, + "grad_norm": 3.8751180980940476, + "learning_rate": 1.3974220913581458e-05, + "loss": 0.574, "step": 8261 }, { - "epoch": 1.7324386663870832, - "grad_norm": 7.34303947750113, - "learning_rate": 7.99098508119629e-06, - "loss": 0.6896, + "epoch": 1.1666195989833381, + "grad_norm": 3.644641565064742, + "learning_rate": 1.3972821974615991e-05, + "loss": 0.6282, "step": 8262 }, { - "epoch": 1.7326483539526105, - "grad_norm": 6.469918724805049, - "learning_rate": 7.988767405429246e-06, - "loss": 0.6805, + "epoch": 1.166760802033324, + "grad_norm": 4.138287659921616, + "learning_rate": 1.3971422943323858e-05, + "loss": 0.7505, "step": 8263 }, { - "epoch": 1.732858041518138, - "grad_norm": 7.323054779387471, - "learning_rate": 7.986549832741361e-06, - "loss": 0.8674, + "epoch": 1.1669020050833099, + "grad_norm": 4.496561678500985, + "learning_rate": 1.3970023819737573e-05, + "loss": 0.9951, "step": 8264 }, { - "epoch": 1.7330677290836654, - "grad_norm": 6.990265006957256, - "learning_rate": 7.98433236324629e-06, - "loss": 0.8337, + "epoch": 1.1670432081332958, + "grad_norm": 3.35614206043222, + "learning_rate": 1.3968624603889644e-05, + "loss": 0.5212, "step": 8265 }, { - "epoch": 1.7332774166491927, - "grad_norm": 6.906869385986223, - "learning_rate": 7.982114997057677e-06, - "loss": 0.6747, + "epoch": 1.1671844111832816, + "grad_norm": 4.306881089586144, + "learning_rate": 1.3967225295812595e-05, + "loss": 0.7243, "step": 8266 }, { - "epoch": 1.7334871042147202, - "grad_norm": 6.795244900853352, - "learning_rate": 7.979897734289175e-06, - "loss": 0.6391, + "epoch": 1.1673256142332675, + "grad_norm": 3.765357929367898, + "learning_rate": 1.3965825895538947e-05, + "loss": 0.6868, "step": 8267 }, { - "epoch": 1.7336967917802473, - "grad_norm": 6.707399235163398, - "learning_rate": 7.977680575054417e-06, - "loss": 0.6898, + "epoch": 1.1674668172832534, + "grad_norm": 3.532371131285415, + "learning_rate": 1.3964426403101212e-05, + "loss": 0.6491, "step": 8268 }, { - "epoch": 1.7339064793457748, - "grad_norm": 6.471878022383615, - "learning_rate": 7.975463519467037e-06, - "loss": 0.7686, + "epoch": 1.1676080203332393, + "grad_norm": 4.1285447857942605, + "learning_rate": 1.3963026818531924e-05, + "loss": 0.7461, "step": 8269 }, { - "epoch": 1.7341161669113023, - "grad_norm": 7.104654816408026, - "learning_rate": 7.973246567640664e-06, - "loss": 0.7643, + "epoch": 1.167749223383225, + "grad_norm": 3.298625227096976, + "learning_rate": 1.3961627141863603e-05, + "loss": 0.5743, "step": 8270 }, { - "epoch": 1.7343258544768294, - "grad_norm": 6.38410107808737, - "learning_rate": 7.97102971968892e-06, - "loss": 0.7341, + "epoch": 1.1678904264332108, + "grad_norm": 3.01311193286848, + "learning_rate": 1.3960227373128779e-05, + "loss": 0.4528, "step": 8271 }, { - "epoch": 1.734535542042357, - "grad_norm": 7.444919822500842, - "learning_rate": 7.968812975725416e-06, - "loss": 0.7015, + "epoch": 1.1680316294831967, + "grad_norm": 3.8081752452121638, + "learning_rate": 1.3958827512359979e-05, + "loss": 0.6205, "step": 8272 }, { - "epoch": 1.7347452296078842, - "grad_norm": 5.637188393391445, - "learning_rate": 7.966596335863776e-06, - "loss": 0.5601, + "epoch": 1.1681728325331826, + "grad_norm": 3.642622664440386, + "learning_rate": 1.3957427559589738e-05, + "loss": 0.5857, "step": 8273 }, { - "epoch": 1.7349549171734115, - "grad_norm": 7.110329623634496, - "learning_rate": 7.964379800217598e-06, - "loss": 0.6021, + "epoch": 1.1683140355831685, + "grad_norm": 3.785830079389926, + "learning_rate": 1.395602751485059e-05, + "loss": 0.6389, "step": 8274 }, { - "epoch": 1.735164604738939, - "grad_norm": 6.581437988032651, - "learning_rate": 7.962163368900486e-06, - "loss": 0.7464, + "epoch": 1.1684552386331544, + "grad_norm": 3.307034370662792, + "learning_rate": 1.3954627378175072e-05, + "loss": 0.6378, "step": 8275 }, { - "epoch": 1.7353742923044664, - "grad_norm": 7.459196854556857, - "learning_rate": 7.959947042026038e-06, - "loss": 0.6217, + "epoch": 1.1685964416831403, + "grad_norm": 4.053214506615257, + "learning_rate": 1.3953227149595718e-05, + "loss": 0.6831, "step": 8276 }, { - "epoch": 1.7355839798699937, - "grad_norm": 9.355437499294768, - "learning_rate": 7.957730819707838e-06, - "loss": 0.9417, + "epoch": 1.1687376447331261, + "grad_norm": 2.9480271065795094, + "learning_rate": 1.3951826829145074e-05, + "loss": 0.5369, "step": 8277 }, { - "epoch": 1.7357936674355212, - "grad_norm": 6.2292941407762035, - "learning_rate": 7.955514702059477e-06, - "loss": 0.5518, + "epoch": 1.168878847783112, + "grad_norm": 3.833672446881275, + "learning_rate": 1.3950426416855685e-05, + "loss": 0.6331, "step": 8278 }, { - "epoch": 1.7360033550010483, - "grad_norm": 7.382120295776903, - "learning_rate": 7.953298689194538e-06, - "loss": 0.6794, + "epoch": 1.169020050833098, + "grad_norm": 2.958382600890173, + "learning_rate": 1.3949025912760088e-05, + "loss": 0.576, "step": 8279 }, { - "epoch": 1.7362130425665758, - "grad_norm": 6.56409708270257, - "learning_rate": 7.951082781226586e-06, - "loss": 0.5082, + "epoch": 1.1691612538830838, + "grad_norm": 2.9413391028612366, + "learning_rate": 1.3947625316890836e-05, + "loss": 0.5332, "step": 8280 }, { - "epoch": 1.7364227301321031, - "grad_norm": 7.84310053467573, - "learning_rate": 7.948866978269194e-06, - "loss": 0.8546, + "epoch": 1.1693024569330697, + "grad_norm": 3.174621601032077, + "learning_rate": 1.3946224629280476e-05, + "loss": 0.5838, "step": 8281 }, { - "epoch": 1.7366324176976304, - "grad_norm": 6.534705237406541, - "learning_rate": 7.946651280435935e-06, - "loss": 0.7206, + "epoch": 1.1694436599830556, + "grad_norm": 4.260416608202992, + "learning_rate": 1.3944823849961557e-05, + "loss": 0.7717, "step": 8282 }, { - "epoch": 1.736842105263158, - "grad_norm": 8.990765198321185, - "learning_rate": 7.944435687840352e-06, - "loss": 0.7661, + "epoch": 1.1695848630330414, + "grad_norm": 3.7520727617527476, + "learning_rate": 1.394342297896664e-05, + "loss": 0.6443, "step": 8283 }, { - "epoch": 1.7370517928286853, - "grad_norm": 8.321480733497037, - "learning_rate": 7.942220200596007e-06, - "loss": 0.7348, + "epoch": 1.1697260660830273, + "grad_norm": 3.222661245377341, + "learning_rate": 1.3942022016328276e-05, + "loss": 0.59, "step": 8284 }, { - "epoch": 1.7372614803942126, - "grad_norm": 7.884262947725674, - "learning_rate": 7.940004818816446e-06, - "loss": 0.8127, + "epoch": 1.1698672691330132, + "grad_norm": 3.9948122755584676, + "learning_rate": 1.3940620962079018e-05, + "loss": 0.6516, "step": 8285 }, { - "epoch": 1.73747116795974, - "grad_norm": 6.328351740248241, - "learning_rate": 7.937789542615212e-06, - "loss": 0.5466, + "epoch": 1.170008472182999, + "grad_norm": 3.6564796644704414, + "learning_rate": 1.3939219816251435e-05, + "loss": 0.6581, "step": 8286 }, { - "epoch": 1.7376808555252672, - "grad_norm": 7.3128175928868275, - "learning_rate": 7.935574372105837e-06, - "loss": 0.7033, + "epoch": 1.170149675232985, + "grad_norm": 4.228637208210019, + "learning_rate": 1.3937818578878081e-05, + "loss": 0.6694, "step": 8287 }, { - "epoch": 1.7378905430907947, - "grad_norm": 8.268222288544365, - "learning_rate": 7.933359307401857e-06, - "loss": 0.8022, + "epoch": 1.1702908782829708, + "grad_norm": 4.102501999733961, + "learning_rate": 1.3936417249991528e-05, + "loss": 0.6356, "step": 8288 }, { - "epoch": 1.7381002306563222, - "grad_norm": 7.459539807304485, - "learning_rate": 7.931144348616802e-06, - "loss": 0.7717, + "epoch": 1.1704320813329567, + "grad_norm": 3.095838545828639, + "learning_rate": 1.3935015829624337e-05, + "loss": 0.6505, "step": 8289 }, { - "epoch": 1.7383099182218493, - "grad_norm": 5.454302510356499, - "learning_rate": 7.928929495864182e-06, - "loss": 0.4903, + "epoch": 1.1705732843829426, + "grad_norm": 3.531918495720782, + "learning_rate": 1.3933614317809076e-05, + "loss": 0.63, "step": 8290 }, { - "epoch": 1.7385196057873769, - "grad_norm": 6.635050422197754, - "learning_rate": 7.926714749257517e-06, - "loss": 0.7015, + "epoch": 1.1707144874329285, + "grad_norm": 3.4244590728157265, + "learning_rate": 1.3932212714578316e-05, + "loss": 0.5943, "step": 8291 }, { - "epoch": 1.7387292933529042, - "grad_norm": 6.249368287288514, - "learning_rate": 7.924500108910324e-06, - "loss": 0.715, + "epoch": 1.1708556904829144, + "grad_norm": 3.5008219869742043, + "learning_rate": 1.3930811019964633e-05, + "loss": 0.5981, "step": 8292 }, { - "epoch": 1.7389389809184315, - "grad_norm": 6.337758586607306, - "learning_rate": 7.922285574936092e-06, - "loss": 0.6696, + "epoch": 1.1709968935329003, + "grad_norm": 3.8861870520847828, + "learning_rate": 1.39294092340006e-05, + "loss": 0.6434, "step": 8293 }, { - "epoch": 1.739148668483959, - "grad_norm": 7.783572610343767, - "learning_rate": 7.920071147448332e-06, - "loss": 0.9077, + "epoch": 1.1711380965828861, + "grad_norm": 4.202121696914486, + "learning_rate": 1.3928007356718792e-05, + "loss": 0.7005, "step": 8294 }, { - "epoch": 1.7393583560494863, - "grad_norm": 7.261223859785903, - "learning_rate": 7.917856826560534e-06, - "loss": 0.8187, + "epoch": 1.171279299632872, + "grad_norm": 3.3188395465168905, + "learning_rate": 1.3926605388151791e-05, + "loss": 0.55, "step": 8295 }, { - "epoch": 1.7395680436150136, - "grad_norm": 7.83582434330073, - "learning_rate": 7.915642612386184e-06, - "loss": 0.6169, + "epoch": 1.171420502682858, + "grad_norm": 4.284812885762325, + "learning_rate": 1.3925203328332173e-05, + "loss": 0.9057, "step": 8296 }, { - "epoch": 1.7397777311805411, - "grad_norm": 6.18310439796562, - "learning_rate": 7.913428505038761e-06, - "loss": 0.5463, + "epoch": 1.1715617057328438, + "grad_norm": 4.002515883312534, + "learning_rate": 1.3923801177292529e-05, + "loss": 0.5579, "step": 8297 }, { - "epoch": 1.7399874187460682, - "grad_norm": 8.091328395805233, - "learning_rate": 7.911214504631752e-06, - "loss": 0.896, + "epoch": 1.1717029087828297, + "grad_norm": 3.4002931228706403, + "learning_rate": 1.3922398935065437e-05, + "loss": 0.5218, "step": 8298 }, { - "epoch": 1.7401971063115957, - "grad_norm": 7.6694064551194066, - "learning_rate": 7.90900061127862e-06, - "loss": 0.7617, + "epoch": 1.1718441118328156, + "grad_norm": 3.264394441548996, + "learning_rate": 1.392099660168349e-05, + "loss": 0.4535, "step": 8299 }, { - "epoch": 1.740406793877123, - "grad_norm": 7.627220139854269, - "learning_rate": 7.906786825092832e-06, - "loss": 0.7196, + "epoch": 1.1719853148828014, + "grad_norm": 4.208880127254522, + "learning_rate": 1.3919594177179272e-05, + "loss": 0.659, "step": 8300 }, { - "epoch": 1.7406164814426504, - "grad_norm": 8.19599473420918, - "learning_rate": 7.904573146187854e-06, - "loss": 0.9337, + "epoch": 1.1721265179327873, + "grad_norm": 3.4911476254059566, + "learning_rate": 1.3918191661585384e-05, + "loss": 0.5719, "step": 8301 }, { - "epoch": 1.7408261690081779, - "grad_norm": 7.594000218138649, - "learning_rate": 7.902359574677133e-06, - "loss": 0.6259, + "epoch": 1.1722677209827732, + "grad_norm": 4.150622998617397, + "learning_rate": 1.3916789054934408e-05, + "loss": 0.7169, "step": 8302 }, { - "epoch": 1.7410358565737052, - "grad_norm": 6.750478596740729, - "learning_rate": 7.900146110674121e-06, - "loss": 0.7981, + "epoch": 1.172408924032759, + "grad_norm": 3.573390661528358, + "learning_rate": 1.3915386357258948e-05, + "loss": 0.6264, "step": 8303 }, { - "epoch": 1.7412455441392325, - "grad_norm": 5.659334536088653, - "learning_rate": 7.897932754292266e-06, - "loss": 0.5247, + "epoch": 1.172550127082745, + "grad_norm": 4.4049483944352215, + "learning_rate": 1.39139835685916e-05, + "loss": 0.7604, "step": 8304 }, { - "epoch": 1.74145523170476, - "grad_norm": 6.9937158451341475, - "learning_rate": 7.895719505645002e-06, - "loss": 0.9145, + "epoch": 1.1726913301327309, + "grad_norm": 3.2214145451369376, + "learning_rate": 1.3912580688964964e-05, + "loss": 0.4604, "step": 8305 }, { - "epoch": 1.7416649192702873, - "grad_norm": 7.731880666962982, - "learning_rate": 7.893506364845762e-06, - "loss": 0.755, + "epoch": 1.1728325331827167, + "grad_norm": 4.309509357272494, + "learning_rate": 1.3911177718411643e-05, + "loss": 0.6167, "step": 8306 }, { - "epoch": 1.7418746068358146, - "grad_norm": 7.491943245009911, - "learning_rate": 7.891293332007977e-06, - "loss": 0.7756, + "epoch": 1.1729737362327026, + "grad_norm": 4.826575652329133, + "learning_rate": 1.3909774656964242e-05, + "loss": 0.7904, "step": 8307 }, { - "epoch": 1.7420842944013422, - "grad_norm": 8.330455988579322, - "learning_rate": 7.889080407245063e-06, - "loss": 0.865, + "epoch": 1.1731149392826885, + "grad_norm": 3.7086228340918796, + "learning_rate": 1.3908371504655365e-05, + "loss": 0.6548, "step": 8308 }, { - "epoch": 1.7422939819668692, - "grad_norm": 5.994755713686167, - "learning_rate": 7.88686759067044e-06, - "loss": 0.6359, + "epoch": 1.1732561423326744, + "grad_norm": 3.7580826952285853, + "learning_rate": 1.3906968261517624e-05, + "loss": 0.5661, "step": 8309 }, { - "epoch": 1.7425036695323968, - "grad_norm": 6.9876751484953035, - "learning_rate": 7.88465488239752e-06, - "loss": 0.6817, + "epoch": 1.1733973453826603, + "grad_norm": 3.6180821137143724, + "learning_rate": 1.3905564927583625e-05, + "loss": 0.6274, "step": 8310 }, { - "epoch": 1.742713357097924, - "grad_norm": 6.912174817659617, - "learning_rate": 7.882442282539704e-06, - "loss": 0.5292, + "epoch": 1.1735385484326462, + "grad_norm": 3.415224934719934, + "learning_rate": 1.3904161502885985e-05, + "loss": 0.4702, "step": 8311 }, { - "epoch": 1.7429230446634514, - "grad_norm": 6.139903703367255, - "learning_rate": 7.880229791210392e-06, - "loss": 0.7273, + "epoch": 1.173679751482632, + "grad_norm": 3.8132477084220304, + "learning_rate": 1.3902757987457318e-05, + "loss": 0.5765, "step": 8312 }, { - "epoch": 1.743132732228979, - "grad_norm": 7.244508436198552, - "learning_rate": 7.878017408522987e-06, - "loss": 0.6925, + "epoch": 1.173820954532618, + "grad_norm": 3.26836043729517, + "learning_rate": 1.3901354381330241e-05, + "loss": 0.5859, "step": 8313 }, { - "epoch": 1.7433424197945062, - "grad_norm": 6.380479138070323, - "learning_rate": 7.875805134590862e-06, - "loss": 0.6611, + "epoch": 1.1739621575826038, + "grad_norm": 3.1226303203555013, + "learning_rate": 1.3899950684537371e-05, + "loss": 0.5853, "step": 8314 }, { - "epoch": 1.7435521073600335, - "grad_norm": 6.18512297497648, - "learning_rate": 7.873592969527412e-06, - "loss": 0.5637, + "epoch": 1.1741033606325897, + "grad_norm": 3.4058714289576724, + "learning_rate": 1.3898546897111334e-05, + "loss": 0.5389, "step": 8315 }, { - "epoch": 1.743761794925561, - "grad_norm": 9.061430051757958, - "learning_rate": 7.871380913446011e-06, - "loss": 0.8406, + "epoch": 1.1742445636825756, + "grad_norm": 3.378011961602447, + "learning_rate": 1.3897143019084749e-05, + "loss": 0.6296, "step": 8316 }, { - "epoch": 1.7439714824910881, - "grad_norm": 7.764680019996193, - "learning_rate": 7.869168966460029e-06, - "loss": 0.5375, + "epoch": 1.1743857667325615, + "grad_norm": 3.739935881232564, + "learning_rate": 1.3895739050490243e-05, + "loss": 0.6554, "step": 8317 }, { - "epoch": 1.7441811700566157, - "grad_norm": 6.566147811939327, - "learning_rate": 7.866957128682831e-06, - "loss": 0.6677, + "epoch": 1.1745269697825473, + "grad_norm": 4.143397982433403, + "learning_rate": 1.3894334991360448e-05, + "loss": 0.7503, "step": 8318 }, { - "epoch": 1.744390857622143, - "grad_norm": 5.717880569223168, - "learning_rate": 7.864745400227786e-06, - "loss": 0.5312, + "epoch": 1.1746681728325332, + "grad_norm": 3.466208838799215, + "learning_rate": 1.3892930841727986e-05, + "loss": 0.6546, "step": 8319 }, { - "epoch": 1.7446005451876703, - "grad_norm": 7.263312454491009, - "learning_rate": 7.862533781208236e-06, - "loss": 0.7405, + "epoch": 1.174809375882519, + "grad_norm": 3.7228977069159286, + "learning_rate": 1.3891526601625492e-05, + "loss": 0.6596, "step": 8320 }, { - "epoch": 1.7448102327531978, - "grad_norm": 8.001183117798696, - "learning_rate": 7.86032227173754e-06, - "loss": 0.7638, + "epoch": 1.174950578932505, + "grad_norm": 4.362461127909217, + "learning_rate": 1.3890122271085601e-05, + "loss": 0.7648, "step": 8321 }, { - "epoch": 1.745019920318725, - "grad_norm": 6.025111626451926, - "learning_rate": 7.858110871929039e-06, - "loss": 0.5532, + "epoch": 1.1750917819824909, + "grad_norm": 3.4653020473009013, + "learning_rate": 1.388871785014095e-05, + "loss": 0.4865, "step": 8322 }, { - "epoch": 1.7452296078842524, - "grad_norm": 6.59798226897668, - "learning_rate": 7.855899581896069e-06, - "loss": 0.711, + "epoch": 1.1752329850324768, + "grad_norm": 3.6148768352082623, + "learning_rate": 1.3887313338824174e-05, + "loss": 0.7391, "step": 8323 }, { - "epoch": 1.74543929544978, - "grad_norm": 6.733854705201149, - "learning_rate": 7.853688401751963e-06, - "loss": 0.6982, + "epoch": 1.1753741880824626, + "grad_norm": 3.398800860716693, + "learning_rate": 1.3885908737167918e-05, + "loss": 0.6479, "step": 8324 }, { - "epoch": 1.7456489830153072, - "grad_norm": 6.221975489853401, - "learning_rate": 7.851477331610053e-06, - "loss": 0.679, + "epoch": 1.1755153911324485, + "grad_norm": 3.4081488909869293, + "learning_rate": 1.3884504045204813e-05, + "loss": 0.6479, "step": 8325 }, { - "epoch": 1.7458586705808345, - "grad_norm": 6.039367972478635, - "learning_rate": 7.849266371583655e-06, - "loss": 0.6311, + "epoch": 1.1756565941824344, + "grad_norm": 3.756416271515069, + "learning_rate": 1.3883099262967521e-05, + "loss": 0.5735, "step": 8326 }, { - "epoch": 1.746068358146362, - "grad_norm": 6.471004059676718, - "learning_rate": 7.847055521786085e-06, - "loss": 0.6347, + "epoch": 1.1757977972324203, + "grad_norm": 4.596490944387628, + "learning_rate": 1.3881694390488675e-05, + "loss": 0.663, "step": 8327 }, { - "epoch": 1.7462780457118892, - "grad_norm": 7.30014313388475, - "learning_rate": 7.844844782330655e-06, - "loss": 0.7341, + "epoch": 1.1759390002824062, + "grad_norm": 3.884484923109837, + "learning_rate": 1.3880289427800925e-05, + "loss": 0.5288, "step": 8328 }, { - "epoch": 1.7464877332774167, - "grad_norm": 6.3888100699811226, - "learning_rate": 7.842634153330665e-06, - "loss": 0.8078, + "epoch": 1.176080203332392, + "grad_norm": 3.464032328949635, + "learning_rate": 1.387888437493693e-05, + "loss": 0.5348, "step": 8329 }, { - "epoch": 1.746697420842944, - "grad_norm": 6.62822689195477, - "learning_rate": 7.840423634899417e-06, - "loss": 0.6549, + "epoch": 1.176221406382378, + "grad_norm": 3.8228028977411976, + "learning_rate": 1.3877479231929333e-05, + "loss": 0.7019, "step": 8330 }, { - "epoch": 1.7469071084084713, - "grad_norm": 6.105384023106563, - "learning_rate": 7.838213227150206e-06, - "loss": 0.8198, + "epoch": 1.1763626094323638, + "grad_norm": 3.685550372350385, + "learning_rate": 1.3876073998810794e-05, + "loss": 0.6144, "step": 8331 }, { - "epoch": 1.7471167959739988, - "grad_norm": 6.462280147479494, - "learning_rate": 7.836002930196314e-06, - "loss": 0.6268, + "epoch": 1.1765038124823497, + "grad_norm": 3.001103637796497, + "learning_rate": 1.3874668675613972e-05, + "loss": 0.511, "step": 8332 }, { - "epoch": 1.7473264835395261, - "grad_norm": 6.213619649004164, - "learning_rate": 7.833792744151025e-06, - "loss": 0.7088, + "epoch": 1.1766450155323356, + "grad_norm": 3.1192372408704223, + "learning_rate": 1.3873263262371522e-05, + "loss": 0.5263, "step": 8333 }, { - "epoch": 1.7475361711050534, - "grad_norm": 7.678576966618167, - "learning_rate": 7.831582669127618e-06, - "loss": 0.7619, + "epoch": 1.1767862185823215, + "grad_norm": 4.0135242614056, + "learning_rate": 1.3871857759116104e-05, + "loss": 0.6377, "step": 8334 }, { - "epoch": 1.747745858670581, - "grad_norm": 6.367222886338224, - "learning_rate": 7.829372705239356e-06, - "loss": 0.7127, + "epoch": 1.1769274216323073, + "grad_norm": 3.2673788682814675, + "learning_rate": 1.3870452165880389e-05, + "loss": 0.5977, "step": 8335 }, { - "epoch": 1.747955546236108, - "grad_norm": 6.479651165429848, - "learning_rate": 7.827162852599508e-06, - "loss": 0.5745, + "epoch": 1.1770686246822932, + "grad_norm": 3.6566080543905732, + "learning_rate": 1.386904648269703e-05, + "loss": 0.564, "step": 8336 }, { - "epoch": 1.7481652338016356, - "grad_norm": 5.906467716301577, - "learning_rate": 7.824953111321336e-06, - "loss": 0.588, + "epoch": 1.1772098277322791, + "grad_norm": 3.313640091179194, + "learning_rate": 1.3867640709598706e-05, + "loss": 0.5413, "step": 8337 }, { - "epoch": 1.7483749213671629, - "grad_norm": 7.224072639068199, - "learning_rate": 7.822743481518086e-06, - "loss": 0.9651, + "epoch": 1.177351030782265, + "grad_norm": 4.2114623935040125, + "learning_rate": 1.3866234846618083e-05, + "loss": 0.7549, "step": 8338 }, { - "epoch": 1.7485846089326902, - "grad_norm": 9.966451982051009, - "learning_rate": 7.82053396330301e-06, - "loss": 0.8825, + "epoch": 1.1774922338322509, + "grad_norm": 3.5471479140207154, + "learning_rate": 1.3864828893787832e-05, + "loss": 0.4768, "step": 8339 }, { - "epoch": 1.7487942964982177, - "grad_norm": 7.896737656740614, - "learning_rate": 7.818324556789352e-06, - "loss": 0.7555, + "epoch": 1.1776334368822368, + "grad_norm": 3.781268653955553, + "learning_rate": 1.3863422851140624e-05, + "loss": 0.5236, "step": 8340 }, { - "epoch": 1.749003984063745, - "grad_norm": 8.13569407162091, - "learning_rate": 7.816115262090338e-06, - "loss": 0.965, + "epoch": 1.1777746399322226, + "grad_norm": 3.914379208872212, + "learning_rate": 1.3862016718709139e-05, + "loss": 0.6661, "step": 8341 }, { - "epoch": 1.7492136716292723, - "grad_norm": 6.4256320791119235, - "learning_rate": 7.813906079319207e-06, - "loss": 0.5834, + "epoch": 1.1779158429822085, + "grad_norm": 4.127524743268275, + "learning_rate": 1.386061049652605e-05, + "loss": 0.6678, "step": 8342 }, { - "epoch": 1.7494233591947999, - "grad_norm": 6.690790509656073, - "learning_rate": 7.811697008589184e-06, - "loss": 0.6692, + "epoch": 1.1780570460321944, + "grad_norm": 3.8037868577972054, + "learning_rate": 1.3859204184624047e-05, + "loss": 0.5609, "step": 8343 }, { - "epoch": 1.7496330467603272, - "grad_norm": 5.985458221518401, - "learning_rate": 7.809488050013483e-06, - "loss": 0.5352, + "epoch": 1.1781982490821803, + "grad_norm": 3.74116522162721, + "learning_rate": 1.3857797783035803e-05, + "loss": 0.6308, "step": 8344 }, { - "epoch": 1.7498427343258545, - "grad_norm": 5.788203099395514, - "learning_rate": 7.807279203705315e-06, - "loss": 0.5902, + "epoch": 1.1783394521321662, + "grad_norm": 3.656034617196702, + "learning_rate": 1.3856391291794001e-05, + "loss": 0.5531, "step": 8345 }, { - "epoch": 1.750052421891382, - "grad_norm": 6.034995914847869, - "learning_rate": 7.805070469777895e-06, - "loss": 0.5335, + "epoch": 1.178480655182152, + "grad_norm": 3.055077367158639, + "learning_rate": 1.3854984710931332e-05, + "loss": 0.5561, "step": 8346 }, { - "epoch": 1.750262109456909, - "grad_norm": 8.126886560964602, - "learning_rate": 7.80286184834442e-06, - "loss": 0.9237, + "epoch": 1.1786218582321377, + "grad_norm": 3.708459349123848, + "learning_rate": 1.3853578040480486e-05, + "loss": 0.5627, "step": 8347 }, { - "epoch": 1.7504717970224366, - "grad_norm": 6.999918407684177, - "learning_rate": 7.800653339518084e-06, - "loss": 0.7555, + "epoch": 1.1787630612821236, + "grad_norm": 3.508671324356655, + "learning_rate": 1.3852171280474147e-05, + "loss": 0.6078, "step": 8348 }, { - "epoch": 1.750681484587964, - "grad_norm": 6.107678680492182, - "learning_rate": 7.79844494341208e-06, - "loss": 0.543, + "epoch": 1.1789042643321095, + "grad_norm": 3.4628667839587526, + "learning_rate": 1.3850764430945015e-05, + "loss": 0.5488, "step": 8349 }, { - "epoch": 1.7508911721534912, - "grad_norm": 6.979725930935626, - "learning_rate": 7.796236660139593e-06, - "loss": 0.7366, + "epoch": 1.1790454673820954, + "grad_norm": 3.658806655153012, + "learning_rate": 1.3849357491925779e-05, + "loss": 0.5298, "step": 8350 }, { - "epoch": 1.7511008597190187, - "grad_norm": 7.235925996085907, - "learning_rate": 7.794028489813797e-06, - "loss": 0.7713, + "epoch": 1.1791866704320813, + "grad_norm": 3.414388862097112, + "learning_rate": 1.3847950463449134e-05, + "loss": 0.5949, "step": 8351 }, { - "epoch": 1.751310547284546, - "grad_norm": 6.669378899441408, - "learning_rate": 7.791820432547866e-06, - "loss": 0.4604, + "epoch": 1.1793278734820671, + "grad_norm": 4.130044502875767, + "learning_rate": 1.3846543345547787e-05, + "loss": 0.666, "step": 8352 }, { - "epoch": 1.7515202348500734, - "grad_norm": 7.337929885968389, - "learning_rate": 7.789612488454971e-06, - "loss": 0.7075, + "epoch": 1.179469076532053, + "grad_norm": 3.4830137720688636, + "learning_rate": 1.3845136138254431e-05, + "loss": 0.6961, "step": 8353 }, { - "epoch": 1.7517299224156009, - "grad_norm": 6.647050511430966, - "learning_rate": 7.78740465764827e-06, - "loss": 0.769, + "epoch": 1.179610279582039, + "grad_norm": 3.241090487093376, + "learning_rate": 1.3843728841601771e-05, + "loss": 0.5524, "step": 8354 }, { - "epoch": 1.751939609981128, - "grad_norm": 6.602102977590646, - "learning_rate": 7.785196940240912e-06, - "loss": 0.7323, + "epoch": 1.1797514826320248, + "grad_norm": 3.3497957314281286, + "learning_rate": 1.3842321455622516e-05, + "loss": 0.6078, "step": 8355 }, { - "epoch": 1.7521492975466555, - "grad_norm": 6.997494973515076, - "learning_rate": 7.782989336346058e-06, - "loss": 0.7627, + "epoch": 1.1798926856820107, + "grad_norm": 3.2634814275418953, + "learning_rate": 1.3840913980349366e-05, + "loss": 0.5993, "step": 8356 }, { - "epoch": 1.7523589851121828, - "grad_norm": 7.03640027408144, - "learning_rate": 7.780781846076843e-06, - "loss": 0.5759, + "epoch": 1.1800338887319965, + "grad_norm": 3.103644174151894, + "learning_rate": 1.3839506415815037e-05, + "loss": 0.5211, "step": 8357 }, { - "epoch": 1.75256867267771, - "grad_norm": 5.8513813762683125, - "learning_rate": 7.778574469546408e-06, - "loss": 0.6003, + "epoch": 1.1801750917819824, + "grad_norm": 4.265917003708214, + "learning_rate": 1.3838098762052237e-05, + "loss": 0.6296, "step": 8358 }, { - "epoch": 1.7527783602432376, - "grad_norm": 8.212844802011938, - "learning_rate": 7.776367206867887e-06, - "loss": 0.734, + "epoch": 1.1803162948319683, + "grad_norm": 3.9357980891998707, + "learning_rate": 1.3836691019093677e-05, + "loss": 0.664, "step": 8359 }, { - "epoch": 1.752988047808765, - "grad_norm": 6.062959956204114, - "learning_rate": 7.774160058154399e-06, - "loss": 0.6097, + "epoch": 1.1804574978819542, + "grad_norm": 2.867139229762761, + "learning_rate": 1.3835283186972077e-05, + "loss": 0.4986, "step": 8360 }, { - "epoch": 1.7531977353742922, - "grad_norm": 6.775777736985307, - "learning_rate": 7.77195302351907e-06, - "loss": 0.6016, + "epoch": 1.18059870093194, + "grad_norm": 3.206676009379892, + "learning_rate": 1.3833875265720151e-05, + "loss": 0.5336, "step": 8361 }, { - "epoch": 1.7534074229398198, - "grad_norm": 7.367775890017337, - "learning_rate": 7.769746103075016e-06, - "loss": 0.8402, + "epoch": 1.180739903981926, + "grad_norm": 3.3862288611033167, + "learning_rate": 1.3832467255370618e-05, + "loss": 0.5648, "step": 8362 }, { - "epoch": 1.753617110505347, - "grad_norm": 6.705186706317168, - "learning_rate": 7.767539296935338e-06, - "loss": 0.5781, + "epoch": 1.1808811070319118, + "grad_norm": 3.86519326142991, + "learning_rate": 1.3831059155956201e-05, + "loss": 0.6319, "step": 8363 }, { - "epoch": 1.7538267980708744, - "grad_norm": 6.315655877832037, - "learning_rate": 7.765332605213144e-06, - "loss": 0.6865, + "epoch": 1.1810223100818977, + "grad_norm": 3.7328184709607375, + "learning_rate": 1.3829650967509629e-05, + "loss": 0.6697, "step": 8364 }, { - "epoch": 1.754036485636402, - "grad_norm": 8.795172077629546, - "learning_rate": 7.763126028021534e-06, - "loss": 0.7353, + "epoch": 1.1811635131318836, + "grad_norm": 3.904237600076821, + "learning_rate": 1.3828242690063616e-05, + "loss": 0.6438, "step": 8365 }, { - "epoch": 1.754246173201929, - "grad_norm": 6.139998738461982, - "learning_rate": 7.760919565473587e-06, - "loss": 0.5101, + "epoch": 1.1813047161818695, + "grad_norm": 3.8741684075295955, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7464, "step": 8366 }, { - "epoch": 1.7544558607674565, - "grad_norm": 5.948526255729782, - "learning_rate": 7.7587132176824e-06, - "loss": 0.6087, + "epoch": 1.1814459192318554, + "grad_norm": 3.601145524124925, + "learning_rate": 1.3825425868304206e-05, + "loss": 0.6681, "step": 8367 }, { - "epoch": 1.7546655483329838, - "grad_norm": 10.357317149772832, - "learning_rate": 7.75650698476105e-06, - "loss": 0.964, + "epoch": 1.1815871222818413, + "grad_norm": 3.761479434174838, + "learning_rate": 1.3824017324056264e-05, + "loss": 0.6427, "step": 8368 }, { - "epoch": 1.7548752358985111, - "grad_norm": 9.807427891529505, - "learning_rate": 7.754300866822608e-06, - "loss": 0.8233, + "epoch": 1.1817283253318271, + "grad_norm": 2.5741328798285563, + "learning_rate": 1.3822608690939816e-05, + "loss": 0.5188, "step": 8369 }, { - "epoch": 1.7550849234640387, - "grad_norm": 7.612976670472568, - "learning_rate": 7.752094863980137e-06, - "loss": 0.8632, + "epoch": 1.181869528381813, + "grad_norm": 3.559961571546636, + "learning_rate": 1.382119996898759e-05, + "loss": 0.6745, "step": 8370 }, { - "epoch": 1.755294611029566, - "grad_norm": 7.633476815554445, - "learning_rate": 7.749888976346708e-06, - "loss": 0.6741, + "epoch": 1.182010731431799, + "grad_norm": 3.389592836087978, + "learning_rate": 1.3819791158232327e-05, + "loss": 0.6109, "step": 8371 }, { - "epoch": 1.7555042985950933, - "grad_norm": 7.804986759220151, - "learning_rate": 7.747683204035367e-06, - "loss": 0.7833, + "epoch": 1.1821519344817848, + "grad_norm": 3.463514448181128, + "learning_rate": 1.3818382258706766e-05, + "loss": 0.5515, "step": 8372 }, { - "epoch": 1.7557139861606208, - "grad_norm": 8.217312893624321, - "learning_rate": 7.74547754715917e-06, - "loss": 1.0026, + "epoch": 1.1822931375317707, + "grad_norm": 2.8883469560914508, + "learning_rate": 1.3816973270443654e-05, + "loss": 0.5424, "step": 8373 }, { - "epoch": 1.7559236737261479, - "grad_norm": 6.21481258991733, - "learning_rate": 7.743272005831162e-06, - "loss": 0.5765, + "epoch": 1.1824343405817566, + "grad_norm": 3.477282999663746, + "learning_rate": 1.381556419347573e-05, + "loss": 0.6608, "step": 8374 }, { - "epoch": 1.7561333612916754, - "grad_norm": 7.195771676684358, - "learning_rate": 7.741066580164377e-06, - "loss": 0.6951, + "epoch": 1.1825755436317424, + "grad_norm": 3.475698718447275, + "learning_rate": 1.3814155027835741e-05, + "loss": 0.636, "step": 8375 }, { - "epoch": 1.7563430488572027, - "grad_norm": 9.018197081559544, - "learning_rate": 7.738861270271842e-06, - "loss": 1.2057, + "epoch": 1.1827167466817283, + "grad_norm": 3.0706042479970557, + "learning_rate": 1.3812745773556438e-05, + "loss": 0.5006, "step": 8376 }, { - "epoch": 1.75655273642273, - "grad_norm": 6.906374772161643, - "learning_rate": 7.736656076266594e-06, - "loss": 0.6311, + "epoch": 1.1828579497317142, + "grad_norm": 3.7607126458555484, + "learning_rate": 1.3811336430670568e-05, + "loss": 0.6623, "step": 8377 }, { - "epoch": 1.7567624239882575, - "grad_norm": 7.250030024493052, - "learning_rate": 7.734450998261647e-06, - "loss": 0.7661, + "epoch": 1.1829991527817, + "grad_norm": 3.691589382587743, + "learning_rate": 1.3809926999210886e-05, + "loss": 0.5675, "step": 8378 }, { - "epoch": 1.7569721115537849, - "grad_norm": 5.089759870798284, - "learning_rate": 7.732246036370015e-06, - "loss": 0.5591, + "epoch": 1.183140355831686, + "grad_norm": 3.4228241870042075, + "learning_rate": 1.3808517479210143e-05, + "loss": 0.6875, "step": 8379 }, { - "epoch": 1.7571817991193122, - "grad_norm": 7.525142257922409, - "learning_rate": 7.730041190704709e-06, - "loss": 0.7682, + "epoch": 1.1832815588816719, + "grad_norm": 3.2513945103152184, + "learning_rate": 1.3807107870701102e-05, + "loss": 0.47, "step": 8380 }, { - "epoch": 1.7573914866848397, - "grad_norm": 8.417222828133237, - "learning_rate": 7.727836461378725e-06, - "loss": 1.142, + "epoch": 1.1834227619316577, + "grad_norm": 3.0989333119496743, + "learning_rate": 1.3805698173716522e-05, + "loss": 0.5975, "step": 8381 }, { - "epoch": 1.757601174250367, - "grad_norm": 7.133122471626611, - "learning_rate": 7.725631848505063e-06, - "loss": 0.7348, + "epoch": 1.1835639649816436, + "grad_norm": 3.4783733413830644, + "learning_rate": 1.3804288388289152e-05, + "loss": 0.6491, "step": 8382 }, { - "epoch": 1.7578108618158943, - "grad_norm": 7.138231293876203, - "learning_rate": 7.723427352196718e-06, - "loss": 0.7279, + "epoch": 1.1837051680316295, + "grad_norm": 3.2500485322399215, + "learning_rate": 1.3802878514451767e-05, + "loss": 0.538, "step": 8383 }, { - "epoch": 1.7580205493814218, - "grad_norm": 7.758087929314629, - "learning_rate": 7.721222972566667e-06, - "loss": 1.0003, + "epoch": 1.1838463710816154, + "grad_norm": 3.1990363014172996, + "learning_rate": 1.3801468552237127e-05, + "loss": 0.6089, "step": 8384 }, { - "epoch": 1.758230236946949, - "grad_norm": 7.619428796410866, - "learning_rate": 7.71901870972789e-06, - "loss": 0.7514, + "epoch": 1.1839875741316013, + "grad_norm": 3.8387696230670163, + "learning_rate": 1.3800058501678e-05, + "loss": 0.6477, "step": 8385 }, { - "epoch": 1.7584399245124764, - "grad_norm": 8.465870547632365, - "learning_rate": 7.716814563793365e-06, - "loss": 0.9024, + "epoch": 1.1841287771815872, + "grad_norm": 3.5741734122717554, + "learning_rate": 1.3798648362807154e-05, + "loss": 0.5261, "step": 8386 }, { - "epoch": 1.7586496120780037, - "grad_norm": 6.262868311063114, - "learning_rate": 7.714610534876047e-06, - "loss": 0.5254, + "epoch": 1.184269980231573, + "grad_norm": 4.209473239181189, + "learning_rate": 1.3797238135657363e-05, + "loss": 0.7371, "step": 8387 }, { - "epoch": 1.758859299643531, - "grad_norm": 5.884486237055454, - "learning_rate": 7.712406623088905e-06, - "loss": 0.4959, + "epoch": 1.184411183281559, + "grad_norm": 3.546938834048846, + "learning_rate": 1.3795827820261392e-05, + "loss": 0.5975, "step": 8388 }, { - "epoch": 1.7590689872090586, - "grad_norm": 6.544421002362297, - "learning_rate": 7.710202828544895e-06, - "loss": 0.7058, + "epoch": 1.1845523863315448, + "grad_norm": 3.1707022774178952, + "learning_rate": 1.3794417416652027e-05, + "loss": 0.5959, "step": 8389 }, { - "epoch": 1.7592786747745859, - "grad_norm": 5.976999000740742, - "learning_rate": 7.707999151356958e-06, - "loss": 0.6256, + "epoch": 1.1846935893815307, + "grad_norm": 4.853779947147142, + "learning_rate": 1.379300692486204e-05, + "loss": 0.7456, "step": 8390 }, { - "epoch": 1.7594883623401132, - "grad_norm": 6.5895410954666405, - "learning_rate": 7.70579559163804e-06, - "loss": 0.7145, + "epoch": 1.1848347924315166, + "grad_norm": 3.9485023388192477, + "learning_rate": 1.3791596344924206e-05, + "loss": 0.5487, "step": 8391 }, { - "epoch": 1.7596980499056407, - "grad_norm": 6.7149770478249025, - "learning_rate": 7.70359214950108e-06, - "loss": 0.7279, + "epoch": 1.1849759954815025, + "grad_norm": 3.119625911392598, + "learning_rate": 1.3790185676871312e-05, + "loss": 0.5593, "step": 8392 }, { - "epoch": 1.7599077374711678, - "grad_norm": 8.015850038418254, - "learning_rate": 7.701388825059001e-06, - "loss": 0.8607, + "epoch": 1.1851171985314883, + "grad_norm": 5.512215982449515, + "learning_rate": 1.3788774920736142e-05, + "loss": 0.7187, "step": 8393 }, { - "epoch": 1.7601174250366953, - "grad_norm": 8.51327284603959, - "learning_rate": 7.699185618424733e-06, - "loss": 0.7749, + "epoch": 1.1852584015814742, + "grad_norm": 3.787095604393217, + "learning_rate": 1.3787364076551478e-05, + "loss": 0.726, "step": 8394 }, { - "epoch": 1.7603271126022229, - "grad_norm": 5.754919899193569, - "learning_rate": 7.696982529711195e-06, - "loss": 0.7151, + "epoch": 1.18539960463146, + "grad_norm": 4.282618517932755, + "learning_rate": 1.3785953144350108e-05, + "loss": 0.7294, "step": 8395 }, { - "epoch": 1.76053680016775, - "grad_norm": 8.94363215710054, - "learning_rate": 7.694779559031293e-06, - "loss": 1.0975, + "epoch": 1.185540807681446, + "grad_norm": 3.6887867919269666, + "learning_rate": 1.3784542124164821e-05, + "loss": 0.7391, "step": 8396 }, { - "epoch": 1.7607464877332775, - "grad_norm": 5.615187907482772, - "learning_rate": 7.692576706497936e-06, - "loss": 0.6542, + "epoch": 1.1856820107314319, + "grad_norm": 3.356123967667483, + "learning_rate": 1.3783131016028408e-05, + "loss": 0.6809, "step": 8397 }, { - "epoch": 1.7609561752988048, - "grad_norm": 6.06698202826116, - "learning_rate": 7.690373972224032e-06, - "loss": 0.629, + "epoch": 1.1858232137814178, + "grad_norm": 3.567548466275041, + "learning_rate": 1.378171981997367e-05, + "loss": 0.6609, "step": 8398 }, { - "epoch": 1.761165862864332, - "grad_norm": 6.816281666341303, - "learning_rate": 7.68817135632246e-06, - "loss": 0.8312, + "epoch": 1.1859644168314036, + "grad_norm": 3.580124029286584, + "learning_rate": 1.378030853603339e-05, + "loss": 0.6763, "step": 8399 }, { - "epoch": 1.7613755504298596, - "grad_norm": 6.529156810605829, - "learning_rate": 7.68596885890612e-06, - "loss": 0.7894, + "epoch": 1.1861056198813895, + "grad_norm": 2.9652953956030186, + "learning_rate": 1.3778897164240378e-05, + "loss": 0.5861, "step": 8400 }, { - "epoch": 1.761585237995387, - "grad_norm": 6.5634570218672135, - "learning_rate": 7.68376648008789e-06, - "loss": 0.7903, + "epoch": 1.1862468229313754, + "grad_norm": 4.469148906017059, + "learning_rate": 1.3777485704627424e-05, + "loss": 0.8899, "step": 8401 }, { - "epoch": 1.7617949255609142, - "grad_norm": 4.6176445672439534, - "learning_rate": 7.681564219980642e-06, - "loss": 0.3915, + "epoch": 1.1863880259813613, + "grad_norm": 4.342840984553117, + "learning_rate": 1.3776074157227335e-05, + "loss": 0.7652, "step": 8402 }, { - "epoch": 1.7620046131264417, - "grad_norm": 6.586754072040553, - "learning_rate": 7.679362078697247e-06, - "loss": 0.695, + "epoch": 1.1865292290313472, + "grad_norm": 3.628054985910775, + "learning_rate": 1.3774662522072912e-05, + "loss": 0.5921, "step": 8403 }, { - "epoch": 1.7622143006919688, - "grad_norm": 7.4457966868721845, - "learning_rate": 7.677160056350577e-06, - "loss": 0.6014, + "epoch": 1.186670432081333, + "grad_norm": 4.4083318862956045, + "learning_rate": 1.3773250799196964e-05, + "loss": 0.8165, "step": 8404 }, { - "epoch": 1.7624239882574964, - "grad_norm": 7.122957288802633, - "learning_rate": 7.674958153053479e-06, - "loss": 0.758, + "epoch": 1.186811635131319, + "grad_norm": 3.186653268399574, + "learning_rate": 1.3771838988632295e-05, + "loss": 0.5753, "step": 8405 }, { - "epoch": 1.7626336758230237, - "grad_norm": 7.0382553163305355, - "learning_rate": 7.672756368918809e-06, - "loss": 0.7179, + "epoch": 1.1869528381813046, + "grad_norm": 3.9783873710803146, + "learning_rate": 1.3770427090411717e-05, + "loss": 0.6142, "step": 8406 }, { - "epoch": 1.762843363388551, - "grad_norm": 6.982633009966255, - "learning_rate": 7.670554704059412e-06, - "loss": 0.7201, + "epoch": 1.1870940412312905, + "grad_norm": 3.827393791442306, + "learning_rate": 1.3769015104568046e-05, + "loss": 0.6166, "step": 8407 }, { - "epoch": 1.7630530509540785, - "grad_norm": 7.463009917197567, - "learning_rate": 7.668353158588123e-06, - "loss": 0.7152, + "epoch": 1.1872352442812764, + "grad_norm": 3.689853079758535, + "learning_rate": 1.3767603031134087e-05, + "loss": 0.708, "step": 8408 }, { - "epoch": 1.7632627385196058, - "grad_norm": 6.920167631942121, - "learning_rate": 7.666151732617782e-06, - "loss": 0.6441, + "epoch": 1.1873764473312622, + "grad_norm": 3.9974949816348193, + "learning_rate": 1.3766190870142662e-05, + "loss": 0.7098, "step": 8409 }, { - "epoch": 1.763472426085133, - "grad_norm": 8.205821644911513, - "learning_rate": 7.66395042626121e-06, - "loss": 0.6917, + "epoch": 1.1875176503812481, + "grad_norm": 3.6933356777340087, + "learning_rate": 1.3764778621626586e-05, + "loss": 0.6512, "step": 8410 }, { - "epoch": 1.7636821136506606, - "grad_norm": 8.140473842590335, - "learning_rate": 7.661749239631235e-06, - "loss": 0.6963, + "epoch": 1.187658853431234, + "grad_norm": 4.1230929665490335, + "learning_rate": 1.3763366285618685e-05, + "loss": 0.7175, "step": 8411 }, { - "epoch": 1.7638918012161877, - "grad_norm": 7.568298445425577, - "learning_rate": 7.659548172840664e-06, - "loss": 0.7371, + "epoch": 1.18780005648122, + "grad_norm": 3.261845796214726, + "learning_rate": 1.3761953862151773e-05, + "loss": 0.4423, "step": 8412 }, { - "epoch": 1.7641014887817152, - "grad_norm": 9.164700518022064, - "learning_rate": 7.657347226002306e-06, - "loss": 0.9727, + "epoch": 1.1879412595312058, + "grad_norm": 3.663606594033593, + "learning_rate": 1.376054135125868e-05, + "loss": 0.6396, "step": 8413 }, { - "epoch": 1.7643111763472428, - "grad_norm": 6.550522139752771, - "learning_rate": 7.655146399228971e-06, - "loss": 0.5577, + "epoch": 1.1880824625811917, + "grad_norm": 3.7123692373156394, + "learning_rate": 1.3759128752972229e-05, + "loss": 0.6191, "step": 8414 }, { - "epoch": 1.7645208639127699, - "grad_norm": 7.4425018632890145, - "learning_rate": 7.652945692633446e-06, - "loss": 0.6697, + "epoch": 1.1882236656311775, + "grad_norm": 3.5033470689398576, + "learning_rate": 1.3757716067325251e-05, + "loss": 0.4708, "step": 8415 }, { - "epoch": 1.7647305514782974, - "grad_norm": 7.050784670150012, - "learning_rate": 7.650745106328528e-06, - "loss": 0.6148, + "epoch": 1.1883648686811634, + "grad_norm": 3.1166529678224175, + "learning_rate": 1.375630329435057e-05, + "loss": 0.538, "step": 8416 }, { - "epoch": 1.7649402390438247, - "grad_norm": 6.858320959137793, - "learning_rate": 7.648544640426998e-06, - "loss": 0.8089, + "epoch": 1.1885060717311493, + "grad_norm": 3.252307266671689, + "learning_rate": 1.3754890434081025e-05, + "loss": 0.4973, "step": 8417 }, { - "epoch": 1.765149926609352, - "grad_norm": 6.65943940491548, - "learning_rate": 7.646344295041629e-06, - "loss": 0.6146, + "epoch": 1.1886472747811352, + "grad_norm": 4.714718749878313, + "learning_rate": 1.3753477486549449e-05, + "loss": 0.8573, "step": 8418 }, { - "epoch": 1.7653596141748795, - "grad_norm": 7.544510056427483, - "learning_rate": 7.6441440702852e-06, - "loss": 1.0247, + "epoch": 1.188788477831121, + "grad_norm": 3.2500352083878563, + "learning_rate": 1.3752064451788678e-05, + "loss": 0.6157, "step": 8419 }, { - "epoch": 1.7655693017404068, - "grad_norm": 7.9584480877266355, - "learning_rate": 7.641943966270475e-06, - "loss": 0.6129, + "epoch": 1.188929680881107, + "grad_norm": 4.114929593689272, + "learning_rate": 1.3750651329831548e-05, + "loss": 0.7551, "step": 8420 }, { - "epoch": 1.7657789893059341, - "grad_norm": 7.8336602517, - "learning_rate": 7.63974398311021e-06, - "loss": 0.8525, + "epoch": 1.1890708839310928, + "grad_norm": 3.2482425839648017, + "learning_rate": 1.3749238120710902e-05, + "loss": 0.5369, "step": 8421 }, { - "epoch": 1.7659886768714617, - "grad_norm": 7.979725956016501, - "learning_rate": 7.637544120917158e-06, - "loss": 0.9821, + "epoch": 1.1892120869810787, + "grad_norm": 3.3801284425244096, + "learning_rate": 1.3747824824459577e-05, + "loss": 0.6063, "step": 8422 }, { - "epoch": 1.7661983644369887, - "grad_norm": 8.379511918449996, - "learning_rate": 7.635344379804073e-06, - "loss": 0.8005, + "epoch": 1.1893532900310646, + "grad_norm": 2.9444265574433794, + "learning_rate": 1.3746411441110425e-05, + "loss": 0.4955, "step": 8423 }, { - "epoch": 1.7664080520025163, - "grad_norm": 7.905665915337931, - "learning_rate": 7.633144759883682e-06, - "loss": 0.8621, + "epoch": 1.1894944930810505, + "grad_norm": 3.398336403997849, + "learning_rate": 1.3744997970696291e-05, + "loss": 0.6201, "step": 8424 }, { - "epoch": 1.7666177395680436, - "grad_norm": 5.8295354608155385, - "learning_rate": 7.63094526126873e-06, - "loss": 0.6837, + "epoch": 1.1896356961310364, + "grad_norm": 4.777759873747928, + "learning_rate": 1.3743584413250019e-05, + "loss": 0.9248, "step": 8425 }, { - "epoch": 1.7668274271335709, - "grad_norm": 6.966053558106857, - "learning_rate": 7.6287458840719465e-06, - "loss": 0.7568, + "epoch": 1.1897768991810223, + "grad_norm": 2.9728537264917287, + "learning_rate": 1.3742170768804464e-05, + "loss": 0.5553, "step": 8426 }, { - "epoch": 1.7670371146990984, - "grad_norm": 5.533883951873656, - "learning_rate": 7.6265466284060455e-06, - "loss": 0.6848, + "epoch": 1.1899181022310081, + "grad_norm": 2.9204095687261016, + "learning_rate": 1.3740757037392474e-05, + "loss": 0.6315, "step": 8427 }, { - "epoch": 1.7672468022646257, - "grad_norm": 6.438766029778968, - "learning_rate": 7.624347494383744e-06, - "loss": 0.7024, + "epoch": 1.190059305280994, + "grad_norm": 3.7423754302762613, + "learning_rate": 1.3739343219046908e-05, + "loss": 0.6599, "step": 8428 }, { - "epoch": 1.767456489830153, - "grad_norm": 7.177831894088845, - "learning_rate": 7.62214848211776e-06, - "loss": 0.7058, + "epoch": 1.19020050833098, + "grad_norm": 4.097185546243689, + "learning_rate": 1.373792931380062e-05, + "loss": 0.6482, "step": 8429 }, { - "epoch": 1.7676661773956805, - "grad_norm": 7.769997318571442, - "learning_rate": 7.619949591720784e-06, - "loss": 0.8546, + "epoch": 1.1903417113809658, + "grad_norm": 3.477306396290203, + "learning_rate": 1.3736515321686468e-05, + "loss": 0.6132, "step": 8430 }, { - "epoch": 1.7678758649612079, - "grad_norm": 6.663177887954277, - "learning_rate": 7.617750823305523e-06, - "loss": 0.5998, + "epoch": 1.1904829144309517, + "grad_norm": 4.142551396089924, + "learning_rate": 1.3735101242737313e-05, + "loss": 0.5596, "step": 8431 }, { - "epoch": 1.7680855525267352, - "grad_norm": 6.58023226204452, - "learning_rate": 7.615552176984665e-06, - "loss": 0.7618, + "epoch": 1.1906241174809375, + "grad_norm": 3.9633752559349382, + "learning_rate": 1.373368707698602e-05, + "loss": 0.6352, "step": 8432 }, { - "epoch": 1.7682952400922627, - "grad_norm": 7.277939778438598, - "learning_rate": 7.613353652870892e-06, - "loss": 0.8417, + "epoch": 1.1907653205309234, + "grad_norm": 3.3197447769286006, + "learning_rate": 1.373227282446545e-05, + "loss": 0.6155, "step": 8433 }, { - "epoch": 1.7685049276577898, - "grad_norm": 6.53106863751666, - "learning_rate": 7.611155251076881e-06, - "loss": 0.6965, + "epoch": 1.1909065235809093, + "grad_norm": 4.021473478734827, + "learning_rate": 1.3730858485208471e-05, + "loss": 0.7521, "step": 8434 }, { - "epoch": 1.7687146152233173, - "grad_norm": 7.065560428909784, - "learning_rate": 7.608956971715311e-06, - "loss": 0.8685, + "epoch": 1.1910477266308952, + "grad_norm": 3.88054815220302, + "learning_rate": 1.3729444059247954e-05, + "loss": 0.6559, "step": 8435 }, { - "epoch": 1.7689243027888446, - "grad_norm": 8.309969928855029, - "learning_rate": 7.606758814898841e-06, - "loss": 0.8374, + "epoch": 1.191188929680881, + "grad_norm": 3.0387360084160178, + "learning_rate": 1.3728029546616769e-05, + "loss": 0.5875, "step": 8436 }, { - "epoch": 1.769133990354372, - "grad_norm": 7.08047651998436, - "learning_rate": 7.604560780740134e-06, - "loss": 0.5909, + "epoch": 1.191330132730867, + "grad_norm": 3.5073141787512743, + "learning_rate": 1.3726614947347784e-05, + "loss": 0.5851, "step": 8437 }, { - "epoch": 1.7693436779198994, - "grad_norm": 5.723651254946721, - "learning_rate": 7.6023628693518415e-06, - "loss": 0.6721, + "epoch": 1.1914713357808528, + "grad_norm": 3.704859812609219, + "learning_rate": 1.3725200261473879e-05, + "loss": 0.7267, "step": 8438 }, { - "epoch": 1.7695533654854267, - "grad_norm": 6.973590855378079, - "learning_rate": 7.600165080846608e-06, - "loss": 0.6979, + "epoch": 1.1916125388308387, + "grad_norm": 3.472071453049201, + "learning_rate": 1.3723785489027926e-05, + "loss": 0.6012, "step": 8439 }, { - "epoch": 1.769763053050954, - "grad_norm": 6.368218673246091, - "learning_rate": 7.597967415337078e-06, - "loss": 0.5685, + "epoch": 1.1917537418808246, + "grad_norm": 3.5612454325212823, + "learning_rate": 1.3722370630042809e-05, + "loss": 0.6082, "step": 8440 }, { - "epoch": 1.7699727406164816, - "grad_norm": 5.958216468128591, - "learning_rate": 7.595769872935885e-06, - "loss": 0.5501, + "epoch": 1.1918949449308105, + "grad_norm": 3.7361880744180187, + "learning_rate": 1.372095568455141e-05, + "loss": 0.5201, "step": 8441 }, { - "epoch": 1.7701824281820087, - "grad_norm": 6.087656295948984, - "learning_rate": 7.593572453755655e-06, - "loss": 0.6289, + "epoch": 1.1920361479807964, + "grad_norm": 3.2001903684982502, + "learning_rate": 1.3719540652586601e-05, + "loss": 0.567, "step": 8442 }, { - "epoch": 1.7703921157475362, - "grad_norm": 7.550056991387706, - "learning_rate": 7.5913751579090115e-06, - "loss": 0.8357, + "epoch": 1.1921773510307823, + "grad_norm": 4.065173060412272, + "learning_rate": 1.3718125534181277e-05, + "loss": 0.6917, "step": 8443 }, { - "epoch": 1.7706018033130635, - "grad_norm": 7.41018237105327, - "learning_rate": 7.589177985508569e-06, - "loss": 0.764, + "epoch": 1.1923185540807681, + "grad_norm": 3.3613100687907043, + "learning_rate": 1.3716710329368319e-05, + "loss": 0.58, "step": 8444 }, { - "epoch": 1.7708114908785908, - "grad_norm": 6.474432522528981, - "learning_rate": 7.586980936666932e-06, - "loss": 0.6047, + "epoch": 1.192459757130754, + "grad_norm": 3.0478226485921613, + "learning_rate": 1.371529503818062e-05, + "loss": 0.5007, "step": 8445 }, { - "epoch": 1.7710211784441183, - "grad_norm": 6.0824007309177475, - "learning_rate": 7.584784011496711e-06, - "loss": 0.6894, + "epoch": 1.19260096018074, + "grad_norm": 3.023701824519928, + "learning_rate": 1.3713879660651069e-05, + "loss": 0.5308, "step": 8446 }, { - "epoch": 1.7712308660096456, - "grad_norm": 7.246433112842389, - "learning_rate": 7.582587210110499e-06, - "loss": 0.6914, + "epoch": 1.1927421632307258, + "grad_norm": 3.532508408549909, + "learning_rate": 1.3712464196812558e-05, + "loss": 0.6012, "step": 8447 }, { - "epoch": 1.771440553575173, - "grad_norm": 7.42884640114442, - "learning_rate": 7.580390532620883e-06, - "loss": 0.6648, + "epoch": 1.1928833662807117, + "grad_norm": 4.2241636911007685, + "learning_rate": 1.3711048646697979e-05, + "loss": 0.7859, "step": 8448 }, { - "epoch": 1.7716502411407005, - "grad_norm": 6.263779586785348, - "learning_rate": 7.5781939791404465e-06, - "loss": 0.5921, + "epoch": 1.1930245693306976, + "grad_norm": 3.3416667581693633, + "learning_rate": 1.370963301034024e-05, + "loss": 0.5934, "step": 8449 }, { - "epoch": 1.7718599287062278, - "grad_norm": 6.586723961996054, - "learning_rate": 7.575997549781777e-06, - "loss": 0.6502, + "epoch": 1.1931657723806834, + "grad_norm": 3.5984709222099576, + "learning_rate": 1.3708217287772227e-05, + "loss": 0.5452, "step": 8450 }, { - "epoch": 1.772069616271755, - "grad_norm": 7.1385855725625404, - "learning_rate": 7.573801244657431e-06, - "loss": 0.6738, + "epoch": 1.1933069754306693, + "grad_norm": 4.193074495603969, + "learning_rate": 1.3706801479026844e-05, + "loss": 0.638, "step": 8451 }, { - "epoch": 1.7722793038372826, - "grad_norm": 6.709043001698923, - "learning_rate": 7.571605063879979e-06, - "loss": 0.784, + "epoch": 1.1934481784806552, + "grad_norm": 3.3811873854617653, + "learning_rate": 1.3705385584136997e-05, + "loss": 0.6089, "step": 8452 }, { - "epoch": 1.7724889914028097, - "grad_norm": 6.654812508432464, - "learning_rate": 7.569409007561983e-06, - "loss": 0.7903, + "epoch": 1.193589381530641, + "grad_norm": 3.602948868471054, + "learning_rate": 1.370396960313559e-05, + "loss": 0.637, "step": 8453 }, { - "epoch": 1.7726986789683372, - "grad_norm": 7.08056384452814, - "learning_rate": 7.567213075815989e-06, - "loss": 0.6277, + "epoch": 1.193730584580627, + "grad_norm": 3.389911254884805, + "learning_rate": 1.3702553536055529e-05, + "loss": 0.5187, "step": 8454 }, { - "epoch": 1.7729083665338645, - "grad_norm": 8.095824896675438, - "learning_rate": 7.565017268754542e-06, - "loss": 1.0345, + "epoch": 1.1938717876306129, + "grad_norm": 4.183621518433871, + "learning_rate": 1.3701137382929724e-05, + "loss": 0.6934, "step": 8455 }, { - "epoch": 1.7731180540993918, - "grad_norm": 6.6993076513191525, - "learning_rate": 7.5628215864901864e-06, - "loss": 0.4817, + "epoch": 1.1940129906805987, + "grad_norm": 3.0065174649482067, + "learning_rate": 1.3699721143791083e-05, + "loss": 0.514, "step": 8456 }, { - "epoch": 1.7733277416649194, - "grad_norm": 5.665278812728131, - "learning_rate": 7.56062602913545e-06, - "loss": 0.4828, + "epoch": 1.1941541937305846, + "grad_norm": 3.5194347376052537, + "learning_rate": 1.3698304818672519e-05, + "loss": 0.6432, "step": 8457 }, { - "epoch": 1.7735374292304467, - "grad_norm": 6.361843127812987, - "learning_rate": 7.558430596802862e-06, - "loss": 0.6614, + "epoch": 1.1942953967805705, + "grad_norm": 3.5240683293150554, + "learning_rate": 1.3696888407606952e-05, + "loss": 0.5645, "step": 8458 }, { - "epoch": 1.773747116795974, - "grad_norm": 8.029712324777721, - "learning_rate": 7.556235289604941e-06, - "loss": 0.7534, + "epoch": 1.1944365998305564, + "grad_norm": 3.263407012944965, + "learning_rate": 1.3695471910627292e-05, + "loss": 0.5877, "step": 8459 }, { - "epoch": 1.7739568043615015, - "grad_norm": 6.598077813373454, - "learning_rate": 7.554040107654199e-06, - "loss": 0.6324, + "epoch": 1.1945778028805423, + "grad_norm": 3.529119779712467, + "learning_rate": 1.369405532776646e-05, + "loss": 0.5354, "step": 8460 }, { - "epoch": 1.7741664919270286, - "grad_norm": 7.326996972421166, - "learning_rate": 7.55184505106314e-06, - "loss": 0.6744, + "epoch": 1.1947190059305282, + "grad_norm": 4.1225157981782, + "learning_rate": 1.369263865905738e-05, + "loss": 0.7623, "step": 8461 }, { - "epoch": 1.774376179492556, - "grad_norm": 7.442197958188835, - "learning_rate": 7.549650119944274e-06, - "loss": 0.7828, + "epoch": 1.194860208980514, + "grad_norm": 3.248715536346656, + "learning_rate": 1.3691221904532972e-05, + "loss": 0.5388, "step": 8462 }, { - "epoch": 1.7745858670580834, - "grad_norm": 5.327338970528745, - "learning_rate": 7.547455314410087e-06, - "loss": 0.5452, + "epoch": 1.1950014120305, + "grad_norm": 3.3284582869391737, + "learning_rate": 1.368980506422616e-05, + "loss": 0.4776, "step": 8463 }, { - "epoch": 1.7747955546236107, - "grad_norm": 8.010569178903205, - "learning_rate": 7.545260634573068e-06, - "loss": 0.9968, + "epoch": 1.1951426150804858, + "grad_norm": 4.160924712694896, + "learning_rate": 1.3688388138169873e-05, + "loss": 0.7079, "step": 8464 }, { - "epoch": 1.7750052421891382, - "grad_norm": 7.076586679055972, - "learning_rate": 7.543066080545702e-06, - "loss": 0.7472, + "epoch": 1.1952838181304717, + "grad_norm": 2.8692248041175192, + "learning_rate": 1.3686971126397035e-05, + "loss": 0.4065, "step": 8465 }, { - "epoch": 1.7752149297546655, - "grad_norm": 7.388871805086191, - "learning_rate": 7.540871652440456e-06, - "loss": 0.9161, + "epoch": 1.1954250211804576, + "grad_norm": 3.363327236238631, + "learning_rate": 1.3685554028940586e-05, + "loss": 0.5936, "step": 8466 }, { - "epoch": 1.7754246173201929, - "grad_norm": 9.278049173740676, - "learning_rate": 7.538677350369805e-06, - "loss": 0.9707, + "epoch": 1.1955662242304435, + "grad_norm": 3.0684011278181718, + "learning_rate": 1.368413684583345e-05, + "loss": 0.5115, "step": 8467 }, { - "epoch": 1.7756343048857204, - "grad_norm": 5.224657037820614, - "learning_rate": 7.536483174446212e-06, - "loss": 0.5524, + "epoch": 1.1957074272804293, + "grad_norm": 3.217957706838382, + "learning_rate": 1.3682719577108566e-05, + "loss": 0.5513, "step": 8468 }, { - "epoch": 1.7758439924512477, - "grad_norm": 7.2835324080757635, - "learning_rate": 7.534289124782126e-06, - "loss": 0.6948, + "epoch": 1.1958486303304152, + "grad_norm": 2.7918981144234856, + "learning_rate": 1.3681302222798867e-05, + "loss": 0.4151, "step": 8469 }, { - "epoch": 1.776053680016775, - "grad_norm": 9.122722851301939, - "learning_rate": 7.532095201489996e-06, - "loss": 0.838, + "epoch": 1.195989833380401, + "grad_norm": 3.051424640753601, + "learning_rate": 1.3679884782937295e-05, + "loss": 0.5218, "step": 8470 }, { - "epoch": 1.7762633675823025, - "grad_norm": 7.281238872341526, - "learning_rate": 7.529901404682271e-06, - "loss": 0.8466, + "epoch": 1.196131036430387, + "grad_norm": 5.378519879637901, + "learning_rate": 1.3678467257556791e-05, + "loss": 0.8515, "step": 8471 }, { - "epoch": 1.7764730551478296, - "grad_norm": 6.6302619988034355, - "learning_rate": 7.527707734471383e-06, - "loss": 0.6242, + "epoch": 1.1962722394803729, + "grad_norm": 3.0431780229863667, + "learning_rate": 1.3677049646690298e-05, + "loss": 0.5409, "step": 8472 }, { - "epoch": 1.7766827427133571, - "grad_norm": 7.106371540684246, - "learning_rate": 7.5255141909697615e-06, - "loss": 0.6226, + "epoch": 1.1964134425303588, + "grad_norm": 5.315474202352966, + "learning_rate": 1.3675631950370757e-05, + "loss": 0.9506, "step": 8473 }, { - "epoch": 1.7768924302788844, - "grad_norm": 7.228130737592883, - "learning_rate": 7.52332077428983e-06, - "loss": 0.7849, + "epoch": 1.1965546455803446, + "grad_norm": 3.8404528632437893, + "learning_rate": 1.3674214168631114e-05, + "loss": 0.6706, "step": 8474 }, { - "epoch": 1.7771021178444117, - "grad_norm": 7.109879141004516, - "learning_rate": 7.521127484544004e-06, - "loss": 0.7588, + "epoch": 1.1966958486303305, + "grad_norm": 4.064403456633462, + "learning_rate": 1.3672796301504325e-05, + "loss": 0.6745, "step": 8475 }, { - "epoch": 1.7773118054099393, - "grad_norm": 5.863221213162364, - "learning_rate": 7.518934321844692e-06, - "loss": 0.5756, + "epoch": 1.1968370516803164, + "grad_norm": 3.1738407073378236, + "learning_rate": 1.3671378349023333e-05, + "loss": 0.5892, "step": 8476 }, { - "epoch": 1.7775214929754666, - "grad_norm": 7.288874584895362, - "learning_rate": 7.5167412863042996e-06, - "loss": 0.712, + "epoch": 1.1969782547303023, + "grad_norm": 3.14070293638681, + "learning_rate": 1.3669960311221098e-05, + "loss": 0.5858, "step": 8477 }, { - "epoch": 1.7777311805409939, - "grad_norm": 6.127213247144366, - "learning_rate": 7.514548378035227e-06, - "loss": 0.6488, + "epoch": 1.1971194577802882, + "grad_norm": 3.0034149481967467, + "learning_rate": 1.3668542188130567e-05, + "loss": 0.5801, "step": 8478 }, { - "epoch": 1.7779408681065214, - "grad_norm": 7.693121770117709, - "learning_rate": 7.512355597149857e-06, - "loss": 0.7371, + "epoch": 1.197260660830274, + "grad_norm": 7.694344647774244, + "learning_rate": 1.36671239797847e-05, + "loss": 0.6505, "step": 8479 }, { - "epoch": 1.7781505556720485, - "grad_norm": 5.857298323106373, - "learning_rate": 7.510162943760576e-06, - "loss": 0.5392, + "epoch": 1.19740186388026, + "grad_norm": 3.443314511662318, + "learning_rate": 1.3665705686216457e-05, + "loss": 0.6659, "step": 8480 }, { - "epoch": 1.778360243237576, - "grad_norm": 6.768684921200561, - "learning_rate": 7.507970417979769e-06, - "loss": 0.5919, + "epoch": 1.1975430669302458, + "grad_norm": 3.0368095601011262, + "learning_rate": 1.3664287307458794e-05, + "loss": 0.5097, "step": 8481 }, { - "epoch": 1.7785699308031033, - "grad_norm": 5.632072122303574, - "learning_rate": 7.505778019919794e-06, - "loss": 0.5949, + "epoch": 1.1976842699802317, + "grad_norm": 3.8884070149836347, + "learning_rate": 1.3662868843544676e-05, + "loss": 0.6981, "step": 8482 }, { - "epoch": 1.7787796183686306, - "grad_norm": 6.079162821020614, - "learning_rate": 7.503585749693023e-06, - "loss": 0.5917, + "epoch": 1.1978254730302174, + "grad_norm": 4.592943557892394, + "learning_rate": 1.3661450294507075e-05, + "loss": 0.7482, "step": 8483 }, { - "epoch": 1.7789893059341582, - "grad_norm": 8.336901285689663, - "learning_rate": 7.501393607411816e-06, - "loss": 0.848, + "epoch": 1.1979666760802032, + "grad_norm": 3.718094572798151, + "learning_rate": 1.3660031660378945e-05, + "loss": 0.6251, "step": 8484 }, { - "epoch": 1.7791989934996855, - "grad_norm": 6.9987330876139415, - "learning_rate": 7.499201593188517e-06, - "loss": 0.7346, + "epoch": 1.1981078791301891, + "grad_norm": 4.204987294158051, + "learning_rate": 1.365861294119326e-05, + "loss": 0.6811, "step": 8485 }, { - "epoch": 1.7794086810652128, - "grad_norm": 6.937464921772398, - "learning_rate": 7.4970097071354695e-06, - "loss": 0.6266, + "epoch": 1.198249082180175, + "grad_norm": 3.829493305470706, + "learning_rate": 1.365719413698299e-05, + "loss": 0.7043, "step": 8486 }, { - "epoch": 1.7796183686307403, - "grad_norm": 7.906609192753642, - "learning_rate": 7.494817949365023e-06, - "loss": 0.9338, + "epoch": 1.198390285230161, + "grad_norm": 3.198977842889897, + "learning_rate": 1.365577524778111e-05, + "loss": 0.618, "step": 8487 }, { - "epoch": 1.7798280561962676, - "grad_norm": 6.779320388485007, - "learning_rate": 7.4926263199894975e-06, - "loss": 0.5937, + "epoch": 1.1985314882801468, + "grad_norm": 4.745353176567218, + "learning_rate": 1.365435627362059e-05, + "loss": 0.7534, "step": 8488 }, { - "epoch": 1.780037743761795, - "grad_norm": 7.1963417171913, - "learning_rate": 7.490434819121224e-06, - "loss": 0.6775, + "epoch": 1.1986726913301327, + "grad_norm": 3.834693528378682, + "learning_rate": 1.365293721453441e-05, + "loss": 0.5713, "step": 8489 }, { - "epoch": 1.7802474313273224, - "grad_norm": 6.745052684731232, - "learning_rate": 7.4882434468725185e-06, - "loss": 0.6341, + "epoch": 1.1988138943801185, + "grad_norm": 3.7669121312559093, + "learning_rate": 1.3651518070555545e-05, + "loss": 0.6758, "step": 8490 }, { - "epoch": 1.7804571188928495, - "grad_norm": 7.547785178381326, - "learning_rate": 7.486052203355692e-06, - "loss": 0.7576, + "epoch": 1.1989550974301044, + "grad_norm": 3.0903143347236717, + "learning_rate": 1.3650098841716978e-05, + "loss": 0.5773, "step": 8491 }, { - "epoch": 1.780666806458377, - "grad_norm": 6.939044548480447, - "learning_rate": 7.483861088683047e-06, - "loss": 0.6648, + "epoch": 1.1990963004800903, + "grad_norm": 3.1038476163699555, + "learning_rate": 1.3648679528051689e-05, + "loss": 0.5124, "step": 8492 }, { - "epoch": 1.7808764940239044, - "grad_norm": 7.130755297314795, - "learning_rate": 7.4816701029668915e-06, - "loss": 0.6636, + "epoch": 1.1992375035300762, + "grad_norm": 3.2073192892049422, + "learning_rate": 1.3647260129592666e-05, + "loss": 0.5787, "step": 8493 }, { - "epoch": 1.7810861815894317, - "grad_norm": 7.173704550541052, - "learning_rate": 7.479479246319507e-06, - "loss": 0.7725, + "epoch": 1.199378706580062, + "grad_norm": 3.574112406249967, + "learning_rate": 1.364584064637289e-05, + "loss": 0.6135, "step": 8494 }, { - "epoch": 1.7812958691549592, - "grad_norm": 5.9263373906390635, - "learning_rate": 7.477288518853184e-06, - "loss": 0.6012, + "epoch": 1.199519909630048, + "grad_norm": 3.5346483096154757, + "learning_rate": 1.3644421078425354e-05, + "loss": 0.593, "step": 8495 }, { - "epoch": 1.7815055567204865, - "grad_norm": 7.284666260970255, - "learning_rate": 7.475097920680201e-06, - "loss": 0.7472, + "epoch": 1.1996611126800338, + "grad_norm": 3.3629649950192153, + "learning_rate": 1.3643001425783045e-05, + "loss": 0.5753, "step": 8496 }, { - "epoch": 1.7817152442860138, - "grad_norm": 6.493557186018949, - "learning_rate": 7.4729074519128255e-06, - "loss": 0.76, + "epoch": 1.1998023157300197, + "grad_norm": 4.230447753081659, + "learning_rate": 1.3641581688478955e-05, + "loss": 0.8089, "step": 8497 }, { - "epoch": 1.7819249318515413, - "grad_norm": 5.706050163951453, - "learning_rate": 7.470717112663328e-06, - "loss": 0.476, + "epoch": 1.1999435187800056, + "grad_norm": 3.337523468076622, + "learning_rate": 1.3640161866546083e-05, + "loss": 0.5784, "step": 8498 }, { - "epoch": 1.7821346194170684, - "grad_norm": 6.739242453364852, - "learning_rate": 7.4685269030439665e-06, - "loss": 0.7578, + "epoch": 1.2000847218299915, + "grad_norm": 3.8715684783159485, + "learning_rate": 1.3638741960017416e-05, + "loss": 0.7553, "step": 8499 }, { - "epoch": 1.782344306982596, - "grad_norm": 7.925675552898601, - "learning_rate": 7.466336823166991e-06, - "loss": 0.6092, + "epoch": 1.2002259248799774, + "grad_norm": 3.5417246787389978, + "learning_rate": 1.3637321968925964e-05, + "loss": 0.7248, "step": 8500 }, { - "epoch": 1.7825539945481232, - "grad_norm": 7.422968994624976, - "learning_rate": 7.464146873144644e-06, - "loss": 0.7752, + "epoch": 1.2003671279299633, + "grad_norm": 3.6226594348749845, + "learning_rate": 1.3635901893304714e-05, + "loss": 0.5876, "step": 8501 }, { - "epoch": 1.7827636821136505, - "grad_norm": 7.101561734647468, - "learning_rate": 7.461957053089176e-06, - "loss": 0.6913, + "epoch": 1.2005083309799491, + "grad_norm": 3.1410478976294396, + "learning_rate": 1.3634481733186675e-05, + "loss": 0.5346, "step": 8502 }, { - "epoch": 1.782973369679178, - "grad_norm": 6.913747249571515, - "learning_rate": 7.459767363112804e-06, - "loss": 0.7032, + "epoch": 1.200649534029935, + "grad_norm": 4.1035957021501694, + "learning_rate": 1.3633061488604854e-05, + "loss": 0.7281, "step": 8503 }, { - "epoch": 1.7831830572447054, - "grad_norm": 5.962879301941932, - "learning_rate": 7.457577803327762e-06, - "loss": 0.5643, + "epoch": 1.200790737079921, + "grad_norm": 3.419767278564955, + "learning_rate": 1.3631641159592253e-05, + "loss": 0.5522, "step": 8504 }, { - "epoch": 1.7833927448102327, - "grad_norm": 6.20392450259821, - "learning_rate": 7.4553883738462705e-06, - "loss": 0.609, + "epoch": 1.2009319401299068, + "grad_norm": 3.860021115183452, + "learning_rate": 1.3630220746181874e-05, + "loss": 0.5234, "step": 8505 }, { - "epoch": 1.7836024323757602, - "grad_norm": 6.727553472947862, - "learning_rate": 7.453199074780535e-06, - "loss": 0.7763, + "epoch": 1.2010731431798927, + "grad_norm": 3.479976169822163, + "learning_rate": 1.3628800248406738e-05, + "loss": 0.6797, "step": 8506 }, { - "epoch": 1.7838121199412875, - "grad_norm": 8.43277264679798, - "learning_rate": 7.4510099062427635e-06, - "loss": 0.6382, + "epoch": 1.2012143462298785, + "grad_norm": 3.203569020479435, + "learning_rate": 1.3627379666299849e-05, + "loss": 0.5456, "step": 8507 }, { - "epoch": 1.7840218075068148, - "grad_norm": 7.862389121160747, - "learning_rate": 7.4488208683451605e-06, - "loss": 0.896, + "epoch": 1.2013555492798644, + "grad_norm": 3.741860491073354, + "learning_rate": 1.3625958999894225e-05, + "loss": 0.7266, "step": 8508 }, { - "epoch": 1.7842314950723424, - "grad_norm": 8.268207906810831, - "learning_rate": 7.446631961199906e-06, - "loss": 0.8226, + "epoch": 1.2014967523298503, + "grad_norm": 3.742409880158705, + "learning_rate": 1.3624538249222879e-05, + "loss": 0.6227, "step": 8509 }, { - "epoch": 1.7844411826378694, - "grad_norm": 5.973492408226178, - "learning_rate": 7.444443184919194e-06, - "loss": 0.6757, + "epoch": 1.2016379553798362, + "grad_norm": 3.4866935218126844, + "learning_rate": 1.3623117414318827e-05, + "loss": 0.5741, "step": 8510 }, { - "epoch": 1.784650870203397, - "grad_norm": 7.019481034117828, - "learning_rate": 7.4422545396152045e-06, - "loss": 0.6544, + "epoch": 1.201779158429822, + "grad_norm": 3.173622142344666, + "learning_rate": 1.3621696495215091e-05, + "loss": 0.5393, "step": 8511 }, { - "epoch": 1.7848605577689243, - "grad_norm": 6.89496597724163, - "learning_rate": 7.440066025400104e-06, - "loss": 0.6717, + "epoch": 1.201920361479808, + "grad_norm": 3.2582502912214304, + "learning_rate": 1.3620275491944695e-05, + "loss": 0.5222, "step": 8512 }, { - "epoch": 1.7850702453344516, - "grad_norm": 6.399418888646156, - "learning_rate": 7.437877642386057e-06, - "loss": 0.5971, + "epoch": 1.2020615645297938, + "grad_norm": 3.161994567403225, + "learning_rate": 1.361885440454066e-05, + "loss": 0.5865, "step": 8513 }, { - "epoch": 1.785279932899979, - "grad_norm": 4.718987828578632, - "learning_rate": 7.435689390685229e-06, - "loss": 0.4631, + "epoch": 1.2022027675797797, + "grad_norm": 3.1872926167372424, + "learning_rate": 1.361743323303601e-05, + "loss": 0.4506, "step": 8514 }, { - "epoch": 1.7854896204655064, - "grad_norm": 7.042004414252741, - "learning_rate": 7.433501270409764e-06, - "loss": 0.8621, + "epoch": 1.2023439706297656, + "grad_norm": 3.564685075897357, + "learning_rate": 1.3616011977463776e-05, + "loss": 0.6667, "step": 8515 }, { - "epoch": 1.7856993080310337, - "grad_norm": 6.3338115064525695, - "learning_rate": 7.431313281671813e-06, - "loss": 0.7602, + "epoch": 1.2024851736797515, + "grad_norm": 3.1111746361691446, + "learning_rate": 1.3614590637856986e-05, + "loss": 0.4603, "step": 8516 }, { - "epoch": 1.7859089955965612, - "grad_norm": 7.500686755005799, - "learning_rate": 7.429125424583511e-06, - "loss": 0.7085, + "epoch": 1.2026263767297374, + "grad_norm": 3.6018899945912692, + "learning_rate": 1.3613169214248667e-05, + "loss": 0.6262, "step": 8517 }, { - "epoch": 1.7861186831620883, - "grad_norm": 7.239593402366131, - "learning_rate": 7.426937699256986e-06, - "loss": 0.666, + "epoch": 1.2027675797797233, + "grad_norm": 3.654989321140412, + "learning_rate": 1.3611747706671859e-05, + "loss": 0.621, "step": 8518 }, { - "epoch": 1.7863283707276159, - "grad_norm": 5.803147653713699, - "learning_rate": 7.424750105804371e-06, - "loss": 0.6201, + "epoch": 1.2029087828297091, + "grad_norm": 4.181405229810992, + "learning_rate": 1.3610326115159591e-05, + "loss": 0.687, "step": 8519 }, { - "epoch": 1.7865380582931434, - "grad_norm": 8.358827457366093, - "learning_rate": 7.4225626443377784e-06, - "loss": 0.8998, + "epoch": 1.203049985879695, + "grad_norm": 4.462507535101452, + "learning_rate": 1.3608904439744905e-05, + "loss": 0.7367, "step": 8520 }, { - "epoch": 1.7867477458586705, - "grad_norm": 8.465956629514688, - "learning_rate": 7.420375314969322e-06, - "loss": 0.7821, + "epoch": 1.203191188929681, + "grad_norm": 4.1142562973201455, + "learning_rate": 1.3607482680460842e-05, + "loss": 0.6151, "step": 8521 }, { - "epoch": 1.786957433424198, - "grad_norm": 7.5536516282182715, - "learning_rate": 7.418188117811102e-06, - "loss": 0.9381, + "epoch": 1.2033323919796668, + "grad_norm": 3.4267884097250536, + "learning_rate": 1.3606060837340431e-05, + "loss": 0.5668, "step": 8522 }, { - "epoch": 1.7871671209897253, - "grad_norm": 5.523300575558262, - "learning_rate": 7.416001052975221e-06, - "loss": 0.4752, + "epoch": 1.2034735950296527, + "grad_norm": 2.6628149207078007, + "learning_rate": 1.360463891041673e-05, + "loss": 0.4759, "step": 8523 }, { - "epoch": 1.7873768085552526, - "grad_norm": 7.004006369662888, - "learning_rate": 7.413814120573765e-06, - "loss": 0.7069, + "epoch": 1.2036147980796386, + "grad_norm": 3.570926320344101, + "learning_rate": 1.3603216899722775e-05, + "loss": 0.6769, "step": 8524 }, { - "epoch": 1.7875864961207801, - "grad_norm": 7.270733864256641, - "learning_rate": 7.411627320718824e-06, - "loss": 0.726, + "epoch": 1.2037560011296244, + "grad_norm": 3.940339814897452, + "learning_rate": 1.3601794805291617e-05, + "loss": 0.6856, "step": 8525 }, { - "epoch": 1.7877961836863074, - "grad_norm": 7.92000082896209, - "learning_rate": 7.4094406535224725e-06, - "loss": 0.7617, + "epoch": 1.2038972041796103, + "grad_norm": 3.42785845120409, + "learning_rate": 1.3600372627156304e-05, + "loss": 0.5286, "step": 8526 }, { - "epoch": 1.7880058712518347, - "grad_norm": 7.920088044009678, - "learning_rate": 7.407254119096778e-06, - "loss": 0.8987, + "epoch": 1.2040384072295962, + "grad_norm": 3.2527794155224403, + "learning_rate": 1.3598950365349884e-05, + "loss": 0.6319, "step": 8527 }, { - "epoch": 1.7882155588173623, - "grad_norm": 7.152442108771772, - "learning_rate": 7.4050677175538055e-06, - "loss": 0.7, + "epoch": 1.204179610279582, + "grad_norm": 3.1438447410507355, + "learning_rate": 1.3597528019905411e-05, + "loss": 0.5353, "step": 8528 }, { - "epoch": 1.7884252463828894, - "grad_norm": 6.0669959777087294, - "learning_rate": 7.40288144900562e-06, - "loss": 0.5454, + "epoch": 1.204320813329568, + "grad_norm": 3.505633342545751, + "learning_rate": 1.359610559085594e-05, + "loss": 0.6547, "step": 8529 }, { - "epoch": 1.7886349339484169, - "grad_norm": 7.482284654172083, - "learning_rate": 7.4006953135642565e-06, - "loss": 0.708, + "epoch": 1.2044620163795539, + "grad_norm": 3.209384542513002, + "learning_rate": 1.3594683078234532e-05, + "loss": 0.5399, "step": 8530 }, { - "epoch": 1.7888446215139442, - "grad_norm": 8.556294684349144, - "learning_rate": 7.39850931134177e-06, - "loss": 0.9896, + "epoch": 1.2046032194295397, + "grad_norm": 4.227870854079469, + "learning_rate": 1.3593260482074238e-05, + "loss": 0.6747, "step": 8531 }, { - "epoch": 1.7890543090794715, - "grad_norm": 5.806225283537285, - "learning_rate": 7.396323442450192e-06, - "loss": 0.6357, + "epoch": 1.2047444224795256, + "grad_norm": 3.544573190428461, + "learning_rate": 1.3591837802408123e-05, + "loss": 0.666, "step": 8532 }, { - "epoch": 1.789263996644999, - "grad_norm": 6.031312566176968, - "learning_rate": 7.394137707001558e-06, - "loss": 0.7069, + "epoch": 1.2048856255295115, + "grad_norm": 3.50923350732052, + "learning_rate": 1.3590415039269251e-05, + "loss": 0.5833, "step": 8533 }, { - "epoch": 1.7894736842105263, - "grad_norm": 6.9112563648421395, - "learning_rate": 7.3919521051078814e-06, - "loss": 0.7064, + "epoch": 1.2050268285794974, + "grad_norm": 3.185623948290316, + "learning_rate": 1.3588992192690683e-05, + "loss": 0.5503, "step": 8534 }, { - "epoch": 1.7896833717760536, - "grad_norm": 6.784390587780009, - "learning_rate": 7.389766636881184e-06, - "loss": 0.7397, + "epoch": 1.2051680316294833, + "grad_norm": 3.609065163298797, + "learning_rate": 1.3587569262705485e-05, + "loss": 0.6249, "step": 8535 }, { - "epoch": 1.7898930593415812, - "grad_norm": 6.9086898918562705, - "learning_rate": 7.387581302433476e-06, - "loss": 0.8434, + "epoch": 1.2053092346794692, + "grad_norm": 3.256133112240713, + "learning_rate": 1.3586146249346728e-05, + "loss": 0.5907, "step": 8536 }, { - "epoch": 1.7901027469071082, - "grad_norm": 7.160303378245445, - "learning_rate": 7.385396101876756e-06, - "loss": 0.8045, + "epoch": 1.205450437729455, + "grad_norm": 3.5292369065014224, + "learning_rate": 1.3584723152647479e-05, + "loss": 0.6071, "step": 8537 }, { - "epoch": 1.7903124344726358, - "grad_norm": 7.389959177808936, - "learning_rate": 7.383211035323017e-06, - "loss": 0.7965, + "epoch": 1.205591640779441, + "grad_norm": 3.6971951607065767, + "learning_rate": 1.3583299972640815e-05, + "loss": 0.675, "step": 8538 }, { - "epoch": 1.7905221220381633, - "grad_norm": 6.4625515188848945, - "learning_rate": 7.3810261028842585e-06, - "loss": 0.5236, + "epoch": 1.2057328438294268, + "grad_norm": 4.2606284498679585, + "learning_rate": 1.3581876709359804e-05, + "loss": 0.6918, "step": 8539 }, { - "epoch": 1.7907318096036904, - "grad_norm": 7.619907526106467, - "learning_rate": 7.378841304672448e-06, - "loss": 0.7905, + "epoch": 1.2058740468794127, + "grad_norm": 3.44284691839895, + "learning_rate": 1.3580453362837527e-05, + "loss": 0.6868, "step": 8540 }, { - "epoch": 1.790941497169218, - "grad_norm": 5.609252433248018, - "learning_rate": 7.376656640799569e-06, - "loss": 0.375, + "epoch": 1.2060152499293986, + "grad_norm": 3.378145807020263, + "learning_rate": 1.3579029933107059e-05, + "loss": 0.661, "step": 8541 }, { - "epoch": 1.7911511847347452, - "grad_norm": 6.638066150542521, - "learning_rate": 7.374472111377588e-06, - "loss": 0.7529, + "epoch": 1.2061564529793842, + "grad_norm": 3.367405027180481, + "learning_rate": 1.3577606420201483e-05, + "loss": 0.5004, "step": 8542 }, { - "epoch": 1.7913608723002725, - "grad_norm": 8.690569909822019, - "learning_rate": 7.372287716518465e-06, - "loss": 1.0888, + "epoch": 1.2062976560293701, + "grad_norm": 2.9351763633420647, + "learning_rate": 1.3576182824153879e-05, + "loss": 0.5275, "step": 8543 }, { - "epoch": 1.7915705598658, - "grad_norm": 7.669644012449896, - "learning_rate": 7.37010345633415e-06, - "loss": 0.5809, + "epoch": 1.206438859079356, + "grad_norm": 3.48957598898269, + "learning_rate": 1.357475914499733e-05, + "loss": 0.6836, "step": 8544 }, { - "epoch": 1.7917802474313274, - "grad_norm": 5.680955272776126, - "learning_rate": 7.367919330936599e-06, - "loss": 0.5131, + "epoch": 1.2065800621293419, + "grad_norm": 3.723050565377529, + "learning_rate": 1.3573335382764919e-05, + "loss": 0.7019, "step": 8545 }, { - "epoch": 1.7919899349968547, - "grad_norm": 7.934008735150557, - "learning_rate": 7.365735340437745e-06, - "loss": 0.7781, + "epoch": 1.2067212651793278, + "grad_norm": 3.6439295754732712, + "learning_rate": 1.3571911537489739e-05, + "loss": 0.572, "step": 8546 }, { - "epoch": 1.7921996225623822, - "grad_norm": 7.231129444804259, - "learning_rate": 7.363551484949525e-06, - "loss": 0.8326, + "epoch": 1.2068624682293136, + "grad_norm": 3.7028130266524886, + "learning_rate": 1.357048760920488e-05, + "loss": 0.6723, "step": 8547 }, { - "epoch": 1.7924093101279093, - "grad_norm": 5.933860113921983, - "learning_rate": 7.361367764583866e-06, - "loss": 0.6042, + "epoch": 1.2070036712792995, + "grad_norm": 3.7362060519518128, + "learning_rate": 1.3569063597943428e-05, + "loss": 0.5774, "step": 8548 }, { - "epoch": 1.7926189976934368, - "grad_norm": 7.787922775329241, - "learning_rate": 7.359184179452681e-06, - "loss": 0.7345, + "epoch": 1.2071448743292854, + "grad_norm": 3.583105759278942, + "learning_rate": 1.3567639503738482e-05, + "loss": 0.6938, "step": 8549 }, { - "epoch": 1.792828685258964, - "grad_norm": 6.746325515798119, - "learning_rate": 7.357000729667888e-06, - "loss": 0.6081, + "epoch": 1.2072860773792713, + "grad_norm": 4.361245070989145, + "learning_rate": 1.3566215326623131e-05, + "loss": 0.6487, "step": 8550 }, { - "epoch": 1.7930383728244914, - "grad_norm": 6.325859698100907, - "learning_rate": 7.354817415341395e-06, - "loss": 0.7407, + "epoch": 1.2074272804292572, + "grad_norm": 3.3493731405979554, + "learning_rate": 1.3564791066630478e-05, + "loss": 0.5868, "step": 8551 }, { - "epoch": 1.793248060390019, - "grad_norm": 7.8733365791769865, - "learning_rate": 7.352634236585095e-06, - "loss": 0.7032, + "epoch": 1.207568483479243, + "grad_norm": 3.3907147582174693, + "learning_rate": 1.3563366723793621e-05, + "loss": 0.5766, "step": 8552 }, { - "epoch": 1.7934577479555462, - "grad_norm": 7.993327796391577, - "learning_rate": 7.350451193510882e-06, - "loss": 0.8547, + "epoch": 1.207709686529229, + "grad_norm": 3.3263859590184275, + "learning_rate": 1.3561942298145661e-05, + "loss": 0.518, "step": 8553 }, { - "epoch": 1.7936674355210735, - "grad_norm": 4.796742340359671, - "learning_rate": 7.348268286230642e-06, - "loss": 0.4277, + "epoch": 1.2078508895792148, + "grad_norm": 3.6789863438351924, + "learning_rate": 1.3560517789719696e-05, + "loss": 0.681, "step": 8554 }, { - "epoch": 1.793877123086601, - "grad_norm": 6.6736381070831285, - "learning_rate": 7.346085514856248e-06, - "loss": 0.9108, + "epoch": 1.2079920926292007, + "grad_norm": 3.5508380351202833, + "learning_rate": 1.355909319854884e-05, + "loss": 0.6455, "step": 8555 }, { - "epoch": 1.7940868106521284, - "grad_norm": 5.696958726823761, - "learning_rate": 7.343902879499576e-06, - "loss": 0.5378, + "epoch": 1.2081332956791866, + "grad_norm": 3.310680608142239, + "learning_rate": 1.355766852466619e-05, + "loss": 0.5233, "step": 8556 }, { - "epoch": 1.7942964982176557, - "grad_norm": 7.895400209230349, - "learning_rate": 7.3417203802724915e-06, - "loss": 0.8163, + "epoch": 1.2082744987291725, + "grad_norm": 4.113737558100836, + "learning_rate": 1.3556243768104864e-05, + "loss": 0.6577, "step": 8557 }, { - "epoch": 1.7945061857831832, - "grad_norm": 5.722944233519205, - "learning_rate": 7.339538017286844e-06, - "loss": 0.4031, + "epoch": 1.2084157017791584, + "grad_norm": 4.4560172983662545, + "learning_rate": 1.3554818928897965e-05, + "loss": 0.7653, "step": 8558 }, { - "epoch": 1.7947158733487103, - "grad_norm": 6.369467306371739, - "learning_rate": 7.337355790654488e-06, - "loss": 0.6541, + "epoch": 1.2085569048291442, + "grad_norm": 3.9647366298478084, + "learning_rate": 1.3553394007078615e-05, + "loss": 0.7097, "step": 8559 }, { - "epoch": 1.7949255609142378, - "grad_norm": 7.154725611267172, - "learning_rate": 7.3351737004872705e-06, - "loss": 0.7313, + "epoch": 1.2086981078791301, + "grad_norm": 3.1010730419016346, + "learning_rate": 1.355196900267992e-05, + "loss": 0.6087, "step": 8560 }, { - "epoch": 1.7951352484797651, - "grad_norm": 5.957504054520665, - "learning_rate": 7.332991746897016e-06, - "loss": 0.4537, + "epoch": 1.208839310929116, + "grad_norm": 3.557315267689678, + "learning_rate": 1.3550543915735e-05, + "loss": 0.5643, "step": 8561 }, { - "epoch": 1.7953449360452924, - "grad_norm": 8.967212137890364, - "learning_rate": 7.330809929995564e-06, - "loss": 1.1113, + "epoch": 1.208980513979102, + "grad_norm": 3.3813415193793754, + "learning_rate": 1.3549118746276968e-05, + "loss": 0.619, "step": 8562 }, { - "epoch": 1.79555462361082, - "grad_norm": 6.882344923905063, - "learning_rate": 7.3286282498947335e-06, - "loss": 0.8033, + "epoch": 1.2091217170290878, + "grad_norm": 3.2575783157116662, + "learning_rate": 1.3547693494338953e-05, + "loss": 0.5183, "step": 8563 }, { - "epoch": 1.7957643111763473, - "grad_norm": 8.147665361420723, - "learning_rate": 7.326446706706336e-06, - "loss": 0.6587, + "epoch": 1.2092629200790737, + "grad_norm": 3.4551415663666987, + "learning_rate": 1.3546268159954075e-05, + "loss": 0.5212, "step": 8564 }, { - "epoch": 1.7959739987418746, - "grad_norm": 6.515093185369705, - "learning_rate": 7.32426530054218e-06, - "loss": 0.6989, + "epoch": 1.2094041231290595, + "grad_norm": 3.7427195230464085, + "learning_rate": 1.3544842743155453e-05, + "loss": 0.6575, "step": 8565 }, { - "epoch": 1.796183686307402, - "grad_norm": 6.400558489849805, - "learning_rate": 7.322084031514073e-06, - "loss": 0.6788, + "epoch": 1.2095453261790454, + "grad_norm": 3.537673644437599, + "learning_rate": 1.3543417243976217e-05, + "loss": 0.5876, "step": 8566 }, { - "epoch": 1.7963933738729292, - "grad_norm": 7.266156517860445, - "learning_rate": 7.319902899733802e-06, - "loss": 0.715, + "epoch": 1.2096865292290313, + "grad_norm": 3.784301756949045, + "learning_rate": 1.3541991662449498e-05, + "loss": 0.5814, "step": 8567 }, { - "epoch": 1.7966030614384567, - "grad_norm": 7.804231603393305, - "learning_rate": 7.317721905313155e-06, - "loss": 0.8506, + "epoch": 1.2098277322790172, + "grad_norm": 4.271722913573719, + "learning_rate": 1.3540565998608419e-05, + "loss": 0.6513, "step": 8568 }, { - "epoch": 1.796812749003984, - "grad_norm": 7.07112405608429, - "learning_rate": 7.3155410483639145e-06, - "loss": 0.6164, + "epoch": 1.209968935329003, + "grad_norm": 3.675029375034489, + "learning_rate": 1.3539140252486119e-05, + "loss": 0.71, "step": 8569 }, { - "epoch": 1.7970224365695113, - "grad_norm": 7.749679480450499, - "learning_rate": 7.313360328997849e-06, - "loss": 0.7337, + "epoch": 1.210110138378989, + "grad_norm": 3.530925765704981, + "learning_rate": 1.3537714424115725e-05, + "loss": 0.7401, "step": 8570 }, { - "epoch": 1.7972321241350389, - "grad_norm": 7.732527425478706, - "learning_rate": 7.311179747326725e-06, - "loss": 0.6781, + "epoch": 1.2102513414289748, + "grad_norm": 3.1849349102572746, + "learning_rate": 1.3536288513530374e-05, + "loss": 0.5438, "step": 8571 }, { - "epoch": 1.7974418117005662, - "grad_norm": 9.026050454145706, - "learning_rate": 7.3089993034623076e-06, - "loss": 1.0107, + "epoch": 1.2103925444789607, + "grad_norm": 3.434011398998355, + "learning_rate": 1.353486252076321e-05, + "loss": 0.5211, "step": 8572 }, { - "epoch": 1.7976514992660935, - "grad_norm": 7.459617566269735, - "learning_rate": 7.306818997516338e-06, - "loss": 0.8722, + "epoch": 1.2105337475289466, + "grad_norm": 3.989834468236496, + "learning_rate": 1.353343644584736e-05, + "loss": 0.6783, "step": 8573 }, { - "epoch": 1.797861186831621, - "grad_norm": 7.744838682897569, - "learning_rate": 7.304638829600569e-06, - "loss": 0.774, + "epoch": 1.2106749505789325, + "grad_norm": 3.61727967478047, + "learning_rate": 1.353201028881598e-05, + "loss": 0.6017, "step": 8574 }, { - "epoch": 1.7980708743971483, - "grad_norm": 10.090243215329245, - "learning_rate": 7.302458799826735e-06, - "loss": 0.9973, + "epoch": 1.2108161536289184, + "grad_norm": 3.5959256461846962, + "learning_rate": 1.3530584049702204e-05, + "loss": 0.5322, "step": 8575 }, { - "epoch": 1.7982805619626756, - "grad_norm": 6.0432982918975675, - "learning_rate": 7.300278908306564e-06, - "loss": 0.6413, + "epoch": 1.2109573566789043, + "grad_norm": 3.037092612937834, + "learning_rate": 1.3529157728539179e-05, + "loss": 0.4418, "step": 8576 }, { - "epoch": 1.7984902495282031, - "grad_norm": 7.019722322045745, - "learning_rate": 7.298099155151781e-06, - "loss": 0.7167, + "epoch": 1.2110985597288901, + "grad_norm": 3.0716390423925186, + "learning_rate": 1.3527731325360053e-05, + "loss": 0.5976, "step": 8577 }, { - "epoch": 1.7986999370937302, - "grad_norm": 5.509042752792497, - "learning_rate": 7.2959195404741056e-06, - "loss": 0.5491, + "epoch": 1.211239762778876, + "grad_norm": 3.8894253575639803, + "learning_rate": 1.3526304840197976e-05, + "loss": 0.5864, "step": 8578 }, { - "epoch": 1.7989096246592577, - "grad_norm": 5.629109441172555, - "learning_rate": 7.293740064385242e-06, - "loss": 0.6278, + "epoch": 1.211380965828862, + "grad_norm": 3.553511383788996, + "learning_rate": 1.3524878273086093e-05, + "loss": 0.6257, "step": 8579 }, { - "epoch": 1.799119312224785, - "grad_norm": 7.553742932804851, - "learning_rate": 7.291560726996892e-06, - "loss": 0.8958, + "epoch": 1.2115221688788478, + "grad_norm": 3.7684484684640167, + "learning_rate": 1.3523451624057566e-05, + "loss": 0.6431, "step": 8580 }, { - "epoch": 1.7993289997903124, - "grad_norm": 5.150973779562384, - "learning_rate": 7.2893815284207585e-06, - "loss": 0.5672, + "epoch": 1.2116633719288337, + "grad_norm": 3.4618412854900633, + "learning_rate": 1.3522024893145544e-05, + "loss": 0.5607, "step": 8581 }, { - "epoch": 1.7995386873558399, - "grad_norm": 7.259753547167847, - "learning_rate": 7.287202468768516e-06, - "loss": 0.6715, + "epoch": 1.2118045749788195, + "grad_norm": 3.8461643334558366, + "learning_rate": 1.3520598080383183e-05, + "loss": 0.629, "step": 8582 }, { - "epoch": 1.7997483749213672, - "grad_norm": 8.96453074114518, - "learning_rate": 7.285023548151855e-06, - "loss": 0.9256, + "epoch": 1.2119457780288054, + "grad_norm": 4.128154580157784, + "learning_rate": 1.3519171185803641e-05, + "loss": 0.6334, "step": 8583 }, { - "epoch": 1.7999580624868945, - "grad_norm": 6.542165717479757, - "learning_rate": 7.282844766682448e-06, - "loss": 0.8384, + "epoch": 1.2120869810787913, + "grad_norm": 3.4813518183171093, + "learning_rate": 1.3517744209440085e-05, + "loss": 0.6604, "step": 8584 }, { - "epoch": 1.800167750052422, - "grad_norm": 6.186439156804989, - "learning_rate": 7.2806661244719565e-06, - "loss": 0.6021, + "epoch": 1.2122281841287772, + "grad_norm": 3.3321193262460027, + "learning_rate": 1.3516317151325671e-05, + "loss": 0.5559, "step": 8585 }, { - "epoch": 1.800377437617949, - "grad_norm": 6.095291406134879, - "learning_rate": 7.278487621632041e-06, - "loss": 0.837, + "epoch": 1.212369387178763, + "grad_norm": 2.6223781781424487, + "learning_rate": 1.3514890011493564e-05, + "loss": 0.4308, "step": 8586 }, { - "epoch": 1.8005871251834766, - "grad_norm": 6.684685476075956, - "learning_rate": 7.276309258274361e-06, - "loss": 0.6958, + "epoch": 1.212510590228749, + "grad_norm": 3.760956847628895, + "learning_rate": 1.3513462789976933e-05, + "loss": 0.6591, "step": 8587 }, { - "epoch": 1.800796812749004, - "grad_norm": 8.153883895466917, - "learning_rate": 7.27413103451055e-06, - "loss": 0.6804, + "epoch": 1.2126517932787348, + "grad_norm": 3.9659461129689024, + "learning_rate": 1.3512035486808942e-05, + "loss": 0.6586, "step": 8588 }, { - "epoch": 1.8010065003145312, - "grad_norm": 5.842255463706033, - "learning_rate": 7.271952950452253e-06, - "loss": 0.714, + "epoch": 1.2127929963287207, + "grad_norm": 4.2370735273003595, + "learning_rate": 1.3510608102022765e-05, + "loss": 0.6172, "step": 8589 }, { - "epoch": 1.8012161878800588, - "grad_norm": 6.158669257345333, - "learning_rate": 7.269775006211101e-06, - "loss": 0.6882, + "epoch": 1.2129341993787066, + "grad_norm": 3.744984364391922, + "learning_rate": 1.350918063565157e-05, + "loss": 0.6758, "step": 8590 }, { - "epoch": 1.801425875445586, - "grad_norm": 5.356304311678659, - "learning_rate": 7.267597201898713e-06, - "loss": 0.5209, + "epoch": 1.2130754024286925, + "grad_norm": 3.8578046226243323, + "learning_rate": 1.3507753087728532e-05, + "loss": 0.6326, "step": 8591 }, { - "epoch": 1.8016355630111134, - "grad_norm": 6.198955110233281, - "learning_rate": 7.265419537626705e-06, - "loss": 0.561, + "epoch": 1.2132166054786784, + "grad_norm": 3.5473662443160277, + "learning_rate": 1.3506325458286829e-05, + "loss": 0.6453, "step": 8592 }, { - "epoch": 1.801845250576641, - "grad_norm": 6.3374765446001255, - "learning_rate": 7.263242013506692e-06, - "loss": 0.6499, + "epoch": 1.2133578085286643, + "grad_norm": 3.2785099255278003, + "learning_rate": 1.3504897747359636e-05, + "loss": 0.5544, "step": 8593 }, { - "epoch": 1.8020549381421682, - "grad_norm": 7.687225966880493, - "learning_rate": 7.261064629650274e-06, - "loss": 0.8481, + "epoch": 1.2134990115786501, + "grad_norm": 3.517138990766305, + "learning_rate": 1.3503469954980133e-05, + "loss": 0.5606, "step": 8594 }, { - "epoch": 1.8022646257076955, - "grad_norm": 7.761273975616362, - "learning_rate": 7.258887386169041e-06, - "loss": 0.7833, + "epoch": 1.213640214628636, + "grad_norm": 3.0758348976654384, + "learning_rate": 1.3502042081181502e-05, + "loss": 0.5131, "step": 8595 }, { - "epoch": 1.802474313273223, - "grad_norm": 8.124199805722295, - "learning_rate": 7.2567102831745824e-06, - "loss": 0.83, + "epoch": 1.213781417678622, + "grad_norm": 3.444880122013575, + "learning_rate": 1.3500614125996924e-05, + "loss": 0.5971, "step": 8596 }, { - "epoch": 1.8026840008387501, - "grad_norm": 6.028072353623083, - "learning_rate": 7.2545333207784854e-06, - "loss": 0.6831, + "epoch": 1.2139226207286078, + "grad_norm": 4.455252170548031, + "learning_rate": 1.3499186089459587e-05, + "loss": 0.8704, "step": 8597 }, { - "epoch": 1.8028936884042777, - "grad_norm": 7.268137658346249, - "learning_rate": 7.252356499092312e-06, - "loss": 0.5859, + "epoch": 1.2140638237785937, + "grad_norm": 3.4452404700899013, + "learning_rate": 1.3497757971602677e-05, + "loss": 0.5541, "step": 8598 }, { - "epoch": 1.803103375969805, - "grad_norm": 8.369194385574005, - "learning_rate": 7.250179818227636e-06, - "loss": 0.8506, + "epoch": 1.2142050268285796, + "grad_norm": 3.7477138541272654, + "learning_rate": 1.3496329772459378e-05, + "loss": 0.676, "step": 8599 }, { - "epoch": 1.8033130635353323, - "grad_norm": 6.2456208268317415, - "learning_rate": 7.248003278296016e-06, - "loss": 0.7053, + "epoch": 1.2143462298785654, + "grad_norm": 3.6574001072858753, + "learning_rate": 1.3494901492062889e-05, + "loss": 0.6385, "step": 8600 }, { - "epoch": 1.8035227511008598, - "grad_norm": 5.850378126886383, - "learning_rate": 7.245826879408998e-06, - "loss": 0.5834, + "epoch": 1.2144874329285513, + "grad_norm": 3.1330699365348207, + "learning_rate": 1.3493473130446398e-05, + "loss": 0.5664, "step": 8601 }, { - "epoch": 1.803732438666387, - "grad_norm": 5.33436037814003, - "learning_rate": 7.243650621678128e-06, - "loss": 0.5472, + "epoch": 1.2146286359785372, + "grad_norm": 3.227589933804556, + "learning_rate": 1.34920446876431e-05, + "loss": 0.6729, "step": 8602 }, { - "epoch": 1.8039421262319144, - "grad_norm": 7.252917637319995, - "learning_rate": 7.2414745052149475e-06, - "loss": 0.6878, + "epoch": 1.214769839028523, + "grad_norm": 3.86666391893981, + "learning_rate": 1.3490616163686193e-05, + "loss": 0.7414, "step": 8603 }, { - "epoch": 1.804151813797442, - "grad_norm": 6.694583813107809, - "learning_rate": 7.239298530130982e-06, - "loss": 0.7088, + "epoch": 1.214911042078509, + "grad_norm": 4.239813656644816, + "learning_rate": 1.3489187558608871e-05, + "loss": 0.675, "step": 8604 }, { - "epoch": 1.804361501362969, - "grad_norm": 6.728995746208721, - "learning_rate": 7.237122696537754e-06, - "loss": 0.5775, + "epoch": 1.2150522451284949, + "grad_norm": 4.340006516198574, + "learning_rate": 1.3487758872444337e-05, + "loss": 0.7567, "step": 8605 }, { - "epoch": 1.8045711889284966, - "grad_norm": 5.606599004951529, - "learning_rate": 7.234947004546782e-06, - "loss": 0.526, + "epoch": 1.2151934481784807, + "grad_norm": 3.5017297322271412, + "learning_rate": 1.3486330105225797e-05, + "loss": 0.5903, "step": 8606 }, { - "epoch": 1.8047808764940239, - "grad_norm": 6.641404477821367, - "learning_rate": 7.232771454269567e-06, - "loss": 0.7342, + "epoch": 1.2153346512284666, + "grad_norm": 3.3594113946610635, + "learning_rate": 1.3484901256986448e-05, + "loss": 0.4827, "step": 8607 }, { - "epoch": 1.8049905640595512, - "grad_norm": 6.442118483887747, - "learning_rate": 7.230596045817619e-06, - "loss": 0.5325, + "epoch": 1.2154758542784525, + "grad_norm": 3.170211099506976, + "learning_rate": 1.3483472327759496e-05, + "loss": 0.5725, "step": 8608 }, { - "epoch": 1.8052002516250787, - "grad_norm": 7.141080158515472, - "learning_rate": 7.228420779302427e-06, - "loss": 0.7336, + "epoch": 1.2156170573284384, + "grad_norm": 2.6579878657503464, + "learning_rate": 1.3482043317578154e-05, + "loss": 0.477, "step": 8609 }, { - "epoch": 1.805409939190606, - "grad_norm": 7.729646547936568, - "learning_rate": 7.226245654835475e-06, - "loss": 0.7979, + "epoch": 1.2157582603784243, + "grad_norm": 3.838288270545291, + "learning_rate": 1.3480614226475632e-05, + "loss": 0.6255, "step": 8610 }, { - "epoch": 1.8056196267561333, - "grad_norm": 9.018149517894344, - "learning_rate": 7.224070672528243e-06, - "loss": 1.1419, + "epoch": 1.2158994634284102, + "grad_norm": 3.4177385721360376, + "learning_rate": 1.3479185054485137e-05, + "loss": 0.6034, "step": 8611 }, { - "epoch": 1.8058293143216608, - "grad_norm": 6.684690663582744, - "learning_rate": 7.2218958324922095e-06, - "loss": 0.7942, + "epoch": 1.216040666478396, + "grad_norm": 3.183650433639726, + "learning_rate": 1.3477755801639883e-05, + "loss": 0.57, "step": 8612 }, { - "epoch": 1.8060390018871881, - "grad_norm": 6.892969088223079, - "learning_rate": 7.219721134838826e-06, - "loss": 0.7447, + "epoch": 1.216181869528382, + "grad_norm": 3.428732221972703, + "learning_rate": 1.3476326467973087e-05, + "loss": 0.6101, "step": 8613 }, { - "epoch": 1.8062486894527154, - "grad_norm": 9.89638321122598, - "learning_rate": 7.21754657967956e-06, - "loss": 0.884, + "epoch": 1.2163230725783678, + "grad_norm": 3.2315133997340793, + "learning_rate": 1.3474897053517963e-05, + "loss": 0.6142, "step": 8614 }, { - "epoch": 1.806458377018243, - "grad_norm": 7.211392984439015, - "learning_rate": 7.215372167125858e-06, - "loss": 0.7147, + "epoch": 1.2164642756283537, + "grad_norm": 3.2781712905940084, + "learning_rate": 1.3473467558307736e-05, + "loss": 0.608, "step": 8615 }, { - "epoch": 1.80666806458377, - "grad_norm": 5.659392056170881, - "learning_rate": 7.213197897289161e-06, - "loss": 0.5751, + "epoch": 1.2166054786783396, + "grad_norm": 3.241053531102113, + "learning_rate": 1.347203798237562e-05, + "loss": 0.5365, "step": 8616 }, { - "epoch": 1.8068777521492976, - "grad_norm": 6.36876674444938, - "learning_rate": 7.211023770280903e-06, - "loss": 0.6965, + "epoch": 1.2167466817283255, + "grad_norm": 3.3052234346771403, + "learning_rate": 1.347060832575484e-05, + "loss": 0.5503, "step": 8617 }, { - "epoch": 1.8070874397148249, - "grad_norm": 8.624524251542926, - "learning_rate": 7.208849786212519e-06, - "loss": 0.9057, + "epoch": 1.2168878847783113, + "grad_norm": 3.5386127987796647, + "learning_rate": 1.3469178588478621e-05, + "loss": 0.6891, "step": 8618 }, { - "epoch": 1.8072971272803522, - "grad_norm": 6.646663165931561, - "learning_rate": 7.20667594519542e-06, - "loss": 0.6309, + "epoch": 1.217029087828297, + "grad_norm": 3.5660582576540243, + "learning_rate": 1.3467748770580193e-05, + "loss": 0.6116, "step": 8619 }, { - "epoch": 1.8075068148458797, - "grad_norm": 6.0118187343916185, - "learning_rate": 7.204502247341025e-06, - "loss": 0.5764, + "epoch": 1.2171702908782829, + "grad_norm": 3.0463352710576705, + "learning_rate": 1.346631887209278e-05, + "loss": 0.4931, "step": 8620 }, { - "epoch": 1.807716502411407, - "grad_norm": 5.407080188304323, - "learning_rate": 7.202328692760741e-06, - "loss": 0.47, + "epoch": 1.2173114939282688, + "grad_norm": 3.3812911964143435, + "learning_rate": 1.3464888893049612e-05, + "loss": 0.6282, "step": 8621 }, { - "epoch": 1.8079261899769343, - "grad_norm": 6.98178398633119, - "learning_rate": 7.200155281565961e-06, - "loss": 0.8821, + "epoch": 1.2174526969782546, + "grad_norm": 3.674696147908224, + "learning_rate": 1.3463458833483923e-05, + "loss": 0.5547, "step": 8622 }, { - "epoch": 1.8081358775424619, - "grad_norm": 8.601075894487982, - "learning_rate": 7.1979820138680786e-06, - "loss": 1.015, + "epoch": 1.2175939000282405, + "grad_norm": 3.957241833318662, + "learning_rate": 1.3462028693428951e-05, + "loss": 0.7094, "step": 8623 }, { - "epoch": 1.808345565107989, - "grad_norm": 7.087302266044096, - "learning_rate": 7.195808889778479e-06, - "loss": 0.7439, + "epoch": 1.2177351030782264, + "grad_norm": 3.2563181493888242, + "learning_rate": 1.3460598472917923e-05, + "loss": 0.6156, "step": 8624 }, { - "epoch": 1.8085552526735165, - "grad_norm": 6.338547635960465, - "learning_rate": 7.193635909408538e-06, - "loss": 0.6436, + "epoch": 1.2178763061282123, + "grad_norm": 3.4703480223227197, + "learning_rate": 1.3459168171984079e-05, + "loss": 0.5381, "step": 8625 }, { - "epoch": 1.8087649402390438, - "grad_norm": 5.580416864208404, - "learning_rate": 7.191463072869624e-06, - "loss": 0.5888, + "epoch": 1.2180175091781982, + "grad_norm": 3.891617948874913, + "learning_rate": 1.3457737790660663e-05, + "loss": 0.5109, "step": 8626 }, { - "epoch": 1.808974627804571, - "grad_norm": 7.260647629594801, - "learning_rate": 7.1892903802731e-06, - "loss": 0.7591, + "epoch": 1.218158712228184, + "grad_norm": 3.009528709851622, + "learning_rate": 1.3456307328980915e-05, + "loss": 0.4959, "step": 8627 }, { - "epoch": 1.8091843153700986, - "grad_norm": 7.203299896522841, - "learning_rate": 7.187117831730314e-06, - "loss": 0.8843, + "epoch": 1.21829991527817, + "grad_norm": 3.5403681733648624, + "learning_rate": 1.3454876786978076e-05, + "loss": 0.5796, "step": 8628 }, { - "epoch": 1.809394002935626, - "grad_norm": 9.300361403530278, - "learning_rate": 7.18494542735262e-06, - "loss": 0.9758, + "epoch": 1.2184411183281558, + "grad_norm": 3.3127842765928857, + "learning_rate": 1.3453446164685394e-05, + "loss": 0.5861, "step": 8629 }, { - "epoch": 1.8096036905011532, - "grad_norm": 5.66501252921601, - "learning_rate": 7.182773167251358e-06, - "loss": 0.6254, + "epoch": 1.2185823213781417, + "grad_norm": 3.4940941279201168, + "learning_rate": 1.3452015462136113e-05, + "loss": 0.5456, "step": 8630 }, { - "epoch": 1.8098133780666807, - "grad_norm": 6.593871328516726, - "learning_rate": 7.180601051537854e-06, - "loss": 0.5968, + "epoch": 1.2187235244281276, + "grad_norm": 4.136366741650121, + "learning_rate": 1.3450584679363483e-05, + "loss": 0.569, "step": 8631 }, { - "epoch": 1.810023065632208, - "grad_norm": 6.824785349043687, - "learning_rate": 7.178429080323438e-06, - "loss": 0.5883, + "epoch": 1.2188647274781135, + "grad_norm": 4.069479425265647, + "learning_rate": 1.3449153816400758e-05, + "loss": 0.6491, "step": 8632 }, { - "epoch": 1.8102327531977354, - "grad_norm": 5.438131640321373, - "learning_rate": 7.1762572537194275e-06, - "loss": 0.5901, + "epoch": 1.2190059305280994, + "grad_norm": 3.3728440082937694, + "learning_rate": 1.3447722873281186e-05, + "loss": 0.56, "step": 8633 }, { - "epoch": 1.8104424407632629, - "grad_norm": 7.6265716733312665, - "learning_rate": 7.1740855718371234e-06, - "loss": 0.8291, + "epoch": 1.2191471335780852, + "grad_norm": 2.875231976442875, + "learning_rate": 1.3446291850038021e-05, + "loss": 0.4815, "step": 8634 }, { - "epoch": 1.81065212832879, - "grad_norm": 4.538021245207589, - "learning_rate": 7.17191403478784e-06, - "loss": 0.31, + "epoch": 1.2192883366280711, + "grad_norm": 3.900931577749558, + "learning_rate": 1.3444860746704525e-05, + "loss": 0.5908, "step": 8635 }, { - "epoch": 1.8108618158943175, - "grad_norm": 7.231341794372251, - "learning_rate": 7.169742642682868e-06, - "loss": 0.7058, + "epoch": 1.219429539678057, + "grad_norm": 3.834368671341001, + "learning_rate": 1.3443429563313952e-05, + "loss": 0.6266, "step": 8636 }, { - "epoch": 1.8110715034598448, - "grad_norm": 7.303722398924656, - "learning_rate": 7.167571395633494e-06, - "loss": 0.5058, + "epoch": 1.219570742728043, + "grad_norm": 3.783421321294592, + "learning_rate": 1.3441998299899564e-05, + "loss": 0.6338, "step": 8637 }, { - "epoch": 1.811281191025372, - "grad_norm": 5.750226412531893, - "learning_rate": 7.165400293750995e-06, - "loss": 0.6363, + "epoch": 1.2197119457780288, + "grad_norm": 4.438207149848002, + "learning_rate": 1.344056695649462e-05, + "loss": 0.5973, "step": 8638 }, { - "epoch": 1.8114908785908996, - "grad_norm": 9.850912588086373, - "learning_rate": 7.163229337146653e-06, - "loss": 1.0093, + "epoch": 1.2198531488280147, + "grad_norm": 3.761510830021975, + "learning_rate": 1.3439135533132389e-05, + "loss": 0.7268, "step": 8639 }, { - "epoch": 1.811700566156427, - "grad_norm": 6.690438615366756, - "learning_rate": 7.161058525931724e-06, - "loss": 0.6675, + "epoch": 1.2199943518780005, + "grad_norm": 3.2762204672586477, + "learning_rate": 1.343770402984613e-05, + "loss": 0.6784, "step": 8640 }, { - "epoch": 1.8119102537219542, - "grad_norm": 6.394408418976392, - "learning_rate": 7.1588878602174696e-06, - "loss": 0.7519, + "epoch": 1.2201355549279864, + "grad_norm": 3.3557494198415863, + "learning_rate": 1.3436272446669117e-05, + "loss": 0.68, "step": 8641 }, { - "epoch": 1.8121199412874818, - "grad_norm": 5.242323509816903, - "learning_rate": 7.1567173401151425e-06, - "loss": 0.4907, + "epoch": 1.2202767579779723, + "grad_norm": 3.2897323047668423, + "learning_rate": 1.3434840783634611e-05, + "loss": 0.5514, "step": 8642 }, { - "epoch": 1.8123296288530089, - "grad_norm": 8.752254718827217, - "learning_rate": 7.15454696573598e-06, - "loss": 0.7236, + "epoch": 1.2204179610279582, + "grad_norm": 3.4295679602253593, + "learning_rate": 1.3433409040775894e-05, + "loss": 0.5515, "step": 8643 }, { - "epoch": 1.8125393164185364, - "grad_norm": 8.620133056919888, - "learning_rate": 7.15237673719122e-06, - "loss": 1.0653, + "epoch": 1.220559164077944, + "grad_norm": 3.315950819195173, + "learning_rate": 1.3431977218126234e-05, + "loss": 0.5317, "step": 8644 }, { - "epoch": 1.812749003984064, - "grad_norm": 8.172353012991103, - "learning_rate": 7.150206654592094e-06, - "loss": 0.9265, + "epoch": 1.22070036712793, + "grad_norm": 3.6681723092155263, + "learning_rate": 1.3430545315718901e-05, + "loss": 0.7062, "step": 8645 }, { - "epoch": 1.812958691549591, - "grad_norm": 6.468407315325875, - "learning_rate": 7.148036718049819e-06, - "loss": 0.6829, + "epoch": 1.2208415701779158, + "grad_norm": 4.183329008022825, + "learning_rate": 1.3429113333587181e-05, + "loss": 0.7053, "step": 8646 }, { - "epoch": 1.8131683791151185, - "grad_norm": 5.919430984380698, - "learning_rate": 7.1458669276756075e-06, - "loss": 0.745, + "epoch": 1.2209827732279017, + "grad_norm": 4.496857321741503, + "learning_rate": 1.3427681271764343e-05, + "loss": 0.7416, "step": 8647 }, { - "epoch": 1.8133780666806458, - "grad_norm": 5.721880332891165, - "learning_rate": 7.1436972835806685e-06, - "loss": 0.5213, + "epoch": 1.2211239762778876, + "grad_norm": 3.9468481737996073, + "learning_rate": 1.3426249130283677e-05, + "loss": 0.6425, "step": 8648 }, { - "epoch": 1.8135877542461731, - "grad_norm": 8.489978423340053, - "learning_rate": 7.141527785876197e-06, - "loss": 0.8967, + "epoch": 1.2212651793278735, + "grad_norm": 4.115070319931377, + "learning_rate": 1.342481690917846e-05, + "loss": 0.7116, "step": 8649 }, { - "epoch": 1.8137974418117007, - "grad_norm": 6.625590841525813, - "learning_rate": 7.139358434673381e-06, - "loss": 0.7818, + "epoch": 1.2214063823778594, + "grad_norm": 3.3309160562230207, + "learning_rate": 1.3423384608481978e-05, + "loss": 0.5448, "step": 8650 }, { - "epoch": 1.814007129377228, - "grad_norm": 6.432927693583469, - "learning_rate": 7.13718923008341e-06, - "loss": 0.7661, + "epoch": 1.2215475854278453, + "grad_norm": 3.0595835494512036, + "learning_rate": 1.3421952228227513e-05, + "loss": 0.541, "step": 8651 }, { - "epoch": 1.8142168169427553, - "grad_norm": 9.05009793219841, - "learning_rate": 7.1350201722174564e-06, - "loss": 1.0373, + "epoch": 1.2216887884778311, + "grad_norm": 3.481262881395974, + "learning_rate": 1.342051976844836e-05, + "loss": 0.6375, "step": 8652 }, { - "epoch": 1.8144265045082828, - "grad_norm": 6.20000010135193, - "learning_rate": 7.132851261186686e-06, - "loss": 0.4625, + "epoch": 1.221829991527817, + "grad_norm": 3.104621306616388, + "learning_rate": 1.34190872291778e-05, + "loss": 0.49, "step": 8653 }, { - "epoch": 1.8146361920738099, - "grad_norm": 9.989354991151128, - "learning_rate": 7.13068249710226e-06, - "loss": 0.9588, + "epoch": 1.221971194577803, + "grad_norm": 3.9152543364027554, + "learning_rate": 1.3417654610449131e-05, + "loss": 0.7503, "step": 8654 }, { - "epoch": 1.8148458796393374, - "grad_norm": 6.881295863547874, - "learning_rate": 7.128513880075338e-06, - "loss": 0.7785, + "epoch": 1.2221123976277888, + "grad_norm": 6.502413455851264, + "learning_rate": 1.3416221912295646e-05, + "loss": 0.629, "step": 8655 }, { - "epoch": 1.8150555672048647, - "grad_norm": 7.630944376028559, - "learning_rate": 7.126345410217056e-06, - "loss": 0.7834, + "epoch": 1.2222536006777747, + "grad_norm": 2.8990964619407915, + "learning_rate": 1.341478913475064e-05, + "loss": 0.4511, "step": 8656 }, { - "epoch": 1.815265254770392, - "grad_norm": 6.386635629013877, - "learning_rate": 7.124177087638556e-06, - "loss": 0.7218, + "epoch": 1.2223948037277605, + "grad_norm": 3.6167372976067598, + "learning_rate": 1.3413356277847407e-05, + "loss": 0.6126, "step": 8657 }, { - "epoch": 1.8154749423359196, - "grad_norm": 7.718779493624984, - "learning_rate": 7.12200891245097e-06, - "loss": 0.7711, + "epoch": 1.2225360067777464, + "grad_norm": 4.033882756022542, + "learning_rate": 1.341192334161925e-05, + "loss": 0.6139, "step": 8658 }, { - "epoch": 1.8156846299014469, - "grad_norm": 8.103924425147584, - "learning_rate": 7.119840884765417e-06, - "loss": 0.8802, + "epoch": 1.2226772098277323, + "grad_norm": 3.2063581644267227, + "learning_rate": 1.3410490326099466e-05, + "loss": 0.614, "step": 8659 }, { - "epoch": 1.8158943174669742, - "grad_norm": 7.0877105041431925, - "learning_rate": 7.117673004693014e-06, - "loss": 0.5965, + "epoch": 1.2228184128777182, + "grad_norm": 3.8950727745007354, + "learning_rate": 1.3409057231321363e-05, + "loss": 0.7888, "step": 8660 }, { - "epoch": 1.8161040050325017, - "grad_norm": 9.442081468503812, - "learning_rate": 7.1155052723448715e-06, - "loss": 0.9724, + "epoch": 1.222959615927704, + "grad_norm": 3.9035403668366033, + "learning_rate": 1.3407624057318241e-05, + "loss": 0.6694, "step": 8661 }, { - "epoch": 1.8163136925980288, - "grad_norm": 6.303786693800357, - "learning_rate": 7.113337687832085e-06, - "loss": 0.5789, + "epoch": 1.22310081897769, + "grad_norm": 3.979011833176761, + "learning_rate": 1.3406190804123405e-05, + "loss": 0.7471, "step": 8662 }, { - "epoch": 1.8165233801635563, - "grad_norm": 5.963784002220839, - "learning_rate": 7.111170251265748e-06, - "loss": 0.5568, + "epoch": 1.2232420220276758, + "grad_norm": 3.348418197190203, + "learning_rate": 1.3404757471770168e-05, + "loss": 0.5958, "step": 8663 }, { - "epoch": 1.8167330677290838, - "grad_norm": 6.82532624503764, - "learning_rate": 7.109002962756948e-06, - "loss": 0.7707, + "epoch": 1.2233832250776617, + "grad_norm": 3.5997083603236204, + "learning_rate": 1.3403324060291837e-05, + "loss": 0.5985, "step": 8664 }, { - "epoch": 1.816942755294611, - "grad_norm": 7.4172184917790105, - "learning_rate": 7.1068358224167555e-06, - "loss": 0.8225, + "epoch": 1.2235244281276476, + "grad_norm": 3.19066917317145, + "learning_rate": 1.3401890569721725e-05, + "loss": 0.6062, "step": 8665 }, { - "epoch": 1.8171524428601384, - "grad_norm": 6.176463429518696, - "learning_rate": 7.104668830356249e-06, - "loss": 0.5975, + "epoch": 1.2236656311776335, + "grad_norm": 4.130663238812506, + "learning_rate": 1.3400457000093145e-05, + "loss": 0.7731, "step": 8666 }, { - "epoch": 1.8173621304256657, - "grad_norm": 6.385399875021124, - "learning_rate": 7.1025019866864876e-06, - "loss": 0.724, + "epoch": 1.2238068342276194, + "grad_norm": 3.7333617085083532, + "learning_rate": 1.3399023351439416e-05, + "loss": 0.6675, "step": 8667 }, { - "epoch": 1.817571817991193, - "grad_norm": 6.635831513129939, - "learning_rate": 7.100335291518523e-06, - "loss": 0.7083, + "epoch": 1.2239480372776053, + "grad_norm": 3.698225902897182, + "learning_rate": 1.3397589623793845e-05, + "loss": 0.7208, "step": 8668 }, { - "epoch": 1.8177815055567206, - "grad_norm": 7.51733592573119, - "learning_rate": 7.098168744963401e-06, - "loss": 0.7331, + "epoch": 1.2240892403275911, + "grad_norm": 3.4375251457035714, + "learning_rate": 1.3396155817189767e-05, + "loss": 0.6355, "step": 8669 }, { - "epoch": 1.8179911931222479, - "grad_norm": 7.3677078143731825, - "learning_rate": 7.0960023471321715e-06, - "loss": 0.8664, + "epoch": 1.224230443377577, + "grad_norm": 3.9830572879603148, + "learning_rate": 1.3394721931660488e-05, + "loss": 0.6773, "step": 8670 }, { - "epoch": 1.8182008806877752, - "grad_norm": 7.287418692482427, - "learning_rate": 7.093836098135851e-06, - "loss": 0.725, + "epoch": 1.224371646427563, + "grad_norm": 3.154221504442012, + "learning_rate": 1.3393287967239339e-05, + "loss": 0.6301, "step": 8671 }, { - "epoch": 1.8184105682533027, - "grad_norm": 6.158429707255441, - "learning_rate": 7.091669998085473e-06, - "loss": 0.6579, + "epoch": 1.2245128494775488, + "grad_norm": 3.5714632920178193, + "learning_rate": 1.3391853923959644e-05, + "loss": 0.5697, "step": 8672 }, { - "epoch": 1.8186202558188298, - "grad_norm": 7.119260868056512, - "learning_rate": 7.089504047092055e-06, - "loss": 0.7561, + "epoch": 1.2246540525275347, + "grad_norm": 3.274560478021891, + "learning_rate": 1.3390419801854729e-05, + "loss": 0.5433, "step": 8673 }, { - "epoch": 1.8188299433843573, - "grad_norm": 8.33814240104753, - "learning_rate": 7.0873382452666e-06, - "loss": 0.8211, + "epoch": 1.2247952555775206, + "grad_norm": 5.170403187765152, + "learning_rate": 1.3388985600957922e-05, + "loss": 0.9513, "step": 8674 }, { - "epoch": 1.8190396309498846, - "grad_norm": 7.602171652949819, - "learning_rate": 7.0851725927201085e-06, - "loss": 0.5861, + "epoch": 1.2249364586275064, + "grad_norm": 3.409885881919999, + "learning_rate": 1.3387551321302553e-05, + "loss": 0.5147, "step": 8675 }, { - "epoch": 1.819249318515412, - "grad_norm": 6.837141994899123, - "learning_rate": 7.083007089563583e-06, - "loss": 0.6277, + "epoch": 1.2250776616774923, + "grad_norm": 3.814537936970504, + "learning_rate": 1.3386116962921951e-05, + "loss": 0.6673, "step": 8676 }, { - "epoch": 1.8194590060809395, - "grad_norm": 6.6498267403844, - "learning_rate": 7.080841735908002e-06, - "loss": 0.6819, + "epoch": 1.2252188647274782, + "grad_norm": 2.906133058884447, + "learning_rate": 1.3384682525849458e-05, + "loss": 0.4589, "step": 8677 }, { - "epoch": 1.8196686936464668, - "grad_norm": 7.066677932566973, - "learning_rate": 7.078676531864344e-06, - "loss": 0.7498, + "epoch": 1.225360067777464, + "grad_norm": 3.7565480488160925, + "learning_rate": 1.3383248010118404e-05, + "loss": 0.6455, "step": 8678 }, { - "epoch": 1.819878381211994, - "grad_norm": 6.335117026203028, - "learning_rate": 7.076511477543582e-06, - "loss": 0.6251, + "epoch": 1.2255012708274498, + "grad_norm": 3.9281259081223596, + "learning_rate": 1.3381813415762124e-05, + "loss": 0.6238, "step": 8679 }, { - "epoch": 1.8200880687775216, - "grad_norm": 6.909444284393642, - "learning_rate": 7.074346573056674e-06, - "loss": 0.6151, + "epoch": 1.2256424738774356, + "grad_norm": 3.9086429734862085, + "learning_rate": 1.3380378742813964e-05, + "loss": 0.6002, "step": 8680 }, { - "epoch": 1.820297756343049, - "grad_norm": 7.378694225817318, - "learning_rate": 7.072181818514578e-06, - "loss": 0.8965, + "epoch": 1.2257836769274215, + "grad_norm": 3.6605463214179226, + "learning_rate": 1.337894399130726e-05, + "loss": 0.6169, "step": 8681 }, { - "epoch": 1.8205074439085762, - "grad_norm": 7.753541982295481, - "learning_rate": 7.070017214028246e-06, - "loss": 0.7945, + "epoch": 1.2259248799774074, + "grad_norm": 3.0267969696340815, + "learning_rate": 1.337750916127536e-05, + "loss": 0.6623, "step": 8682 }, { - "epoch": 1.8207171314741037, - "grad_norm": 7.027446948571806, - "learning_rate": 7.067852759708611e-06, - "loss": 0.5932, + "epoch": 1.2260660830273933, + "grad_norm": 3.212405455609593, + "learning_rate": 1.3376074252751604e-05, + "loss": 0.5239, "step": 8683 }, { - "epoch": 1.8209268190396308, - "grad_norm": 9.056426725274095, - "learning_rate": 7.065688455666608e-06, - "loss": 0.9956, + "epoch": 1.2262072860773792, + "grad_norm": 3.506126059898003, + "learning_rate": 1.3374639265769337e-05, + "loss": 0.5975, "step": 8684 }, { - "epoch": 1.8211365066051584, - "grad_norm": 6.506502036862909, - "learning_rate": 7.063524302013164e-06, - "loss": 0.7481, + "epoch": 1.226348489127365, + "grad_norm": 3.6403411215180337, + "learning_rate": 1.3373204200361914e-05, + "loss": 0.7259, "step": 8685 }, { - "epoch": 1.8213461941706857, - "grad_norm": 7.794916094646638, - "learning_rate": 7.0613602988591875e-06, - "loss": 0.7796, + "epoch": 1.226489692177351, + "grad_norm": 3.226312766692689, + "learning_rate": 1.3371769056562683e-05, + "loss": 0.6025, "step": 8686 }, { - "epoch": 1.821555881736213, - "grad_norm": 7.433348913014491, - "learning_rate": 7.0591964463155944e-06, - "loss": 0.7806, + "epoch": 1.2266308952273368, + "grad_norm": 3.189727790563823, + "learning_rate": 1.3370333834404994e-05, + "loss": 0.5978, "step": 8687 }, { - "epoch": 1.8217655693017405, - "grad_norm": 6.566015970731753, - "learning_rate": 7.057032744493286e-06, - "loss": 0.5266, + "epoch": 1.2267720982773227, + "grad_norm": 3.676676836942921, + "learning_rate": 1.3368898533922202e-05, + "loss": 0.5658, "step": 8688 }, { - "epoch": 1.8219752568672678, - "grad_norm": 6.985525387980135, - "learning_rate": 7.054869193503152e-06, - "loss": 0.8061, + "epoch": 1.2269133013273086, + "grad_norm": 3.9842953077049836, + "learning_rate": 1.3367463155147661e-05, + "loss": 0.6029, "step": 8689 }, { - "epoch": 1.822184944432795, - "grad_norm": 6.51650743246444, - "learning_rate": 7.0527057934560785e-06, - "loss": 0.6479, + "epoch": 1.2270545043772945, + "grad_norm": 3.1985296668742627, + "learning_rate": 1.3366027698114734e-05, + "loss": 0.5362, "step": 8690 }, { - "epoch": 1.8223946319983226, - "grad_norm": 6.6337575682891075, - "learning_rate": 7.050542544462951e-06, - "loss": 0.7169, + "epoch": 1.2271957074272803, + "grad_norm": 3.7663589956570283, + "learning_rate": 1.3364592162856777e-05, + "loss": 0.511, "step": 8691 }, { - "epoch": 1.8226043195638497, - "grad_norm": 8.21266523067796, - "learning_rate": 7.048379446634625e-06, - "loss": 0.9336, + "epoch": 1.2273369104772662, + "grad_norm": 3.9798869481993355, + "learning_rate": 1.3363156549407151e-05, + "loss": 0.6465, "step": 8692 }, { - "epoch": 1.8228140071293772, - "grad_norm": 7.012486241821403, - "learning_rate": 7.046216500081976e-06, - "loss": 0.7059, + "epoch": 1.2274781135272521, + "grad_norm": 3.600116782901093, + "learning_rate": 1.3361720857799218e-05, + "loss": 0.5678, "step": 8693 }, { - "epoch": 1.8230236946949046, - "grad_norm": 7.23715081602742, - "learning_rate": 7.044053704915857e-06, - "loss": 0.6544, + "epoch": 1.227619316577238, + "grad_norm": 4.353373280940051, + "learning_rate": 1.3360285088066343e-05, + "loss": 0.7273, "step": 8694 }, { - "epoch": 1.8232333822604319, - "grad_norm": 6.672387298864582, - "learning_rate": 7.041891061247108e-06, - "loss": 0.7263, + "epoch": 1.2277605196272239, + "grad_norm": 3.1038583367283104, + "learning_rate": 1.3358849240241897e-05, + "loss": 0.4743, "step": 8695 }, { - "epoch": 1.8234430698259594, - "grad_norm": 8.220262527893999, - "learning_rate": 7.039728569186572e-06, - "loss": 1.0388, + "epoch": 1.2279017226772098, + "grad_norm": 4.419471958944962, + "learning_rate": 1.3357413314359242e-05, + "loss": 0.8118, "step": 8696 }, { - "epoch": 1.8236527573914867, - "grad_norm": 6.360117539432224, - "learning_rate": 7.037566228845087e-06, - "loss": 0.7424, + "epoch": 1.2280429257271956, + "grad_norm": 3.3226239160925144, + "learning_rate": 1.3355977310451754e-05, + "loss": 0.5518, "step": 8697 }, { - "epoch": 1.823862444957014, - "grad_norm": 7.8922182913005665, - "learning_rate": 7.0354040403334666e-06, - "loss": 0.7306, + "epoch": 1.2281841287771815, + "grad_norm": 3.2776457193499704, + "learning_rate": 1.3354541228552802e-05, + "loss": 0.5904, "step": 8698 }, { - "epoch": 1.8240721325225415, - "grad_norm": 7.8680786219552, - "learning_rate": 7.03324200376253e-06, - "loss": 0.9052, + "epoch": 1.2283253318271674, + "grad_norm": 3.4803200465895094, + "learning_rate": 1.3353105068695759e-05, + "loss": 0.6501, "step": 8699 }, { - "epoch": 1.8242818200880688, - "grad_norm": 5.514099947851406, - "learning_rate": 7.031080119243091e-06, - "loss": 0.6161, + "epoch": 1.2284665348771533, + "grad_norm": 3.107582201927797, + "learning_rate": 1.3351668830914004e-05, + "loss": 0.5286, "step": 8700 }, { - "epoch": 1.8244915076535961, - "grad_norm": 6.2165817198942595, - "learning_rate": 7.028918386885942e-06, - "loss": 0.7789, + "epoch": 1.2286077379271392, + "grad_norm": 3.5479816711720042, + "learning_rate": 1.3350232515240913e-05, + "loss": 0.4949, "step": 8701 }, { - "epoch": 1.8247011952191237, - "grad_norm": 6.87572875143172, - "learning_rate": 7.026756806801876e-06, - "loss": 0.5704, + "epoch": 1.228748940977125, + "grad_norm": 3.365327904351478, + "learning_rate": 1.3348796121709862e-05, + "loss": 0.5708, "step": 8702 }, { - "epoch": 1.8249108827846507, - "grad_norm": 7.222352085187118, - "learning_rate": 7.024595379101686e-06, - "loss": 0.7873, + "epoch": 1.228890144027111, + "grad_norm": 3.247106685718305, + "learning_rate": 1.334735965035424e-05, + "loss": 0.519, "step": 8703 }, { - "epoch": 1.8251205703501783, - "grad_norm": 7.076588332161067, - "learning_rate": 7.022434103896141e-06, - "loss": 0.7913, + "epoch": 1.2290313470770968, + "grad_norm": 4.4212674430501915, + "learning_rate": 1.3345923101207423e-05, + "loss": 0.779, "step": 8704 }, { - "epoch": 1.8253302579157056, - "grad_norm": 5.741026646510769, - "learning_rate": 7.020272981296012e-06, - "loss": 0.6904, + "epoch": 1.2291725501270827, + "grad_norm": 3.2772878319155017, + "learning_rate": 1.3344486474302798e-05, + "loss": 0.4847, "step": 8705 }, { - "epoch": 1.8255399454812329, - "grad_norm": 6.513183262584279, - "learning_rate": 7.018112011412064e-06, - "loss": 0.6948, + "epoch": 1.2293137531770686, + "grad_norm": 5.255088256681144, + "learning_rate": 1.334304976967375e-05, + "loss": 0.8484, "step": 8706 }, { - "epoch": 1.8257496330467604, - "grad_norm": 8.135417303975887, - "learning_rate": 7.0159511943550415e-06, - "loss": 0.7501, + "epoch": 1.2294549562270545, + "grad_norm": 3.723380392294394, + "learning_rate": 1.3341612987353671e-05, + "loss": 0.6545, "step": 8707 }, { - "epoch": 1.8259593206122877, - "grad_norm": 6.2538296083070355, - "learning_rate": 7.013790530235698e-06, - "loss": 0.5833, + "epoch": 1.2295961592770404, + "grad_norm": 3.225034887113844, + "learning_rate": 1.334017612737595e-05, + "loss": 0.5339, "step": 8708 }, { - "epoch": 1.826169008177815, - "grad_norm": 5.773500697999223, - "learning_rate": 7.011630019164772e-06, - "loss": 0.5896, + "epoch": 1.2297373623270262, + "grad_norm": 3.1661664658221116, + "learning_rate": 1.333873918977398e-05, + "loss": 0.5411, "step": 8709 }, { - "epoch": 1.8263786957433426, - "grad_norm": 6.9028336742922916, - "learning_rate": 7.009469661252988e-06, - "loss": 0.6876, + "epoch": 1.2298785653770121, + "grad_norm": 3.3400107518287, + "learning_rate": 1.333730217458115e-05, + "loss": 0.5636, "step": 8710 }, { - "epoch": 1.8265883833088696, - "grad_norm": 6.001094809190476, - "learning_rate": 7.007309456611071e-06, - "loss": 0.6823, + "epoch": 1.230019768426998, + "grad_norm": 3.478840793294304, + "learning_rate": 1.3335865081830858e-05, + "loss": 0.6096, "step": 8711 }, { - "epoch": 1.8267980708743972, - "grad_norm": 5.912935784258116, - "learning_rate": 7.005149405349737e-06, - "loss": 0.627, + "epoch": 1.2301609714769839, + "grad_norm": 4.130375300777989, + "learning_rate": 1.3334427911556506e-05, + "loss": 0.6898, "step": 8712 }, { - "epoch": 1.8270077584399245, - "grad_norm": 6.797852956637171, - "learning_rate": 7.002989507579686e-06, - "loss": 0.9309, + "epoch": 1.2303021745269698, + "grad_norm": 3.4396497978684946, + "learning_rate": 1.3332990663791486e-05, + "loss": 0.5467, "step": 8713 }, { - "epoch": 1.8272174460054518, - "grad_norm": 8.351978425462695, - "learning_rate": 7.000829763411623e-06, - "loss": 1.0725, + "epoch": 1.2304433775769557, + "grad_norm": 3.2920673045485866, + "learning_rate": 1.3331553338569204e-05, + "loss": 0.61, "step": 8714 }, { - "epoch": 1.8274271335709793, - "grad_norm": 7.437460364809131, - "learning_rate": 6.998670172956237e-06, - "loss": 0.9832, + "epoch": 1.2305845806269415, + "grad_norm": 3.8797046174351846, + "learning_rate": 1.333011593592306e-05, + "loss": 0.6901, "step": 8715 }, { - "epoch": 1.8276368211365066, - "grad_norm": 7.246507728580772, - "learning_rate": 6.996510736324211e-06, - "loss": 0.7723, + "epoch": 1.2307257836769274, + "grad_norm": 3.3470547566034514, + "learning_rate": 1.3328678455886461e-05, + "loss": 0.5391, "step": 8716 }, { - "epoch": 1.827846508702034, - "grad_norm": 5.553491283028539, - "learning_rate": 6.994351453626216e-06, - "loss": 0.5879, + "epoch": 1.2308669867269133, + "grad_norm": 3.1900555088897358, + "learning_rate": 1.332724089849281e-05, + "loss": 0.5123, "step": 8717 }, { - "epoch": 1.8280561962675614, - "grad_norm": 7.428693580281207, - "learning_rate": 6.992192324972923e-06, - "loss": 0.6408, + "epoch": 1.2310081897768992, + "grad_norm": 3.5290729950105026, + "learning_rate": 1.3325803263775521e-05, + "loss": 0.5649, "step": 8718 }, { - "epoch": 1.8282658838330887, - "grad_norm": 7.578631914773462, - "learning_rate": 6.990033350474993e-06, - "loss": 0.7565, + "epoch": 1.231149392826885, + "grad_norm": 4.112362098580588, + "learning_rate": 1.3324365551767997e-05, + "loss": 0.8645, "step": 8719 }, { - "epoch": 1.828475571398616, - "grad_norm": 7.618102656433465, - "learning_rate": 6.987874530243073e-06, - "loss": 0.6942, + "epoch": 1.231290595876871, + "grad_norm": 3.6469944009374675, + "learning_rate": 1.3322927762503656e-05, + "loss": 0.5721, "step": 8720 }, { - "epoch": 1.8286852589641436, - "grad_norm": 6.297917652123478, - "learning_rate": 6.985715864387803e-06, - "loss": 0.7384, + "epoch": 1.2314317989268568, + "grad_norm": 2.8580873561702735, + "learning_rate": 1.3321489896015908e-05, + "loss": 0.4982, "step": 8721 }, { - "epoch": 1.8288949465296707, - "grad_norm": 7.140646263436267, - "learning_rate": 6.983557353019832e-06, - "loss": 0.7352, + "epoch": 1.2315730019768427, + "grad_norm": 2.9387662426394745, + "learning_rate": 1.3320051952338166e-05, + "loss": 0.4603, "step": 8722 }, { - "epoch": 1.8291046340951982, - "grad_norm": 6.511628378719846, - "learning_rate": 6.9813989962497705e-06, - "loss": 0.8202, + "epoch": 1.2317142050268286, + "grad_norm": 3.242165368209716, + "learning_rate": 1.3318613931503854e-05, + "loss": 0.5198, "step": 8723 }, { - "epoch": 1.8293143216607255, - "grad_norm": 7.300741156278667, - "learning_rate": 6.979240794188249e-06, - "loss": 0.7353, + "epoch": 1.2318554080768145, + "grad_norm": 3.6759919511289336, + "learning_rate": 1.3317175833546387e-05, + "loss": 0.5658, "step": 8724 }, { - "epoch": 1.8295240092262528, - "grad_norm": 5.860088581820466, - "learning_rate": 6.977082746945877e-06, - "loss": 0.5208, + "epoch": 1.2319966111268004, + "grad_norm": 3.36715226945166, + "learning_rate": 1.3315737658499188e-05, + "loss": 0.5706, "step": 8725 }, { - "epoch": 1.8297336967917803, - "grad_norm": 6.587206167243888, - "learning_rate": 6.974924854633256e-06, - "loss": 0.668, + "epoch": 1.2321378141767863, + "grad_norm": 3.6596440183135432, + "learning_rate": 1.3314299406395677e-05, + "loss": 0.5343, "step": 8726 }, { - "epoch": 1.8299433843573076, - "grad_norm": 7.5204339568045055, - "learning_rate": 6.9727671173609805e-06, - "loss": 0.7291, + "epoch": 1.2322790172267721, + "grad_norm": 3.4085440896636463, + "learning_rate": 1.3312861077269277e-05, + "loss": 0.5438, "step": 8727 }, { - "epoch": 1.830153071922835, - "grad_norm": 8.918988742637003, - "learning_rate": 6.970609535239646e-06, - "loss": 1.0647, + "epoch": 1.232420220276758, + "grad_norm": 2.7259026125836234, + "learning_rate": 1.3311422671153416e-05, + "loss": 0.433, "step": 8728 }, { - "epoch": 1.8303627594883625, - "grad_norm": 5.766941862861837, - "learning_rate": 6.968452108379822e-06, - "loss": 0.6599, + "epoch": 1.232561423326744, + "grad_norm": 3.045497847759873, + "learning_rate": 1.3309984188081524e-05, + "loss": 0.5057, "step": 8729 }, { - "epoch": 1.8305724470538896, - "grad_norm": 6.717247972623266, - "learning_rate": 6.966294836892087e-06, - "loss": 0.6757, + "epoch": 1.2327026263767298, + "grad_norm": 4.313640047954047, + "learning_rate": 1.3308545628087029e-05, + "loss": 0.7062, "step": 8730 }, { - "epoch": 1.830782134619417, - "grad_norm": 8.134540076637986, - "learning_rate": 6.964137720887006e-06, - "loss": 0.8276, + "epoch": 1.2328438294267157, + "grad_norm": 3.5517214546211786, + "learning_rate": 1.330710699120336e-05, + "loss": 0.6463, "step": 8731 }, { - "epoch": 1.8309918221849444, - "grad_norm": 5.714469471093912, - "learning_rate": 6.96198076047513e-06, - "loss": 0.547, + "epoch": 1.2329850324767015, + "grad_norm": 4.843156263412101, + "learning_rate": 1.3305668277463954e-05, + "loss": 0.7249, "step": 8732 }, { - "epoch": 1.8312015097504717, - "grad_norm": 8.071601498851622, - "learning_rate": 6.9598239557670066e-06, - "loss": 0.7075, + "epoch": 1.2331262355266874, + "grad_norm": 3.38763999286509, + "learning_rate": 1.3304229486902248e-05, + "loss": 0.5147, "step": 8733 }, { - "epoch": 1.8314111973159992, - "grad_norm": 8.804782226132483, - "learning_rate": 6.9576673068731835e-06, - "loss": 0.996, + "epoch": 1.2332674385766733, + "grad_norm": 3.3919326613363774, + "learning_rate": 1.3302790619551673e-05, + "loss": 0.5824, "step": 8734 }, { - "epoch": 1.8316208848815265, - "grad_norm": 7.167396000735304, - "learning_rate": 6.955510813904185e-06, - "loss": 0.6136, + "epoch": 1.2334086416266592, + "grad_norm": 3.8602501109615517, + "learning_rate": 1.330135167544567e-05, + "loss": 0.6411, "step": 8735 }, { - "epoch": 1.8318305724470538, - "grad_norm": 7.678504133295939, - "learning_rate": 6.953354476970538e-06, - "loss": 0.8345, + "epoch": 1.233549844676645, + "grad_norm": 3.2244372865228206, + "learning_rate": 1.329991265461768e-05, + "loss": 0.5034, "step": 8736 }, { - "epoch": 1.8320402600125814, - "grad_norm": 6.467309340291007, - "learning_rate": 6.9511982961827594e-06, - "loss": 0.6597, + "epoch": 1.233691047726631, + "grad_norm": 2.80761104287388, + "learning_rate": 1.3298473557101146e-05, + "loss": 0.4364, "step": 8737 }, { - "epoch": 1.8322499475781087, - "grad_norm": 6.311879074958667, - "learning_rate": 6.949042271651352e-06, - "loss": 0.5755, + "epoch": 1.2338322507766168, + "grad_norm": 3.5905259617924354, + "learning_rate": 1.3297034382929512e-05, + "loss": 0.6025, "step": 8738 }, { - "epoch": 1.832459635143636, - "grad_norm": 5.4778145734243475, - "learning_rate": 6.9468864034868214e-06, - "loss": 0.6009, + "epoch": 1.2339734538266027, + "grad_norm": 3.6553094866454776, + "learning_rate": 1.3295595132136218e-05, + "loss": 0.5634, "step": 8739 }, { - "epoch": 1.8326693227091635, - "grad_norm": 8.820167555095335, - "learning_rate": 6.944730691799661e-06, - "loss": 0.7453, + "epoch": 1.2341146568765886, + "grad_norm": 3.2519846132149066, + "learning_rate": 1.329415580475472e-05, + "loss": 0.5509, "step": 8740 }, { - "epoch": 1.8328790102746906, - "grad_norm": 6.646315838742104, - "learning_rate": 6.942575136700349e-06, - "loss": 0.7186, + "epoch": 1.2342558599265745, + "grad_norm": 4.134821385810212, + "learning_rate": 1.3292716400818467e-05, + "loss": 0.7049, "step": 8741 }, { - "epoch": 1.833088697840218, - "grad_norm": 6.379388715476397, - "learning_rate": 6.940419738299365e-06, - "loss": 0.6422, + "epoch": 1.2343970629765604, + "grad_norm": 3.753889702249401, + "learning_rate": 1.32912769203609e-05, + "loss": 0.6503, "step": 8742 }, { - "epoch": 1.8332983854057454, - "grad_norm": 6.994016120924202, - "learning_rate": 6.938264496707178e-06, - "loss": 0.6554, + "epoch": 1.2345382660265463, + "grad_norm": 3.677663140687254, + "learning_rate": 1.3289837363415484e-05, + "loss": 0.6368, "step": 8743 }, { - "epoch": 1.8335080729712727, - "grad_norm": 6.4505305781359485, - "learning_rate": 6.936109412034241e-06, - "loss": 0.6176, + "epoch": 1.2346794690765321, + "grad_norm": 3.9913719450959055, + "learning_rate": 1.3288397730015666e-05, + "loss": 0.6309, "step": 8744 }, { - "epoch": 1.8337177605368002, - "grad_norm": 6.384236307978722, - "learning_rate": 6.933954484391014e-06, - "loss": 0.5764, + "epoch": 1.234820672126518, + "grad_norm": 3.2337630960176824, + "learning_rate": 1.3286958020194902e-05, + "loss": 0.5586, "step": 8745 }, { - "epoch": 1.8339274481023276, - "grad_norm": 7.161755463750637, - "learning_rate": 6.931799713887937e-06, - "loss": 0.5691, + "epoch": 1.234961875176504, + "grad_norm": 2.661810933518996, + "learning_rate": 1.328551823398666e-05, + "loss": 0.4363, "step": 8746 }, { - "epoch": 1.8341371356678549, - "grad_norm": 7.594024279432976, - "learning_rate": 6.929645100635447e-06, - "loss": 0.8112, + "epoch": 1.2351030782264898, + "grad_norm": 3.7328450656648733, + "learning_rate": 1.328407837142439e-05, + "loss": 0.7094, "step": 8747 }, { - "epoch": 1.8343468232333824, - "grad_norm": 7.3238262559629055, - "learning_rate": 6.927490644743967e-06, - "loss": 0.7137, + "epoch": 1.2352442812764757, + "grad_norm": 4.120270693464386, + "learning_rate": 1.3282638432541553e-05, + "loss": 0.6827, "step": 8748 }, { - "epoch": 1.8345565107989095, - "grad_norm": 6.442704289435585, - "learning_rate": 6.925336346323927e-06, - "loss": 0.545, + "epoch": 1.2353854843264616, + "grad_norm": 3.358718703789723, + "learning_rate": 1.3281198417371621e-05, + "loss": 0.5792, "step": 8749 }, { - "epoch": 1.834766198364437, - "grad_norm": 6.673162729139319, - "learning_rate": 6.923182205485726e-06, - "loss": 0.526, + "epoch": 1.2355266873764474, + "grad_norm": 3.138071982901729, + "learning_rate": 1.3279758325948054e-05, + "loss": 0.5307, "step": 8750 }, { - "epoch": 1.8349758859299643, - "grad_norm": 8.109778447028734, - "learning_rate": 6.921028222339776e-06, - "loss": 0.7791, + "epoch": 1.2356678904264333, + "grad_norm": 3.200415191627236, + "learning_rate": 1.3278318158304319e-05, + "loss": 0.5219, "step": 8751 }, { - "epoch": 1.8351855734954916, - "grad_norm": 7.371887924357651, - "learning_rate": 6.918874396996471e-06, - "loss": 0.7395, + "epoch": 1.2358090934764192, + "grad_norm": 4.262555451277976, + "learning_rate": 1.3276877914473887e-05, + "loss": 0.6127, "step": 8752 }, { - "epoch": 1.8353952610610191, - "grad_norm": 7.4048190975448955, - "learning_rate": 6.916720729566195e-06, - "loss": 0.7401, + "epoch": 1.235950296526405, + "grad_norm": 3.177761466802031, + "learning_rate": 1.3275437594490228e-05, + "loss": 0.4921, "step": 8753 }, { - "epoch": 1.8356049486265464, - "grad_norm": 6.224903839293317, - "learning_rate": 6.914567220159327e-06, - "loss": 0.5301, + "epoch": 1.236091499576391, + "grad_norm": 3.760803237939309, + "learning_rate": 1.3273997198386814e-05, + "loss": 0.6384, "step": 8754 }, { - "epoch": 1.8358146361920737, - "grad_norm": 6.720279593371625, - "learning_rate": 6.9124138688862455e-06, - "loss": 0.6154, + "epoch": 1.2362327026263766, + "grad_norm": 3.8895198110196523, + "learning_rate": 1.3272556726197116e-05, + "loss": 0.6395, "step": 8755 }, { - "epoch": 1.8360243237576013, - "grad_norm": 5.906003996573714, - "learning_rate": 6.910260675857305e-06, - "loss": 0.4938, + "epoch": 1.2363739056763625, + "grad_norm": 4.684518618921429, + "learning_rate": 1.3271116177954615e-05, + "loss": 0.7126, "step": 8756 }, { - "epoch": 1.8362340113231286, - "grad_norm": 7.424182895286606, - "learning_rate": 6.908107641182866e-06, - "loss": 0.8045, + "epoch": 1.2365151087263484, + "grad_norm": 3.259053709408109, + "learning_rate": 1.3269675553692787e-05, + "loss": 0.5349, "step": 8757 }, { - "epoch": 1.8364436988886559, - "grad_norm": 6.305857360326501, - "learning_rate": 6.9059547649732725e-06, - "loss": 0.8079, + "epoch": 1.2366563117763343, + "grad_norm": 3.6834726522198395, + "learning_rate": 1.3268234853445113e-05, + "loss": 0.5776, "step": 8758 }, { - "epoch": 1.8366533864541834, - "grad_norm": 7.76574959309599, - "learning_rate": 6.903802047338863e-06, - "loss": 0.7858, + "epoch": 1.2367975148263202, + "grad_norm": 4.370903924005992, + "learning_rate": 1.3266794077245066e-05, + "loss": 0.5604, "step": 8759 }, { - "epoch": 1.8368630740197105, - "grad_norm": 5.981770952663717, - "learning_rate": 6.901649488389967e-06, - "loss": 0.7034, + "epoch": 1.236938717876306, + "grad_norm": 3.639574699027349, + "learning_rate": 1.3265353225126143e-05, + "loss": 0.679, "step": 8760 }, { - "epoch": 1.837072761585238, - "grad_norm": 7.8155487370364165, - "learning_rate": 6.899497088236913e-06, - "loss": 0.9212, + "epoch": 1.237079920926292, + "grad_norm": 4.176948697697074, + "learning_rate": 1.3263912297121817e-05, + "loss": 0.8044, "step": 8761 }, { - "epoch": 1.8372824491507653, - "grad_norm": 6.767690774054371, - "learning_rate": 6.8973448469900065e-06, - "loss": 0.7651, + "epoch": 1.2372211239762778, + "grad_norm": 3.8825169345936, + "learning_rate": 1.3262471293265577e-05, + "loss": 0.6151, "step": 8762 }, { - "epoch": 1.8374921367162926, - "grad_norm": 7.49527758659679, - "learning_rate": 6.89519276475956e-06, - "loss": 0.7666, + "epoch": 1.2373623270262637, + "grad_norm": 5.074461967211695, + "learning_rate": 1.3261030213590919e-05, + "loss": 0.9318, "step": 8763 }, { - "epoch": 1.8377018242818202, - "grad_norm": 7.307603947944684, - "learning_rate": 6.8930408416558705e-06, - "loss": 0.7377, + "epoch": 1.2375035300762496, + "grad_norm": 3.54426080375801, + "learning_rate": 1.3259589058131323e-05, + "loss": 0.7533, "step": 8764 }, { - "epoch": 1.8379115118473475, - "grad_norm": 6.804909240447105, - "learning_rate": 6.8908890777892226e-06, - "loss": 0.7796, + "epoch": 1.2376447331262355, + "grad_norm": 3.443067838183573, + "learning_rate": 1.3258147826920285e-05, + "loss": 0.6384, "step": 8765 }, { - "epoch": 1.8381211994128748, - "grad_norm": 9.027313663196484, - "learning_rate": 6.888737473269904e-06, - "loss": 0.9549, + "epoch": 1.2377859361762213, + "grad_norm": 3.0886851911528463, + "learning_rate": 1.32567065199913e-05, + "loss": 0.5565, "step": 8766 }, { - "epoch": 1.8383308869784023, - "grad_norm": 5.179351990680766, - "learning_rate": 6.886586028208188e-06, - "loss": 0.4716, + "epoch": 1.2379271392262072, + "grad_norm": 3.3323239150652078, + "learning_rate": 1.3255265137377864e-05, + "loss": 0.5616, "step": 8767 }, { - "epoch": 1.8385405745439294, - "grad_norm": 7.662277965893355, - "learning_rate": 6.884434742714336e-06, - "loss": 0.8249, + "epoch": 1.2380683422761931, + "grad_norm": 3.5836439975078753, + "learning_rate": 1.3253823679113466e-05, + "loss": 0.5795, "step": 8768 }, { - "epoch": 1.838750262109457, - "grad_norm": 5.643284308001007, - "learning_rate": 6.882283616898604e-06, - "loss": 0.4252, + "epoch": 1.238209545326179, + "grad_norm": 3.167550862654838, + "learning_rate": 1.3252382145231616e-05, + "loss": 0.6186, "step": 8769 }, { - "epoch": 1.8389599496749844, - "grad_norm": 8.367960931523038, - "learning_rate": 6.880132650871252e-06, - "loss": 0.8399, + "epoch": 1.2383507483761649, + "grad_norm": 3.438833487710581, + "learning_rate": 1.3250940535765808e-05, + "loss": 0.5841, "step": 8770 }, { - "epoch": 1.8391696372405115, - "grad_norm": 7.8850147648764874, - "learning_rate": 6.877981844742505e-06, - "loss": 0.7551, + "epoch": 1.2384919514261508, + "grad_norm": 3.4687520871239297, + "learning_rate": 1.3249498850749547e-05, + "loss": 0.5865, "step": 8771 }, { - "epoch": 1.839379324806039, - "grad_norm": 6.774967576821188, - "learning_rate": 6.875831198622607e-06, - "loss": 0.737, + "epoch": 1.2386331544761366, + "grad_norm": 3.508387611014922, + "learning_rate": 1.3248057090216336e-05, + "loss": 0.6311, "step": 8772 }, { - "epoch": 1.8395890123715664, - "grad_norm": 6.511907095093191, - "learning_rate": 6.87368071262178e-06, - "loss": 0.6485, + "epoch": 1.2387743575261225, + "grad_norm": 3.572248319273451, + "learning_rate": 1.3246615254199679e-05, + "loss": 0.5752, "step": 8773 }, { - "epoch": 1.8397986999370937, - "grad_norm": 6.8029007747581485, - "learning_rate": 6.871530386850236e-06, - "loss": 0.8299, + "epoch": 1.2389155605761084, + "grad_norm": 3.6925340662810626, + "learning_rate": 1.3245173342733084e-05, + "loss": 0.6232, "step": 8774 }, { - "epoch": 1.8400083875026212, - "grad_norm": 6.47913204045999, - "learning_rate": 6.869380221418185e-06, - "loss": 0.68, + "epoch": 1.2390567636260943, + "grad_norm": 2.935864365319856, + "learning_rate": 1.3243731355850069e-05, + "loss": 0.4893, "step": 8775 }, { - "epoch": 1.8402180750681485, - "grad_norm": 6.56204407618195, - "learning_rate": 6.867230216435829e-06, - "loss": 0.7734, + "epoch": 1.2391979666760802, + "grad_norm": 3.1348486320544975, + "learning_rate": 1.3242289293584132e-05, + "loss": 0.5436, "step": 8776 }, { - "epoch": 1.8404277626336758, - "grad_norm": 7.944365517174927, - "learning_rate": 6.8650803720133595e-06, - "loss": 0.9319, + "epoch": 1.239339169726066, + "grad_norm": 3.1965456469447164, + "learning_rate": 1.3240847155968792e-05, + "loss": 0.5385, "step": 8777 }, { - "epoch": 1.8406374501992033, - "grad_norm": 7.68424845709979, - "learning_rate": 6.862930688260956e-06, - "loss": 0.9491, + "epoch": 1.239480372776052, + "grad_norm": 4.188913897730263, + "learning_rate": 1.3239404943037566e-05, + "loss": 0.6784, "step": 8778 }, { - "epoch": 1.8408471377647304, - "grad_norm": 8.251331752428593, - "learning_rate": 6.860781165288795e-06, - "loss": 0.7613, + "epoch": 1.2396215758260378, + "grad_norm": 3.6895657965985853, + "learning_rate": 1.3237962654823965e-05, + "loss": 0.5945, "step": 8779 }, { - "epoch": 1.841056825330258, - "grad_norm": 8.783563510218759, - "learning_rate": 6.85863180320705e-06, - "loss": 0.9162, + "epoch": 1.2397627788760237, + "grad_norm": 3.6331855567298397, + "learning_rate": 1.3236520291361516e-05, + "loss": 0.531, "step": 8780 }, { - "epoch": 1.8412665128957852, - "grad_norm": 6.006705760242939, - "learning_rate": 6.856482602125865e-06, - "loss": 0.5131, + "epoch": 1.2399039819260096, + "grad_norm": 3.5912946304073654, + "learning_rate": 1.3235077852683731e-05, + "loss": 0.7203, "step": 8781 }, { - "epoch": 1.8414762004613126, - "grad_norm": 6.4566421755038155, - "learning_rate": 6.854333562155403e-06, - "loss": 0.5553, + "epoch": 1.2400451849759955, + "grad_norm": 3.236463501239738, + "learning_rate": 1.3233635338824132e-05, + "loss": 0.5622, "step": 8782 }, { - "epoch": 1.84168588802684, - "grad_norm": 7.623377075686887, - "learning_rate": 6.852184683405804e-06, - "loss": 0.739, + "epoch": 1.2401863880259814, + "grad_norm": 2.9431432015729344, + "learning_rate": 1.3232192749816243e-05, + "loss": 0.4843, "step": 8783 }, { - "epoch": 1.8418955755923674, - "grad_norm": 6.777429353134894, - "learning_rate": 6.850035965987196e-06, - "loss": 0.833, + "epoch": 1.2403275910759672, + "grad_norm": 4.00352661968204, + "learning_rate": 1.3230750085693599e-05, + "loss": 0.7451, "step": 8784 }, { - "epoch": 1.8421052631578947, - "grad_norm": 7.218277731403332, - "learning_rate": 6.847887410009707e-06, - "loss": 0.6661, + "epoch": 1.2404687941259531, + "grad_norm": 4.456548997837541, + "learning_rate": 1.3229307346489706e-05, + "loss": 0.6909, "step": 8785 }, { - "epoch": 1.8423149507234222, - "grad_norm": 8.087248565514475, - "learning_rate": 6.845739015583459e-06, - "loss": 0.7049, + "epoch": 1.240609997175939, + "grad_norm": 3.6956437535631115, + "learning_rate": 1.3227864532238113e-05, + "loss": 0.6299, "step": 8786 }, { - "epoch": 1.8425246382889493, - "grad_norm": 8.209382688074394, - "learning_rate": 6.843590782818556e-06, - "loss": 0.9034, + "epoch": 1.2407512002259249, + "grad_norm": 4.2098723305151395, + "learning_rate": 1.3226421642972338e-05, + "loss": 0.7161, "step": 8787 }, { - "epoch": 1.8427343258544768, - "grad_norm": 7.292294602632365, - "learning_rate": 6.841442711825099e-06, - "loss": 0.8407, + "epoch": 1.2408924032759108, + "grad_norm": 3.539657835746912, + "learning_rate": 1.3224978678725921e-05, + "loss": 0.5809, "step": 8788 }, { - "epoch": 1.8429440134200044, - "grad_norm": 7.587416640934165, - "learning_rate": 6.839294802713183e-06, - "loss": 0.9503, + "epoch": 1.2410336063258967, + "grad_norm": 3.3095940675461524, + "learning_rate": 1.322353563953239e-05, + "loss": 0.5769, "step": 8789 }, { - "epoch": 1.8431537009855314, - "grad_norm": 6.88882586775115, - "learning_rate": 6.8371470555928895e-06, - "loss": 0.6873, + "epoch": 1.2411748093758825, + "grad_norm": 4.149141414449005, + "learning_rate": 1.3222092525425286e-05, + "loss": 0.5538, "step": 8790 }, { - "epoch": 1.843363388551059, - "grad_norm": 6.504081474555663, - "learning_rate": 6.834999470574293e-06, - "loss": 0.7416, + "epoch": 1.2413160124258684, + "grad_norm": 3.965040769406472, + "learning_rate": 1.3220649336438137e-05, + "loss": 0.6906, "step": 8791 }, { - "epoch": 1.8435730761165863, - "grad_norm": 6.339350714406659, - "learning_rate": 6.8328520477674665e-06, - "loss": 0.6479, + "epoch": 1.2414572154758543, + "grad_norm": 2.9669938479299613, + "learning_rate": 1.3219206072604496e-05, + "loss": 0.5257, "step": 8792 }, { - "epoch": 1.8437827636821136, - "grad_norm": 6.153805017716203, - "learning_rate": 6.830704787282464e-06, - "loss": 0.761, + "epoch": 1.2415984185258402, + "grad_norm": 3.7567511804677114, + "learning_rate": 1.3217762733957888e-05, + "loss": 0.5202, "step": 8793 }, { - "epoch": 1.843992451247641, - "grad_norm": 7.497326873830622, - "learning_rate": 6.828557689229338e-06, - "loss": 0.8069, + "epoch": 1.241739621575826, + "grad_norm": 3.601470149270778, + "learning_rate": 1.321631932053187e-05, + "loss": 0.6698, "step": 8794 }, { - "epoch": 1.8442021388131684, - "grad_norm": 8.551723296542427, - "learning_rate": 6.826410753718133e-06, - "loss": 0.7128, + "epoch": 1.241880824625812, + "grad_norm": 3.937632431517342, + "learning_rate": 1.3214875832359976e-05, + "loss": 0.6854, "step": 8795 }, { - "epoch": 1.8444118263786957, - "grad_norm": 5.815277958899624, - "learning_rate": 6.824263980858876e-06, - "loss": 0.6506, + "epoch": 1.2420220276757978, + "grad_norm": 3.498409729477378, + "learning_rate": 1.3213432269475759e-05, + "loss": 0.6512, "step": 8796 }, { - "epoch": 1.8446215139442232, - "grad_norm": 6.736973141979157, - "learning_rate": 6.822117370761602e-06, - "loss": 0.5724, + "epoch": 1.2421632307257837, + "grad_norm": 3.486828253123663, + "learning_rate": 1.3211988631912763e-05, + "loss": 0.5526, "step": 8797 }, { - "epoch": 1.8448312015097503, - "grad_norm": 6.703066376560904, - "learning_rate": 6.819970923536326e-06, - "loss": 0.6001, + "epoch": 1.2423044337757696, + "grad_norm": 3.2226967640097093, + "learning_rate": 1.3210544919704539e-05, + "loss": 0.5399, "step": 8798 }, { - "epoch": 1.8450408890752779, - "grad_norm": 6.358485872619281, - "learning_rate": 6.8178246392930525e-06, - "loss": 0.5928, + "epoch": 1.2424456368257555, + "grad_norm": 3.5689335021443376, + "learning_rate": 1.3209101132884634e-05, + "loss": 0.5354, "step": 8799 }, { - "epoch": 1.8452505766408052, - "grad_norm": 7.17451323497089, - "learning_rate": 6.815678518141784e-06, - "loss": 0.7811, + "epoch": 1.2425868398757414, + "grad_norm": 3.404096080261258, + "learning_rate": 1.3207657271486607e-05, + "loss": 0.605, "step": 8800 }, { - "epoch": 1.8454602642063325, - "grad_norm": 7.184417984195259, - "learning_rate": 6.813532560192521e-06, - "loss": 0.7148, + "epoch": 1.2427280429257272, + "grad_norm": 3.5728517955633263, + "learning_rate": 1.3206213335544014e-05, + "loss": 0.5884, "step": 8801 }, { - "epoch": 1.84566995177186, - "grad_norm": 6.622713259594123, - "learning_rate": 6.811386765555234e-06, - "loss": 0.6251, + "epoch": 1.2428692459757131, + "grad_norm": 4.2201097792867195, + "learning_rate": 1.3204769325090403e-05, + "loss": 0.6789, "step": 8802 }, { - "epoch": 1.8458796393373873, - "grad_norm": 6.371339231769335, - "learning_rate": 6.809241134339908e-06, - "loss": 0.6485, + "epoch": 1.243010449025699, + "grad_norm": 3.604481167022113, + "learning_rate": 1.3203325240159337e-05, + "loss": 0.583, "step": 8803 }, { - "epoch": 1.8460893269029146, - "grad_norm": 6.1606406876775095, - "learning_rate": 6.8070956666565104e-06, - "loss": 0.8153, + "epoch": 1.243151652075685, + "grad_norm": 3.5253743176520067, + "learning_rate": 1.3201881080784378e-05, + "loss": 0.6013, "step": 8804 }, { - "epoch": 1.8462990144684421, - "grad_norm": 6.58912325404316, - "learning_rate": 6.804950362614995e-06, - "loss": 0.6781, + "epoch": 1.2432928551256708, + "grad_norm": 3.712498563349242, + "learning_rate": 1.3200436846999085e-05, + "loss": 0.6944, "step": 8805 }, { - "epoch": 1.8465087020339694, - "grad_norm": 7.765294050690382, - "learning_rate": 6.802805222325312e-06, - "loss": 0.8856, + "epoch": 1.2434340581756567, + "grad_norm": 3.087602266306328, + "learning_rate": 1.3198992538837021e-05, + "loss": 0.5121, "step": 8806 }, { - "epoch": 1.8467183895994967, - "grad_norm": 6.370699939040486, - "learning_rate": 6.800660245897416e-06, - "loss": 0.664, + "epoch": 1.2435752612256425, + "grad_norm": 2.8669561716366956, + "learning_rate": 1.3197548156331752e-05, + "loss": 0.5541, "step": 8807 }, { - "epoch": 1.8469280771650243, - "grad_norm": 7.865284213620316, - "learning_rate": 6.798515433441223e-06, - "loss": 0.7884, + "epoch": 1.2437164642756284, + "grad_norm": 3.963777886816572, + "learning_rate": 1.3196103699516844e-05, + "loss": 0.6355, "step": 8808 }, { - "epoch": 1.8471377647305514, - "grad_norm": 6.894833373347385, - "learning_rate": 6.796370785066668e-06, - "loss": 0.8296, + "epoch": 1.2438576673256143, + "grad_norm": 3.37288524112615, + "learning_rate": 1.319465916842587e-05, + "loss": 0.5522, "step": 8809 }, { - "epoch": 1.8473474522960789, - "grad_norm": 7.823675164610121, - "learning_rate": 6.794226300883671e-06, - "loss": 0.9087, + "epoch": 1.2439988703756002, + "grad_norm": 4.6162432659403985, + "learning_rate": 1.3193214563092392e-05, + "loss": 0.5549, "step": 8810 }, { - "epoch": 1.8475571398616062, - "grad_norm": 6.600198464859498, - "learning_rate": 6.792081981002132e-06, - "loss": 0.6011, + "epoch": 1.244140073425586, + "grad_norm": 4.011670435701752, + "learning_rate": 1.319176988354999e-05, + "loss": 0.6446, "step": 8811 }, { - "epoch": 1.8477668274271335, - "grad_norm": 7.041773279406263, - "learning_rate": 6.789937825531955e-06, - "loss": 0.7141, + "epoch": 1.244281276475572, + "grad_norm": 3.969847613268934, + "learning_rate": 1.319032512983223e-05, + "loss": 0.6532, "step": 8812 }, { - "epoch": 1.847976514992661, - "grad_norm": 7.289098017637719, - "learning_rate": 6.7877938345830356e-06, - "loss": 0.5964, + "epoch": 1.2444224795255578, + "grad_norm": 3.554827968653602, + "learning_rate": 1.3188880301972696e-05, + "loss": 0.6085, "step": 8813 }, { - "epoch": 1.8481862025581883, - "grad_norm": 6.708332441824258, - "learning_rate": 6.785650008265251e-06, - "loss": 0.5387, + "epoch": 1.2445636825755437, + "grad_norm": 3.009536506260182, + "learning_rate": 1.318743540000496e-05, + "loss": 0.4958, "step": 8814 }, { - "epoch": 1.8483958901237156, - "grad_norm": 5.513481877202209, - "learning_rate": 6.783506346688478e-06, - "loss": 0.5649, + "epoch": 1.2447048856255294, + "grad_norm": 3.448484926164053, + "learning_rate": 1.3185990423962602e-05, + "loss": 0.6078, "step": 8815 }, { - "epoch": 1.8486055776892432, - "grad_norm": 6.764574080232439, - "learning_rate": 6.781362849962587e-06, - "loss": 0.6651, + "epoch": 1.2448460886755153, + "grad_norm": 3.287058916227956, + "learning_rate": 1.3184545373879201e-05, + "loss": 0.5298, "step": 8816 }, { - "epoch": 1.8488152652547702, - "grad_norm": 6.468305105710468, - "learning_rate": 6.7792195181974265e-06, - "loss": 0.7264, + "epoch": 1.2449872917255012, + "grad_norm": 3.6185668494815717, + "learning_rate": 1.3183100249788342e-05, + "loss": 0.579, "step": 8817 }, { - "epoch": 1.8490249528202978, - "grad_norm": 5.934825396928831, - "learning_rate": 6.777076351502856e-06, - "loss": 0.6363, + "epoch": 1.245128494775487, + "grad_norm": 3.7719687525944257, + "learning_rate": 1.318165505172361e-05, + "loss": 0.4656, "step": 8818 }, { - "epoch": 1.849234640385825, - "grad_norm": 6.802112973861438, - "learning_rate": 6.774933349988713e-06, - "loss": 0.6218, + "epoch": 1.245269697825473, + "grad_norm": 4.0415331449559835, + "learning_rate": 1.3180209779718584e-05, + "loss": 0.7276, "step": 8819 }, { - "epoch": 1.8494443279513524, - "grad_norm": 9.508540778568484, - "learning_rate": 6.772790513764828e-06, - "loss": 1.0334, + "epoch": 1.2454109008754588, + "grad_norm": 3.5860333791680965, + "learning_rate": 1.3178764433806858e-05, + "loss": 0.6693, "step": 8820 }, { - "epoch": 1.84965401551688, - "grad_norm": 7.876813362824488, - "learning_rate": 6.770647842941025e-06, - "loss": 0.7504, + "epoch": 1.2455521039254447, + "grad_norm": 3.2978923818028405, + "learning_rate": 1.3177319014022021e-05, + "loss": 0.5923, "step": 8821 }, { - "epoch": 1.8498637030824072, - "grad_norm": 6.410012445806904, - "learning_rate": 6.768505337627124e-06, - "loss": 0.5726, + "epoch": 1.2456933069754306, + "grad_norm": 3.1079156425258083, + "learning_rate": 1.3175873520397659e-05, + "loss": 0.4998, "step": 8822 }, { - "epoch": 1.8500733906479345, - "grad_norm": 6.381425316930798, - "learning_rate": 6.766362997932926e-06, - "loss": 0.7145, + "epoch": 1.2458345100254165, + "grad_norm": 3.437350757217801, + "learning_rate": 1.3174427952967373e-05, + "loss": 0.7065, "step": 8823 }, { - "epoch": 1.850283078213462, - "grad_norm": 6.525651035404871, - "learning_rate": 6.764220823968234e-06, - "loss": 0.6312, + "epoch": 1.2459757130754023, + "grad_norm": 4.273112073343518, + "learning_rate": 1.3172982311764749e-05, + "loss": 0.6009, "step": 8824 }, { - "epoch": 1.8504927657789894, - "grad_norm": 8.822498025423728, - "learning_rate": 6.762078815842838e-06, - "loss": 0.7329, + "epoch": 1.2461169161253882, + "grad_norm": 4.230711585042246, + "learning_rate": 1.3171536596823385e-05, + "loss": 0.6747, "step": 8825 }, { - "epoch": 1.8507024533445167, - "grad_norm": 7.032901590605517, - "learning_rate": 6.759936973666519e-06, - "loss": 0.8169, + "epoch": 1.246258119175374, + "grad_norm": 2.9691244557211474, + "learning_rate": 1.3170090808176883e-05, + "loss": 0.5021, "step": 8826 }, { - "epoch": 1.8509121409100442, - "grad_norm": 7.553829844647925, - "learning_rate": 6.757795297549046e-06, - "loss": 0.7136, + "epoch": 1.24639932222536, + "grad_norm": 3.7183292395900396, + "learning_rate": 1.316864494585884e-05, + "loss": 0.6001, "step": 8827 }, { - "epoch": 1.8511218284755713, - "grad_norm": 5.721436796682656, - "learning_rate": 6.755653787600194e-06, - "loss": 0.5972, + "epoch": 1.2465405252753459, + "grad_norm": 2.8241661285580064, + "learning_rate": 1.316719900990285e-05, + "loss": 0.4634, "step": 8828 }, { - "epoch": 1.8513315160410988, - "grad_norm": 7.3646695472519355, - "learning_rate": 6.753512443929706e-06, - "loss": 0.7422, + "epoch": 1.2466817283253318, + "grad_norm": 3.2483391687347853, + "learning_rate": 1.3165753000342532e-05, + "loss": 0.5398, "step": 8829 }, { - "epoch": 1.851541203606626, - "grad_norm": 6.442225788840229, - "learning_rate": 6.751371266647338e-06, - "loss": 0.591, + "epoch": 1.2468229313753176, + "grad_norm": 3.7439766374704155, + "learning_rate": 1.3164306917211475e-05, + "loss": 0.6913, "step": 8830 }, { - "epoch": 1.8517508911721534, - "grad_norm": 6.935939436228216, - "learning_rate": 6.74923025586283e-06, - "loss": 0.7439, + "epoch": 1.2469641344253035, + "grad_norm": 3.1873693922947566, + "learning_rate": 1.3162860760543297e-05, + "loss": 0.4709, "step": 8831 }, { - "epoch": 1.851960578737681, - "grad_norm": 6.913249673586981, - "learning_rate": 6.747089411685906e-06, - "loss": 0.6602, + "epoch": 1.2471053374752894, + "grad_norm": 4.192412531159845, + "learning_rate": 1.3161414530371598e-05, + "loss": 0.6238, "step": 8832 }, { - "epoch": 1.8521702663032082, - "grad_norm": 6.198042614676255, - "learning_rate": 6.744948734226291e-06, - "loss": 0.4786, + "epoch": 1.2472465405252753, + "grad_norm": 4.112658388139034, + "learning_rate": 1.3159968226729992e-05, + "loss": 0.7094, "step": 8833 }, { - "epoch": 1.8523799538687356, - "grad_norm": 6.838416173057786, - "learning_rate": 6.742808223593702e-06, - "loss": 0.7624, + "epoch": 1.2473877435752612, + "grad_norm": 2.928363409720141, + "learning_rate": 1.3158521849652087e-05, + "loss": 0.5165, "step": 8834 }, { - "epoch": 1.852589641434263, - "grad_norm": 8.222283824496552, - "learning_rate": 6.740667879897839e-06, - "loss": 0.8665, + "epoch": 1.247528946625247, + "grad_norm": 5.240800274105885, + "learning_rate": 1.31570753991715e-05, + "loss": 0.8118, "step": 8835 }, { - "epoch": 1.8527993289997902, - "grad_norm": 8.083539683029457, - "learning_rate": 6.738527703248401e-06, - "loss": 0.8253, + "epoch": 1.247670149675233, + "grad_norm": 4.067856663369048, + "learning_rate": 1.3155628875321842e-05, + "loss": 0.6378, "step": 8836 }, { - "epoch": 1.8530090165653177, - "grad_norm": 7.2470422542960815, - "learning_rate": 6.736387693755071e-06, - "loss": 0.8012, + "epoch": 1.2478113527252188, + "grad_norm": 3.788071631396618, + "learning_rate": 1.3154182278136734e-05, + "loss": 0.6113, "step": 8837 }, { - "epoch": 1.853218704130845, - "grad_norm": 7.345132051571992, - "learning_rate": 6.73424785152754e-06, - "loss": 0.8605, + "epoch": 1.2479525557752047, + "grad_norm": 3.435652464183366, + "learning_rate": 1.315273560764979e-05, + "loss": 0.5113, "step": 8838 }, { - "epoch": 1.8534283916963723, - "grad_norm": 7.557256944245459, - "learning_rate": 6.732108176675465e-06, - "loss": 0.8796, + "epoch": 1.2480937588251906, + "grad_norm": 4.775649081429604, + "learning_rate": 1.3151288863894633e-05, + "loss": 0.7843, "step": 8839 }, { - "epoch": 1.8536380792618998, - "grad_norm": 7.2110434487702335, - "learning_rate": 6.7299686693085145e-06, - "loss": 0.7316, + "epoch": 1.2482349618751765, + "grad_norm": 3.2997238814226844, + "learning_rate": 1.3149842046904885e-05, + "loss": 0.5308, "step": 8840 }, { - "epoch": 1.8538477668274271, - "grad_norm": 7.696283194998683, - "learning_rate": 6.727829329536344e-06, - "loss": 0.7346, + "epoch": 1.2483761649251623, + "grad_norm": 3.4772409889865665, + "learning_rate": 1.3148395156714169e-05, + "loss": 0.596, "step": 8841 }, { - "epoch": 1.8540574543929544, - "grad_norm": 7.563602074849365, - "learning_rate": 6.725690157468593e-06, - "loss": 0.8762, + "epoch": 1.2485173679751482, + "grad_norm": 4.199031420248525, + "learning_rate": 1.3146948193356105e-05, + "loss": 0.6357, "step": 8842 }, { - "epoch": 1.854267141958482, - "grad_norm": 7.536290936394836, - "learning_rate": 6.723551153214898e-06, - "loss": 0.7293, + "epoch": 1.2486585710251341, + "grad_norm": 3.1522251625643833, + "learning_rate": 1.314550115686433e-05, + "loss": 0.4538, "step": 8843 }, { - "epoch": 1.8544768295240093, - "grad_norm": 6.332324054485579, - "learning_rate": 6.721412316884894e-06, - "loss": 0.6841, + "epoch": 1.24879977407512, + "grad_norm": 3.6964605650874423, + "learning_rate": 1.3144054047272462e-05, + "loss": 0.6175, "step": 8844 }, { - "epoch": 1.8546865170895366, - "grad_norm": 5.7310436681325525, - "learning_rate": 6.7192736485881935e-06, - "loss": 0.5599, + "epoch": 1.2489409771251059, + "grad_norm": 2.9756356761730323, + "learning_rate": 1.3142606864614136e-05, + "loss": 0.5128, "step": 8845 }, { - "epoch": 1.854896204655064, - "grad_norm": 5.592282695165193, - "learning_rate": 6.717135148434408e-06, - "loss": 0.4635, + "epoch": 1.2490821801750918, + "grad_norm": 4.847521576357099, + "learning_rate": 1.3141159608922984e-05, + "loss": 0.7898, "step": 8846 }, { - "epoch": 1.8551058922205912, - "grad_norm": 6.046011829560615, - "learning_rate": 6.714996816533142e-06, - "loss": 0.675, + "epoch": 1.2492233832250776, + "grad_norm": 3.19555466080851, + "learning_rate": 1.313971228023264e-05, + "loss": 0.5153, "step": 8847 }, { - "epoch": 1.8553155797861187, - "grad_norm": 7.67778962726067, - "learning_rate": 6.712858652993983e-06, - "loss": 0.7734, + "epoch": 1.2493645862750635, + "grad_norm": 3.18323702431375, + "learning_rate": 1.3138264878576738e-05, + "loss": 0.516, "step": 8848 }, { - "epoch": 1.855525267351646, - "grad_norm": 7.731736382347402, - "learning_rate": 6.710720657926522e-06, - "loss": 0.7663, + "epoch": 1.2495057893250494, + "grad_norm": 3.6818333512778305, + "learning_rate": 1.3136817403988918e-05, + "loss": 0.563, "step": 8849 }, { - "epoch": 1.8557349549171733, - "grad_norm": 6.985793787001254, - "learning_rate": 6.708582831440333e-06, - "loss": 0.73, + "epoch": 1.2496469923750353, + "grad_norm": 3.1073702214895444, + "learning_rate": 1.3135369856502813e-05, + "loss": 0.5175, "step": 8850 }, { - "epoch": 1.8559446424827009, - "grad_norm": 7.283955032171695, - "learning_rate": 6.706445173644982e-06, - "loss": 0.8422, + "epoch": 1.2497881954250212, + "grad_norm": 3.517799896132647, + "learning_rate": 1.3133922236152066e-05, + "loss": 0.5859, "step": 8851 }, { - "epoch": 1.8561543300482282, - "grad_norm": 7.225940031616147, - "learning_rate": 6.7043076846500265e-06, - "loss": 0.6935, + "epoch": 1.249929398475007, + "grad_norm": 3.6545191569357085, + "learning_rate": 1.3132474542970324e-05, + "loss": 0.5811, "step": 8852 }, { - "epoch": 1.8563640176137555, - "grad_norm": 8.839704772895123, - "learning_rate": 6.702170364565021e-06, - "loss": 1.027, + "epoch": 1.250070601524993, + "grad_norm": 3.6830604709977317, + "learning_rate": 1.313102677699122e-05, + "loss": 0.649, "step": 8853 }, { - "epoch": 1.856573705179283, - "grad_norm": 7.613208424947482, - "learning_rate": 6.700033213499501e-06, - "loss": 0.7765, + "epoch": 1.2502118045749788, + "grad_norm": 3.4289558038114882, + "learning_rate": 1.3129578938248411e-05, + "loss": 0.5374, "step": 8854 }, { - "epoch": 1.85678339274481, - "grad_norm": 5.391501685739063, - "learning_rate": 6.697896231563004e-06, - "loss": 0.6015, + "epoch": 1.2503530076249647, + "grad_norm": 3.1954982437347472, + "learning_rate": 1.3128131026775539e-05, + "loss": 0.6011, "step": 8855 }, { - "epoch": 1.8569930803103376, - "grad_norm": 6.934836968867341, - "learning_rate": 6.695759418865053e-06, - "loss": 0.673, + "epoch": 1.2504942106749506, + "grad_norm": 4.578644899640924, + "learning_rate": 1.3126683042606251e-05, + "loss": 0.7552, "step": 8856 }, { - "epoch": 1.857202767875865, - "grad_norm": 6.391277890209812, - "learning_rate": 6.69362277551516e-06, - "loss": 0.7344, + "epoch": 1.2506354137249365, + "grad_norm": 3.487276572802947, + "learning_rate": 1.31252349857742e-05, + "loss": 0.6086, "step": 8857 }, { - "epoch": 1.8574124554413922, - "grad_norm": 5.942802575330476, - "learning_rate": 6.691486301622835e-06, - "loss": 0.5278, + "epoch": 1.2507766167749224, + "grad_norm": 3.555008217422142, + "learning_rate": 1.3123786856313036e-05, + "loss": 0.6226, "step": 8858 }, { - "epoch": 1.8576221430069197, - "grad_norm": 6.460421602427836, - "learning_rate": 6.689349997297581e-06, - "loss": 0.6349, + "epoch": 1.2509178198249082, + "grad_norm": 3.371465182688844, + "learning_rate": 1.3122338654256414e-05, + "loss": 0.5301, "step": 8859 }, { - "epoch": 1.857831830572447, - "grad_norm": 6.506632623211253, - "learning_rate": 6.687213862648874e-06, - "loss": 0.7203, + "epoch": 1.2510590228748941, + "grad_norm": 3.890419092880434, + "learning_rate": 1.3120890379637996e-05, + "loss": 0.6638, "step": 8860 }, { - "epoch": 1.8580415181379744, - "grad_norm": 6.799073737315293, - "learning_rate": 6.685077897786205e-06, - "loss": 0.9028, + "epoch": 1.25120022592488, + "grad_norm": 2.7699914645186228, + "learning_rate": 1.311944203249143e-05, + "loss": 0.4545, "step": 8861 }, { - "epoch": 1.8582512057035019, - "grad_norm": 6.9433676570730265, - "learning_rate": 6.682942102819046e-06, - "loss": 0.5904, + "epoch": 1.2513414289748659, + "grad_norm": 3.4554507306461693, + "learning_rate": 1.3117993612850377e-05, + "loss": 0.62, "step": 8862 }, { - "epoch": 1.8584608932690292, - "grad_norm": 6.019819731424689, - "learning_rate": 6.680806477856854e-06, - "loss": 0.6146, + "epoch": 1.2514826320248518, + "grad_norm": 2.966992611244665, + "learning_rate": 1.3116545120748501e-05, + "loss": 0.4996, "step": 8863 }, { - "epoch": 1.8586705808345565, - "grad_norm": 7.724605108421488, - "learning_rate": 6.678671023009087e-06, - "loss": 0.7486, + "epoch": 1.2516238350748377, + "grad_norm": 3.505279991720964, + "learning_rate": 1.3115096556219462e-05, + "loss": 0.6551, "step": 8864 }, { - "epoch": 1.858880268400084, - "grad_norm": 7.088456477643206, - "learning_rate": 6.676535738385193e-06, - "loss": 0.6422, + "epoch": 1.2517650381248235, + "grad_norm": 3.760279503684472, + "learning_rate": 1.311364791929692e-05, + "loss": 0.7873, "step": 8865 }, { - "epoch": 1.859089955965611, - "grad_norm": 6.901438102881417, - "learning_rate": 6.674400624094603e-06, - "loss": 0.6493, + "epoch": 1.2519062411748094, + "grad_norm": 3.484042485733211, + "learning_rate": 1.3112199210014552e-05, + "loss": 0.5954, "step": 8866 }, { - "epoch": 1.8592996435311386, - "grad_norm": 6.917424890996764, - "learning_rate": 6.672265680246752e-06, - "loss": 0.6539, + "epoch": 1.2520474442247953, + "grad_norm": 4.648987153093613, + "learning_rate": 1.3110750428406013e-05, + "loss": 0.8175, "step": 8867 }, { - "epoch": 1.859509331096666, - "grad_norm": 6.470614612018865, - "learning_rate": 6.670130906951058e-06, - "loss": 0.7055, + "epoch": 1.2521886472747812, + "grad_norm": 3.3030767246552495, + "learning_rate": 1.3109301574504978e-05, + "loss": 0.646, "step": 8868 }, { - "epoch": 1.8597190186621932, - "grad_norm": 5.4230921355498385, - "learning_rate": 6.667996304316927e-06, - "loss": 0.4805, + "epoch": 1.252329850324767, + "grad_norm": 3.929273037475889, + "learning_rate": 1.310785264834512e-05, + "loss": 0.6821, "step": 8869 }, { - "epoch": 1.8599287062277208, - "grad_norm": 7.365996521134372, - "learning_rate": 6.665861872453763e-06, - "loss": 0.8637, + "epoch": 1.252471053374753, + "grad_norm": 3.204361847352328, + "learning_rate": 1.3106403649960109e-05, + "loss": 0.5157, "step": 8870 }, { - "epoch": 1.860138393793248, - "grad_norm": 6.301453200704071, - "learning_rate": 6.663727611470966e-06, - "loss": 0.7539, + "epoch": 1.2526122564247388, + "grad_norm": 3.82218263509139, + "learning_rate": 1.3104954579383616e-05, + "loss": 0.704, "step": 8871 }, { - "epoch": 1.8603480813587754, - "grad_norm": 6.032662558705327, - "learning_rate": 6.661593521477912e-06, - "loss": 0.5953, + "epoch": 1.2527534594747247, + "grad_norm": 3.2679830467993356, + "learning_rate": 1.310350543664932e-05, + "loss": 0.5674, "step": 8872 }, { - "epoch": 1.860557768924303, - "grad_norm": 4.725083525844369, - "learning_rate": 6.659459602583981e-06, - "loss": 0.4363, + "epoch": 1.2528946625247106, + "grad_norm": 3.6296984233646734, + "learning_rate": 1.3102056221790899e-05, + "loss": 0.5782, "step": 8873 }, { - "epoch": 1.86076745648983, - "grad_norm": 7.5574122382743525, - "learning_rate": 6.657325854898541e-06, - "loss": 0.8328, + "epoch": 1.2530358655746965, + "grad_norm": 4.03258890304909, + "learning_rate": 1.310060693484203e-05, + "loss": 0.6735, "step": 8874 }, { - "epoch": 1.8609771440553575, - "grad_norm": 6.092646076242667, - "learning_rate": 6.655192278530944e-06, - "loss": 0.6048, + "epoch": 1.2531770686246824, + "grad_norm": 3.737356866454328, + "learning_rate": 1.3099157575836397e-05, + "loss": 0.6064, "step": 8875 }, { - "epoch": 1.8611868316208848, - "grad_norm": 9.732242866407068, - "learning_rate": 6.653058873590546e-06, - "loss": 1.0371, + "epoch": 1.2533182716746682, + "grad_norm": 4.532391642812697, + "learning_rate": 1.3097708144807679e-05, + "loss": 0.6881, "step": 8876 }, { - "epoch": 1.8613965191864121, - "grad_norm": 8.130190455484051, - "learning_rate": 6.6509256401866886e-06, - "loss": 0.913, + "epoch": 1.2534594747246541, + "grad_norm": 4.405534329667572, + "learning_rate": 1.3096258641789561e-05, + "loss": 0.7272, "step": 8877 }, { - "epoch": 1.8616062067519397, - "grad_norm": 7.719126143441198, - "learning_rate": 6.6487925784286975e-06, - "loss": 0.8452, + "epoch": 1.25360067777464, + "grad_norm": 2.9499957772359098, + "learning_rate": 1.3094809066815731e-05, + "loss": 0.5234, "step": 8878 }, { - "epoch": 1.861815894317467, - "grad_norm": 7.64850545581207, - "learning_rate": 6.646659688425898e-06, - "loss": 0.8829, + "epoch": 1.253741880824626, + "grad_norm": 3.2731531983735125, + "learning_rate": 1.309335941991987e-05, + "loss": 0.6693, "step": 8879 }, { - "epoch": 1.8620255818829943, - "grad_norm": 6.577181418264686, - "learning_rate": 6.6445269702876124e-06, - "loss": 0.7318, + "epoch": 1.2538830838746118, + "grad_norm": 3.0469863548731264, + "learning_rate": 1.3091909701135676e-05, + "loss": 0.549, "step": 8880 }, { - "epoch": 1.8622352694485218, - "grad_norm": 5.890659130279615, - "learning_rate": 6.642394424123133e-06, - "loss": 0.6811, + "epoch": 1.2540242869245977, + "grad_norm": 2.9534372891883174, + "learning_rate": 1.3090459910496837e-05, + "loss": 0.558, "step": 8881 }, { - "epoch": 1.862444957014049, - "grad_norm": 7.503770995820039, - "learning_rate": 6.640262050041764e-06, - "loss": 0.8002, + "epoch": 1.2541654899745835, + "grad_norm": 2.678718538473889, + "learning_rate": 1.3089010048037043e-05, + "loss": 0.5322, "step": 8882 }, { - "epoch": 1.8626546445795764, - "grad_norm": 5.20649126303187, - "learning_rate": 6.6381298481527945e-06, - "loss": 0.5716, + "epoch": 1.2543066930245694, + "grad_norm": 4.535064901333173, + "learning_rate": 1.3087560113789988e-05, + "loss": 0.6839, "step": 8883 }, { - "epoch": 1.862864332145104, - "grad_norm": 7.544733486050402, - "learning_rate": 6.635997818565499e-06, - "loss": 0.7659, + "epoch": 1.2544478960745553, + "grad_norm": 3.735818564746955, + "learning_rate": 1.3086110107789371e-05, + "loss": 0.6573, "step": 8884 }, { - "epoch": 1.863074019710631, - "grad_norm": 7.205893693741829, - "learning_rate": 6.633865961389148e-06, - "loss": 0.7655, + "epoch": 1.2545890991245412, + "grad_norm": 3.848881031250211, + "learning_rate": 1.3084660030068886e-05, + "loss": 0.6112, "step": 8885 }, { - "epoch": 1.8632837072761586, - "grad_norm": 5.343533088892523, - "learning_rate": 6.63173427673301e-06, - "loss": 0.6497, + "epoch": 1.254730302174527, + "grad_norm": 4.448083422792564, + "learning_rate": 1.3083209880662237e-05, + "loss": 0.7231, "step": 8886 }, { - "epoch": 1.8634933948416859, - "grad_norm": 8.980788269182918, - "learning_rate": 6.629602764706327e-06, - "loss": 0.9527, + "epoch": 1.254871505224513, + "grad_norm": 3.328377895698151, + "learning_rate": 1.3081759659603121e-05, + "loss": 0.5734, "step": 8887 }, { - "epoch": 1.8637030824072132, - "grad_norm": 6.421780517531628, - "learning_rate": 6.62747142541835e-06, - "loss": 0.6544, + "epoch": 1.2550127082744988, + "grad_norm": 3.8545191273647936, + "learning_rate": 1.3080309366925239e-05, + "loss": 0.6591, "step": 8888 }, { - "epoch": 1.8639127699727407, - "grad_norm": 5.150681066153522, - "learning_rate": 6.625340258978313e-06, - "loss": 0.4267, + "epoch": 1.2551539113244847, + "grad_norm": 2.993041001391005, + "learning_rate": 1.3078859002662301e-05, + "loss": 0.5193, "step": 8889 }, { - "epoch": 1.864122457538268, - "grad_norm": 5.260612581794034, - "learning_rate": 6.62320926549544e-06, - "loss": 0.4719, + "epoch": 1.2552951143744706, + "grad_norm": 3.478173442070076, + "learning_rate": 1.3077408566848009e-05, + "loss": 0.5155, "step": 8890 }, { - "epoch": 1.8643321451037953, - "grad_norm": 6.612865620264495, - "learning_rate": 6.621078445078946e-06, - "loss": 0.7476, + "epoch": 1.2554363174244565, + "grad_norm": 3.491073873243466, + "learning_rate": 1.307595805951607e-05, + "loss": 0.676, "step": 8891 }, { - "epoch": 1.8645418326693228, - "grad_norm": 5.7134223514473526, - "learning_rate": 6.618947797838047e-06, - "loss": 0.4969, + "epoch": 1.2555775204744424, + "grad_norm": 3.8598432580956823, + "learning_rate": 1.3074507480700192e-05, + "loss": 0.6301, "step": 8892 }, { - "epoch": 1.86475152023485, - "grad_norm": 5.860707185936737, - "learning_rate": 6.616817323881934e-06, - "loss": 0.5803, + "epoch": 1.2557187235244283, + "grad_norm": 3.868848972892207, + "learning_rate": 1.307305683043409e-05, + "loss": 0.7152, "step": 8893 }, { - "epoch": 1.8649612078003774, - "grad_norm": 5.806977242869526, - "learning_rate": 6.614687023319804e-06, - "loss": 0.42, + "epoch": 1.2558599265744141, + "grad_norm": 4.748159980676383, + "learning_rate": 1.3071606108751475e-05, + "loss": 0.7932, "step": 8894 }, { - "epoch": 1.865170895365905, - "grad_norm": 7.599407840184925, - "learning_rate": 6.612556896260835e-06, - "loss": 0.9014, + "epoch": 1.2560011296244, + "grad_norm": 3.756027891269936, + "learning_rate": 1.307015531568606e-05, + "loss": 0.5866, "step": 8895 }, { - "epoch": 1.865380582931432, - "grad_norm": 4.525484553125013, - "learning_rate": 6.610426942814197e-06, - "loss": 0.379, + "epoch": 1.2561423326743857, + "grad_norm": 3.936574219668811, + "learning_rate": 1.3068704451271561e-05, + "loss": 0.6473, "step": 8896 }, { - "epoch": 1.8655902704969596, - "grad_norm": 9.280598504186766, - "learning_rate": 6.608297163089058e-06, - "loss": 0.9187, + "epoch": 1.2562835357243716, + "grad_norm": 3.061095836867893, + "learning_rate": 1.3067253515541696e-05, + "loss": 0.7049, "step": 8897 }, { - "epoch": 1.8657999580624869, - "grad_norm": 6.573195469618976, - "learning_rate": 6.606167557194574e-06, - "loss": 0.6383, + "epoch": 1.2564247387743575, + "grad_norm": 3.718967192780105, + "learning_rate": 1.3065802508530186e-05, + "loss": 0.657, "step": 8898 }, { - "epoch": 1.8660096456280142, - "grad_norm": 7.854213203816996, - "learning_rate": 6.604038125239889e-06, - "loss": 0.8125, + "epoch": 1.2565659418243433, + "grad_norm": 3.406167284703346, + "learning_rate": 1.3064351430270746e-05, + "loss": 0.6181, "step": 8899 }, { - "epoch": 1.8662193331935417, - "grad_norm": 5.8858506164386375, - "learning_rate": 6.601908867334139e-06, - "loss": 0.6233, + "epoch": 1.2567071448743292, + "grad_norm": 3.5380274967198018, + "learning_rate": 1.3062900280797104e-05, + "loss": 0.6316, "step": 8900 }, { - "epoch": 1.866429020759069, - "grad_norm": 7.595317667476934, - "learning_rate": 6.599779783586451e-06, - "loss": 0.6713, + "epoch": 1.256848347924315, + "grad_norm": 3.5325974670148925, + "learning_rate": 1.306144906014298e-05, + "loss": 0.6437, "step": 8901 }, { - "epoch": 1.8666387083245963, - "grad_norm": 6.614381674667473, - "learning_rate": 6.597650874105951e-06, - "loss": 0.5873, + "epoch": 1.256989550974301, + "grad_norm": 3.8264641770226255, + "learning_rate": 1.3059997768342108e-05, + "loss": 0.6781, "step": 8902 }, { - "epoch": 1.8668483958901239, - "grad_norm": 7.864690117658409, - "learning_rate": 6.595522139001742e-06, - "loss": 0.7024, + "epoch": 1.2571307540242869, + "grad_norm": 4.095870638503094, + "learning_rate": 1.3058546405428203e-05, + "loss": 0.8185, "step": 8903 }, { - "epoch": 1.867058083455651, - "grad_norm": 6.684074803413309, - "learning_rate": 6.59339357838293e-06, - "loss": 0.6293, + "epoch": 1.2572719570742728, + "grad_norm": 3.853095788380736, + "learning_rate": 1.3057094971435005e-05, + "loss": 0.6963, "step": 8904 }, { - "epoch": 1.8672677710211785, - "grad_norm": 7.279803440112739, - "learning_rate": 6.591265192358605e-06, - "loss": 0.7423, + "epoch": 1.2574131601242586, + "grad_norm": 3.7104930205725157, + "learning_rate": 1.3055643466396234e-05, + "loss": 0.6659, "step": 8905 }, { - "epoch": 1.8674774585867058, - "grad_norm": 7.306437518354194, - "learning_rate": 6.589136981037849e-06, - "loss": 0.7373, + "epoch": 1.2575543631742445, + "grad_norm": 2.8770009189194274, + "learning_rate": 1.3054191890345633e-05, + "loss": 0.4687, "step": 8906 }, { - "epoch": 1.867687146152233, - "grad_norm": 7.122241310280439, - "learning_rate": 6.5870089445297395e-06, - "loss": 0.5429, + "epoch": 1.2576955662242304, + "grad_norm": 4.742575777138068, + "learning_rate": 1.3052740243316932e-05, + "loss": 0.8066, "step": 8907 }, { - "epoch": 1.8678968337177606, - "grad_norm": 6.248476416581585, - "learning_rate": 6.584881082943344e-06, - "loss": 0.5769, + "epoch": 1.2578367692742163, + "grad_norm": 3.2747676847445075, + "learning_rate": 1.3051288525343866e-05, + "loss": 0.5463, "step": 8908 }, { - "epoch": 1.868106521283288, - "grad_norm": 6.976369911997928, - "learning_rate": 6.582753396387713e-06, - "loss": 0.7296, + "epoch": 1.2579779723242022, + "grad_norm": 3.9685279704623864, + "learning_rate": 1.304983673646017e-05, + "loss": 0.6982, "step": 8909 }, { - "epoch": 1.8683162088488152, - "grad_norm": 7.477179307019196, - "learning_rate": 6.580625884971896e-06, - "loss": 0.9902, + "epoch": 1.258119175374188, + "grad_norm": 3.5930840915197435, + "learning_rate": 1.3048384876699588e-05, + "loss": 0.6242, "step": 8910 }, { - "epoch": 1.8685258964143427, - "grad_norm": 8.760351687062585, - "learning_rate": 6.578498548804939e-06, - "loss": 1.032, + "epoch": 1.258260378424174, + "grad_norm": 3.9491200168461176, + "learning_rate": 1.3046932946095856e-05, + "loss": 0.6113, "step": 8911 }, { - "epoch": 1.8687355839798698, - "grad_norm": 6.207798507838127, - "learning_rate": 6.576371387995859e-06, - "loss": 0.7004, + "epoch": 1.2584015814741598, + "grad_norm": 3.317973328250457, + "learning_rate": 1.304548094468272e-05, + "loss": 0.5419, "step": 8912 }, { - "epoch": 1.8689452715453974, - "grad_norm": 5.4493282698491905, - "learning_rate": 6.574244402653687e-06, - "loss": 0.5798, + "epoch": 1.2585427845241457, + "grad_norm": 3.5908218535080647, + "learning_rate": 1.304402887249392e-05, + "loss": 0.6088, "step": 8913 }, { - "epoch": 1.8691549591109249, - "grad_norm": 6.83261381738648, - "learning_rate": 6.572117592887433e-06, - "loss": 0.7441, + "epoch": 1.2586839875741316, + "grad_norm": 3.8069201078017856, + "learning_rate": 1.3042576729563203e-05, + "loss": 0.5494, "step": 8914 }, { - "epoch": 1.869364646676452, - "grad_norm": 8.4741185941393, - "learning_rate": 6.5699909588060935e-06, - "loss": 0.7635, + "epoch": 1.2588251906241175, + "grad_norm": 3.9781355984075826, + "learning_rate": 1.3041124515924324e-05, + "loss": 0.7008, "step": 8915 }, { - "epoch": 1.8695743342419795, - "grad_norm": 6.144122827067461, - "learning_rate": 6.567864500518666e-06, - "loss": 0.687, + "epoch": 1.2589663936741033, + "grad_norm": 3.8632152272993405, + "learning_rate": 1.3039672231611015e-05, + "loss": 0.6686, "step": 8916 }, { - "epoch": 1.8697840218075068, - "grad_norm": 7.075808395191733, - "learning_rate": 6.565738218134141e-06, - "loss": 0.631, + "epoch": 1.2591075967240892, + "grad_norm": 3.793096450272382, + "learning_rate": 1.303821987665704e-05, + "loss": 0.5002, "step": 8917 }, { - "epoch": 1.869993709373034, - "grad_norm": 8.969218875262396, - "learning_rate": 6.563612111761484e-06, - "loss": 1.0271, + "epoch": 1.2592487997740751, + "grad_norm": 2.752665495213168, + "learning_rate": 1.3036767451096148e-05, + "loss": 0.4297, "step": 8918 }, { - "epoch": 1.8702033969385616, - "grad_norm": 6.363895689850094, - "learning_rate": 6.561486181509666e-06, - "loss": 0.6306, + "epoch": 1.259390002824061, + "grad_norm": 3.369686424265232, + "learning_rate": 1.3035314954962092e-05, + "loss": 0.5652, "step": 8919 }, { - "epoch": 1.870413084504089, - "grad_norm": 6.578826797911811, - "learning_rate": 6.559360427487649e-06, - "loss": 0.6168, + "epoch": 1.2595312058740469, + "grad_norm": 4.240867842961195, + "learning_rate": 1.3033862388288628e-05, + "loss": 0.5564, "step": 8920 }, { - "epoch": 1.8706227720696162, - "grad_norm": 7.048244666740947, - "learning_rate": 6.557234849804375e-06, - "loss": 0.7544, + "epoch": 1.2596724089240328, + "grad_norm": 3.2356365250206243, + "learning_rate": 1.3032409751109509e-05, + "loss": 0.5441, "step": 8921 }, { - "epoch": 1.8708324596351438, - "grad_norm": 6.909507666577538, - "learning_rate": 6.555109448568784e-06, - "loss": 0.7355, + "epoch": 1.2598136119740186, + "grad_norm": 3.8468139636304537, + "learning_rate": 1.30309570434585e-05, + "loss": 0.6802, "step": 8922 }, { - "epoch": 1.8710421472006709, - "grad_norm": 6.043428641168836, - "learning_rate": 6.552984223889813e-06, - "loss": 0.6072, + "epoch": 1.2599548150240045, + "grad_norm": 3.949462364771497, + "learning_rate": 1.3029504265369356e-05, + "loss": 0.5011, "step": 8923 }, { - "epoch": 1.8712518347661984, - "grad_norm": 6.927432719928769, - "learning_rate": 6.550859175876376e-06, - "loss": 0.7859, + "epoch": 1.2600960180739904, + "grad_norm": 2.8576362413757326, + "learning_rate": 1.3028051416875845e-05, + "loss": 0.4717, "step": 8924 }, { - "epoch": 1.8714615223317257, - "grad_norm": 5.750444733062134, - "learning_rate": 6.548734304637389e-06, - "loss": 0.5718, + "epoch": 1.2602372211239763, + "grad_norm": 4.894327819831198, + "learning_rate": 1.3026598498011721e-05, + "loss": 0.7425, "step": 8925 }, { - "epoch": 1.871671209897253, - "grad_norm": 6.7915582670454775, - "learning_rate": 6.546609610281758e-06, - "loss": 0.8597, + "epoch": 1.2603784241739622, + "grad_norm": 3.6381770204971158, + "learning_rate": 1.302514550881076e-05, + "loss": 0.6288, "step": 8926 }, { - "epoch": 1.8718808974627805, - "grad_norm": 6.122602169339049, - "learning_rate": 6.544485092918369e-06, - "loss": 0.6887, + "epoch": 1.260519627223948, + "grad_norm": 3.431979565034582, + "learning_rate": 1.3023692449306721e-05, + "loss": 0.59, "step": 8927 }, { - "epoch": 1.8720905850283078, - "grad_norm": 6.425600311955218, - "learning_rate": 6.542360752656114e-06, - "loss": 0.6948, + "epoch": 1.260660830273934, + "grad_norm": 3.7887198884522464, + "learning_rate": 1.3022239319533376e-05, + "loss": 0.5832, "step": 8928 }, { - "epoch": 1.8723002725938351, - "grad_norm": 8.614202927995255, - "learning_rate": 6.54023658960387e-06, - "loss": 0.9709, + "epoch": 1.2608020333239198, + "grad_norm": 4.005135336528074, + "learning_rate": 1.3020786119524495e-05, + "loss": 0.6775, "step": 8929 }, { - "epoch": 1.8725099601593627, - "grad_norm": 8.476560605176372, - "learning_rate": 6.538112603870499e-06, - "loss": 0.9771, + "epoch": 1.2609432363739057, + "grad_norm": 4.111172688075792, + "learning_rate": 1.3019332849313851e-05, + "loss": 0.6188, "step": 8930 }, { - "epoch": 1.87271964772489, - "grad_norm": 6.954862038486184, - "learning_rate": 6.5359887955648624e-06, - "loss": 0.6574, + "epoch": 1.2610844394238916, + "grad_norm": 3.4482134691506063, + "learning_rate": 1.301787950893521e-05, + "loss": 0.5794, "step": 8931 }, { - "epoch": 1.8729293352904173, - "grad_norm": 6.222568665802242, - "learning_rate": 6.53386516479581e-06, - "loss": 0.5438, + "epoch": 1.2612256424738775, + "grad_norm": 3.13623252154556, + "learning_rate": 1.301642609842236e-05, + "loss": 0.5171, "step": 8932 }, { - "epoch": 1.8731390228559448, - "grad_norm": 4.959865571404766, - "learning_rate": 6.531741711672175e-06, - "loss": 0.4911, + "epoch": 1.2613668455238634, + "grad_norm": 4.240374093672408, + "learning_rate": 1.3014972617809062e-05, + "loss": 0.652, "step": 8933 }, { - "epoch": 1.8733487104214719, - "grad_norm": 5.805135323788341, - "learning_rate": 6.529618436302794e-06, - "loss": 0.5746, + "epoch": 1.2615080485738492, + "grad_norm": 3.650117196717995, + "learning_rate": 1.3013519067129108e-05, + "loss": 0.5957, "step": 8934 }, { - "epoch": 1.8735583979869994, - "grad_norm": 7.684195461237127, - "learning_rate": 6.52749533879649e-06, - "loss": 0.7282, + "epoch": 1.2616492516238351, + "grad_norm": 3.56709103857253, + "learning_rate": 1.3012065446416272e-05, + "loss": 0.5668, "step": 8935 }, { - "epoch": 1.8737680855525267, - "grad_norm": 6.288204704747225, - "learning_rate": 6.5253724192620705e-06, - "loss": 0.7443, + "epoch": 1.261790454673821, + "grad_norm": 3.338455964870943, + "learning_rate": 1.3010611755704334e-05, + "loss": 0.5544, "step": 8936 }, { - "epoch": 1.873977773118054, - "grad_norm": 5.583061637872511, - "learning_rate": 6.523249677808339e-06, - "loss": 0.6514, + "epoch": 1.2619316577238069, + "grad_norm": 4.320475233328176, + "learning_rate": 1.3009157995027079e-05, + "loss": 0.6603, "step": 8937 }, { - "epoch": 1.8741874606835816, - "grad_norm": 7.115169158875418, - "learning_rate": 6.521127114544097e-06, - "loss": 0.6332, + "epoch": 1.2620728607737928, + "grad_norm": 3.6989088346249526, + "learning_rate": 1.3007704164418294e-05, + "loss": 0.6197, "step": 8938 }, { - "epoch": 1.8743971482491089, - "grad_norm": 6.803543682295695, - "learning_rate": 6.519004729578119e-06, - "loss": 0.7904, + "epoch": 1.2622140638237787, + "grad_norm": 3.9258328393111457, + "learning_rate": 1.3006250263911759e-05, + "loss": 0.5633, "step": 8939 }, { - "epoch": 1.8746068358146362, - "grad_norm": 6.8270308356618585, - "learning_rate": 6.516882523019188e-06, - "loss": 0.6484, + "epoch": 1.2623552668737645, + "grad_norm": 3.472715183538632, + "learning_rate": 1.3004796293541269e-05, + "loss": 0.5269, "step": 8940 }, { - "epoch": 1.8748165233801637, - "grad_norm": 7.705124357154914, - "learning_rate": 6.51476049497607e-06, - "loss": 0.7667, + "epoch": 1.2624964699237504, + "grad_norm": 3.3614711803971766, + "learning_rate": 1.3003342253340613e-05, + "loss": 0.6048, "step": 8941 }, { - "epoch": 1.8750262109456908, - "grad_norm": 6.5294676255434005, - "learning_rate": 6.512638645557521e-06, - "loss": 0.6332, + "epoch": 1.2626376729737363, + "grad_norm": 3.668901304355269, + "learning_rate": 1.3001888143343578e-05, + "loss": 0.62, "step": 8942 }, { - "epoch": 1.8752358985112183, - "grad_norm": 5.0371993155952905, - "learning_rate": 6.510516974872286e-06, - "loss": 0.5078, + "epoch": 1.2627788760237222, + "grad_norm": 3.8102469144223168, + "learning_rate": 1.3000433963583956e-05, + "loss": 0.6805, "step": 8943 }, { - "epoch": 1.8754455860767456, - "grad_norm": 6.004225104743536, - "learning_rate": 6.5083954830291126e-06, - "loss": 0.6194, + "epoch": 1.262920079073708, + "grad_norm": 3.5014917754074895, + "learning_rate": 1.2998979714095547e-05, + "loss": 0.5967, "step": 8944 }, { - "epoch": 1.875655273642273, - "grad_norm": 6.083101894981867, - "learning_rate": 6.506274170136726e-06, - "loss": 0.6356, + "epoch": 1.263061282123694, + "grad_norm": 3.4865129052020296, + "learning_rate": 1.2997525394912148e-05, + "loss": 0.5413, "step": 8945 }, { - "epoch": 1.8758649612078004, - "grad_norm": 7.193023671217823, - "learning_rate": 6.5041530363038475e-06, - "loss": 0.8813, + "epoch": 1.2632024851736798, + "grad_norm": 3.6064554367241075, + "learning_rate": 1.2996071006067548e-05, + "loss": 0.6061, "step": 8946 }, { - "epoch": 1.8760746487733277, - "grad_norm": 6.702513618808895, - "learning_rate": 6.50203208163919e-06, - "loss": 0.7228, + "epoch": 1.2633436882236655, + "grad_norm": 3.262145240240631, + "learning_rate": 1.2994616547595555e-05, + "loss": 0.4854, "step": 8947 }, { - "epoch": 1.876284336338855, - "grad_norm": 7.0157992711968555, - "learning_rate": 6.499911306251454e-06, - "loss": 0.7464, + "epoch": 1.2634848912736514, + "grad_norm": 3.674252208660445, + "learning_rate": 1.2993162019529963e-05, + "loss": 0.6715, "step": 8948 }, { - "epoch": 1.8764940239043826, - "grad_norm": 7.387384453011469, - "learning_rate": 6.497790710249332e-06, - "loss": 0.7829, + "epoch": 1.2636260943236373, + "grad_norm": 3.8027909909672575, + "learning_rate": 1.2991707421904582e-05, + "loss": 0.5725, "step": 8949 }, { - "epoch": 1.8767037114699099, - "grad_norm": 6.294736181396978, - "learning_rate": 6.495670293741514e-06, - "loss": 0.7263, + "epoch": 1.2637672973736231, + "grad_norm": 3.296533472974303, + "learning_rate": 1.2990252754753212e-05, + "loss": 0.4854, "step": 8950 }, { - "epoch": 1.8769133990354372, - "grad_norm": 6.967283317206903, - "learning_rate": 6.49355005683667e-06, - "loss": 0.603, + "epoch": 1.263908500423609, + "grad_norm": 3.8955632055364267, + "learning_rate": 1.2988798018109655e-05, + "loss": 0.6311, "step": 8951 }, { - "epoch": 1.8771230866009647, - "grad_norm": 6.950041571853424, - "learning_rate": 6.491429999643468e-06, - "loss": 0.4412, + "epoch": 1.264049703473595, + "grad_norm": 3.3606954751147717, + "learning_rate": 1.2987343212007728e-05, + "loss": 0.5515, "step": 8952 }, { - "epoch": 1.8773327741664918, - "grad_norm": 8.902568674112553, - "learning_rate": 6.489310122270564e-06, - "loss": 0.8974, + "epoch": 1.2641909065235808, + "grad_norm": 3.315289844199876, + "learning_rate": 1.2985888336481236e-05, + "loss": 0.6271, "step": 8953 }, { - "epoch": 1.8775424617320193, - "grad_norm": 7.679624562088054, - "learning_rate": 6.487190424826601e-06, - "loss": 0.6639, + "epoch": 1.2643321095735667, + "grad_norm": 5.377517995348369, + "learning_rate": 1.2984433391563984e-05, + "loss": 0.4782, "step": 8954 }, { - "epoch": 1.8777521492975466, - "grad_norm": 7.473587370459821, - "learning_rate": 6.485070907420225e-06, - "loss": 0.6834, + "epoch": 1.2644733126235526, + "grad_norm": 3.7162518336751615, + "learning_rate": 1.2982978377289792e-05, + "loss": 0.558, "step": 8955 }, { - "epoch": 1.877961836863074, - "grad_norm": 6.036893945044784, - "learning_rate": 6.482951570160063e-06, - "loss": 0.7007, + "epoch": 1.2646145156735384, + "grad_norm": 3.0820007244190912, + "learning_rate": 1.2981523293692468e-05, + "loss": 0.4931, "step": 8956 }, { - "epoch": 1.8781715244286015, - "grad_norm": 7.252405544719615, - "learning_rate": 6.480832413154732e-06, - "loss": 0.7026, + "epoch": 1.2647557187235243, + "grad_norm": 3.64601854661631, + "learning_rate": 1.2980068140805833e-05, + "loss": 0.65, "step": 8957 }, { - "epoch": 1.8783812119941288, - "grad_norm": 8.155894741500735, - "learning_rate": 6.47871343651284e-06, - "loss": 0.9691, + "epoch": 1.2648969217735102, + "grad_norm": 3.2652312135192028, + "learning_rate": 1.2978612918663702e-05, + "loss": 0.6184, "step": 8958 }, { - "epoch": 1.878590899559656, - "grad_norm": 6.890982968764576, - "learning_rate": 6.476594640342993e-06, - "loss": 0.6606, + "epoch": 1.265038124823496, + "grad_norm": 3.678370650149595, + "learning_rate": 1.2977157627299888e-05, + "loss": 0.6475, "step": 8959 }, { - "epoch": 1.8788005871251836, - "grad_norm": 8.726550712893387, - "learning_rate": 6.4744760247537855e-06, - "loss": 0.9447, + "epoch": 1.265179327873482, + "grad_norm": 3.2042596889359665, + "learning_rate": 1.297570226674822e-05, + "loss": 0.6045, "step": 8960 }, { - "epoch": 1.8790102746907107, - "grad_norm": 7.240029471173216, - "learning_rate": 6.472357589853793e-06, - "loss": 0.6056, + "epoch": 1.2653205309234679, + "grad_norm": 3.962067951560162, + "learning_rate": 1.2974246837042518e-05, + "loss": 0.7248, "step": 8961 }, { - "epoch": 1.8792199622562382, - "grad_norm": 7.882731099837918, - "learning_rate": 6.470239335751594e-06, - "loss": 0.9855, + "epoch": 1.2654617339734537, + "grad_norm": 3.9980907105036403, + "learning_rate": 1.2972791338216601e-05, + "loss": 0.6598, "step": 8962 }, { - "epoch": 1.8794296498217655, - "grad_norm": 7.565005743444662, - "learning_rate": 6.468121262555752e-06, - "loss": 0.8214, + "epoch": 1.2656029370234396, + "grad_norm": 3.4544667163195646, + "learning_rate": 1.2971335770304301e-05, + "loss": 0.5786, "step": 8963 }, { - "epoch": 1.8796393373872928, - "grad_norm": 6.6689163902282305, - "learning_rate": 6.466003370374817e-06, - "loss": 0.8282, + "epoch": 1.2657441400734255, + "grad_norm": 3.93353711886771, + "learning_rate": 1.2969880133339437e-05, + "loss": 0.6544, "step": 8964 }, { - "epoch": 1.8798490249528204, - "grad_norm": 6.993420022084548, - "learning_rate": 6.4638856593173416e-06, - "loss": 0.6934, + "epoch": 1.2658853431234114, + "grad_norm": 4.238039114949604, + "learning_rate": 1.2968424427355842e-05, + "loss": 0.7479, "step": 8965 }, { - "epoch": 1.8800587125183477, - "grad_norm": 5.133658428228081, - "learning_rate": 6.46176812949186e-06, - "loss": 0.4979, + "epoch": 1.2660265461733973, + "grad_norm": 3.145038751687595, + "learning_rate": 1.2966968652387348e-05, + "loss": 0.4958, "step": 8966 }, { - "epoch": 1.880268400083875, - "grad_norm": 7.017300011416189, - "learning_rate": 6.459650781006897e-06, - "loss": 0.7037, + "epoch": 1.2661677492233832, + "grad_norm": 4.183474409543424, + "learning_rate": 1.2965512808467782e-05, + "loss": 0.7165, "step": 8967 }, { - "epoch": 1.8804780876494025, - "grad_norm": 6.638147808313806, - "learning_rate": 6.45753361397097e-06, - "loss": 0.6873, + "epoch": 1.266308952273369, + "grad_norm": 3.6759148043059167, + "learning_rate": 1.2964056895630976e-05, + "loss": 0.6784, "step": 8968 }, { - "epoch": 1.8806877752149298, - "grad_norm": 8.36345720195135, - "learning_rate": 6.455416628492596e-06, - "loss": 1.0301, + "epoch": 1.266450155323355, + "grad_norm": 3.383925566923806, + "learning_rate": 1.2962600913910773e-05, + "loss": 0.5114, "step": 8969 }, { - "epoch": 1.880897462780457, - "grad_norm": 5.87060066920337, - "learning_rate": 6.4532998246802605e-06, - "loss": 0.576, + "epoch": 1.2665913583733408, + "grad_norm": 3.404124685983445, + "learning_rate": 1.2961144863341e-05, + "loss": 0.6093, "step": 8970 }, { - "epoch": 1.8811071503459846, - "grad_norm": 6.642853366081228, - "learning_rate": 6.451183202642463e-06, - "loss": 0.6483, + "epoch": 1.2667325614233267, + "grad_norm": 3.0597263951938305, + "learning_rate": 1.2959688743955502e-05, + "loss": 0.5727, "step": 8971 }, { - "epoch": 1.8813168379115117, - "grad_norm": 6.3328560021998355, - "learning_rate": 6.449066762487684e-06, - "loss": 0.6491, + "epoch": 1.2668737644733126, + "grad_norm": 3.400041641777541, + "learning_rate": 1.2958232555788115e-05, + "loss": 0.5637, "step": 8972 }, { - "epoch": 1.8815265254770392, - "grad_norm": 4.989911108874801, - "learning_rate": 6.4469505043243885e-06, - "loss": 0.4519, + "epoch": 1.2670149675232985, + "grad_norm": 3.4521489421962275, + "learning_rate": 1.2956776298872682e-05, + "loss": 0.5491, "step": 8973 }, { - "epoch": 1.8817362130425666, - "grad_norm": 6.32969834054392, - "learning_rate": 6.444834428261042e-06, - "loss": 0.6474, + "epoch": 1.2671561705732843, + "grad_norm": 2.8287607076633767, + "learning_rate": 1.2955319973243043e-05, + "loss": 0.5293, "step": 8974 }, { - "epoch": 1.8819459006080939, - "grad_norm": 7.57485872892723, - "learning_rate": 6.4427185344061e-06, - "loss": 0.6824, + "epoch": 1.2672973736232702, + "grad_norm": 3.6401511014648316, + "learning_rate": 1.2953863578933045e-05, + "loss": 0.5701, "step": 8975 }, { - "epoch": 1.8821555881736214, - "grad_norm": 6.728962601042951, - "learning_rate": 6.4406028228680005e-06, - "loss": 0.6553, + "epoch": 1.267438576673256, + "grad_norm": 3.6630020153107825, + "learning_rate": 1.2952407115976531e-05, + "loss": 0.6483, "step": 8976 }, { - "epoch": 1.8823652757391487, - "grad_norm": 6.691419482983556, - "learning_rate": 6.4384872937551805e-06, - "loss": 0.5701, + "epoch": 1.267579779723242, + "grad_norm": 4.065382577807174, + "learning_rate": 1.2950950584407353e-05, + "loss": 0.6482, "step": 8977 }, { - "epoch": 1.882574963304676, - "grad_norm": 7.614783066258959, - "learning_rate": 6.436371947176066e-06, - "loss": 0.9495, + "epoch": 1.2677209827732279, + "grad_norm": 4.326769035670086, + "learning_rate": 1.2949493984259355e-05, + "loss": 0.6315, "step": 8978 }, { - "epoch": 1.8827846508702035, - "grad_norm": 7.511072995463747, - "learning_rate": 6.434256783239068e-06, - "loss": 0.7676, + "epoch": 1.2678621858232137, + "grad_norm": 4.025754721347293, + "learning_rate": 1.2948037315566395e-05, + "loss": 0.6701, "step": 8979 }, { - "epoch": 1.8829943384357306, - "grad_norm": 6.835069916438593, - "learning_rate": 6.432141802052592e-06, - "loss": 0.7322, + "epoch": 1.2680033888731996, + "grad_norm": 3.5624685657430715, + "learning_rate": 1.294658057836232e-05, + "loss": 0.6098, "step": 8980 }, { - "epoch": 1.8832040260012581, - "grad_norm": 7.529890368778924, - "learning_rate": 6.430027003725041e-06, - "loss": 0.9392, + "epoch": 1.2681445919231855, + "grad_norm": 3.9543817995148505, + "learning_rate": 1.2945123772680983e-05, + "loss": 0.734, "step": 8981 }, { - "epoch": 1.8834137135667854, - "grad_norm": 5.697224226491581, - "learning_rate": 6.427912388364796e-06, - "loss": 0.5872, + "epoch": 1.2682857949731714, + "grad_norm": 5.084742692533595, + "learning_rate": 1.294366689855624e-05, + "loss": 0.5913, "step": 8982 }, { - "epoch": 1.8836234011323127, - "grad_norm": 6.838932593203239, - "learning_rate": 6.425797956080236e-06, - "loss": 0.8455, + "epoch": 1.2684269980231573, + "grad_norm": 3.4026778394307646, + "learning_rate": 1.2942209956021953e-05, + "loss": 0.559, "step": 8983 }, { - "epoch": 1.8838330886978403, - "grad_norm": 5.921217453019628, - "learning_rate": 6.423683706979733e-06, - "loss": 0.5809, + "epoch": 1.2685682010731432, + "grad_norm": 3.6524786099681075, + "learning_rate": 1.2940752945111977e-05, + "loss": 0.5666, "step": 8984 }, { - "epoch": 1.8840427762633676, - "grad_norm": 5.499811032758023, - "learning_rate": 6.421569641171638e-06, - "loss": 0.6489, + "epoch": 1.268709404123129, + "grad_norm": 3.298860884514903, + "learning_rate": 1.293929586586017e-05, + "loss": 0.606, "step": 8985 }, { - "epoch": 1.8842524638288949, - "grad_norm": 5.324100269166418, - "learning_rate": 6.4194557587643066e-06, - "loss": 0.5409, + "epoch": 1.268850607173115, + "grad_norm": 3.9303412608682318, + "learning_rate": 1.2937838718300398e-05, + "loss": 0.6429, "step": 8986 }, { - "epoch": 1.8844621513944224, - "grad_norm": 5.601625625036533, - "learning_rate": 6.417342059866081e-06, - "loss": 0.6347, + "epoch": 1.2689918102231008, + "grad_norm": 3.497938671828123, + "learning_rate": 1.2936381502466524e-05, + "loss": 0.6578, "step": 8987 }, { - "epoch": 1.8846718389599497, - "grad_norm": 6.789689469661332, - "learning_rate": 6.415228544585286e-06, - "loss": 0.7457, + "epoch": 1.2691330132730867, + "grad_norm": 3.758198179817365, + "learning_rate": 1.293492421839241e-05, + "loss": 0.6359, "step": 8988 }, { - "epoch": 1.884881526525477, - "grad_norm": 7.68154280789634, - "learning_rate": 6.413115213030243e-06, - "loss": 0.7901, + "epoch": 1.2692742163230726, + "grad_norm": 4.032020562309251, + "learning_rate": 1.2933466866111925e-05, + "loss": 0.6627, "step": 8989 }, { - "epoch": 1.8850912140910046, - "grad_norm": 6.549744468811412, - "learning_rate": 6.411002065309274e-06, - "loss": 0.611, + "epoch": 1.2694154193730585, + "grad_norm": 3.0089064763758873, + "learning_rate": 1.293200944565894e-05, + "loss": 0.533, "step": 8990 }, { - "epoch": 1.8853009016565316, - "grad_norm": 7.204557346747703, - "learning_rate": 6.408889101530666e-06, - "loss": 0.8142, + "epoch": 1.2695566224230443, + "grad_norm": 3.2256158405256214, + "learning_rate": 1.2930551957067318e-05, + "loss": 0.6249, "step": 8991 }, { - "epoch": 1.8855105892220592, - "grad_norm": 6.9965841679486935, - "learning_rate": 6.406776321802725e-06, - "loss": 0.737, + "epoch": 1.2696978254730302, + "grad_norm": 3.7546933467337946, + "learning_rate": 1.2929094400370936e-05, + "loss": 0.5833, "step": 8992 }, { - "epoch": 1.8857202767875865, - "grad_norm": 8.716060398973092, - "learning_rate": 6.404663726233728e-06, - "loss": 0.9585, + "epoch": 1.2698390285230161, + "grad_norm": 3.181103764849049, + "learning_rate": 1.2927636775603663e-05, + "loss": 0.4576, "step": 8993 }, { - "epoch": 1.8859299643531138, - "grad_norm": 5.231424842197686, - "learning_rate": 6.402551314931952e-06, - "loss": 0.5039, + "epoch": 1.269980231573002, + "grad_norm": 4.827523025444046, + "learning_rate": 1.2926179082799377e-05, + "loss": 0.8276, "step": 8994 }, { - "epoch": 1.8861396519186413, - "grad_norm": 6.459207999825532, - "learning_rate": 6.4004390880056566e-06, - "loss": 0.6143, + "epoch": 1.2701214346229879, + "grad_norm": 3.195125233736593, + "learning_rate": 1.2924721321991953e-05, + "loss": 0.5184, "step": 8995 }, { - "epoch": 1.8863493394841686, - "grad_norm": 7.482139476773287, - "learning_rate": 6.398327045563108e-06, - "loss": 0.8055, + "epoch": 1.2702626376729738, + "grad_norm": 4.325255856387945, + "learning_rate": 1.292326349321527e-05, + "loss": 0.803, "step": 8996 }, { - "epoch": 1.886559027049696, - "grad_norm": 6.874487298845809, - "learning_rate": 6.3962151877125385e-06, - "loss": 0.727, + "epoch": 1.2704038407229596, + "grad_norm": 3.2589357281435327, + "learning_rate": 1.2921805596503203e-05, + "loss": 0.5662, "step": 8997 }, { - "epoch": 1.8867687146152234, - "grad_norm": 6.786037002893921, - "learning_rate": 6.394103514562193e-06, - "loss": 0.5984, + "epoch": 1.2705450437729455, + "grad_norm": 3.1653797195248234, + "learning_rate": 1.2920347631889637e-05, + "loss": 0.5886, "step": 8998 }, { - "epoch": 1.8869784021807505, - "grad_norm": 8.457215285996815, - "learning_rate": 6.391992026220299e-06, - "loss": 0.8675, + "epoch": 1.2706862468229314, + "grad_norm": 3.290514737918978, + "learning_rate": 1.2918889599408453e-05, + "loss": 0.5716, "step": 8999 }, { - "epoch": 1.887188089746278, - "grad_norm": 7.716848580581355, - "learning_rate": 6.38988072279507e-06, - "loss": 0.7905, + "epoch": 1.2708274498729173, + "grad_norm": 3.685175409717053, + "learning_rate": 1.2917431499093538e-05, + "loss": 0.5248, "step": 9000 }, { - "epoch": 1.8873977773118054, - "grad_norm": 5.32908579320883, - "learning_rate": 6.387769604394713e-06, - "loss": 0.5501, + "epoch": 1.2709686529229032, + "grad_norm": 3.404081731664567, + "learning_rate": 1.2915973330978772e-05, + "loss": 0.5311, "step": 9001 }, { - "epoch": 1.8876074648773327, - "grad_norm": 6.497847530791774, - "learning_rate": 6.385658671127433e-06, - "loss": 0.6941, + "epoch": 1.271109855972889, + "grad_norm": 3.8071207311969104, + "learning_rate": 1.2914515095098043e-05, + "loss": 0.6472, "step": 9002 }, { - "epoch": 1.8878171524428602, - "grad_norm": 6.1307103975041, - "learning_rate": 6.383547923101412e-06, - "loss": 0.5342, + "epoch": 1.271251059022875, + "grad_norm": 3.470880049957385, + "learning_rate": 1.2913056791485246e-05, + "loss": 0.5813, "step": 9003 }, { - "epoch": 1.8880268400083875, - "grad_norm": 7.765491888854994, - "learning_rate": 6.3814373604248315e-06, - "loss": 0.7923, + "epoch": 1.2713922620728608, + "grad_norm": 3.377128356843363, + "learning_rate": 1.291159842017427e-05, + "loss": 0.5266, "step": 9004 }, { - "epoch": 1.8882365275739148, - "grad_norm": 6.143227148109602, - "learning_rate": 6.3793269832058665e-06, - "loss": 0.5646, + "epoch": 1.2715334651228467, + "grad_norm": 3.7512896580711477, + "learning_rate": 1.2910139981198997e-05, + "loss": 0.6997, "step": 9005 }, { - "epoch": 1.8884462151394423, - "grad_norm": 8.097047907607376, - "learning_rate": 6.377216791552668e-06, - "loss": 0.8071, + "epoch": 1.2716746681728326, + "grad_norm": 3.838302242630251, + "learning_rate": 1.290868147459333e-05, + "loss": 0.6298, "step": 9006 }, { - "epoch": 1.8886559027049696, - "grad_norm": 6.084921774434358, - "learning_rate": 6.375106785573395e-06, - "loss": 0.5932, + "epoch": 1.2718158712228185, + "grad_norm": 3.374726034915733, + "learning_rate": 1.2907222900391164e-05, + "loss": 0.5631, "step": 9007 }, { - "epoch": 1.888865590270497, - "grad_norm": 6.538946075069554, - "learning_rate": 6.372996965376188e-06, - "loss": 0.5712, + "epoch": 1.2719570742728044, + "grad_norm": 3.3580965615188574, + "learning_rate": 1.290576425862639e-05, + "loss": 0.657, "step": 9008 }, { - "epoch": 1.8890752778360245, - "grad_norm": 7.200732521790574, - "learning_rate": 6.370887331069176e-06, - "loss": 0.8045, + "epoch": 1.2720982773227902, + "grad_norm": 4.777898492589735, + "learning_rate": 1.2904305549332911e-05, + "loss": 0.7193, "step": 9009 }, { - "epoch": 1.8892849654015516, - "grad_norm": 8.601416704395712, - "learning_rate": 6.368777882760482e-06, - "loss": 0.7995, + "epoch": 1.2722394803727761, + "grad_norm": 3.6071499667920084, + "learning_rate": 1.2902846772544625e-05, + "loss": 0.5979, "step": 9010 }, { - "epoch": 1.889494652967079, - "grad_norm": 7.653629587078774, - "learning_rate": 6.366668620558221e-06, - "loss": 0.8363, + "epoch": 1.272380683422762, + "grad_norm": 3.1814571763100483, + "learning_rate": 1.2901387928295432e-05, + "loss": 0.5846, "step": 9011 }, { - "epoch": 1.8897043405326064, - "grad_norm": 5.238812819565244, - "learning_rate": 6.364559544570491e-06, - "loss": 0.4146, + "epoch": 1.2725218864727479, + "grad_norm": 3.705722505407158, + "learning_rate": 1.2899929016619241e-05, + "loss": 0.6566, "step": 9012 }, { - "epoch": 1.8899140280981337, - "grad_norm": 6.398572992222365, - "learning_rate": 6.362450654905392e-06, - "loss": 0.6192, + "epoch": 1.2726630895227338, + "grad_norm": 3.5763143552016503, + "learning_rate": 1.2898470037549951e-05, + "loss": 0.6745, "step": 9013 }, { - "epoch": 1.8901237156636612, - "grad_norm": 7.108089892596612, - "learning_rate": 6.360341951671009e-06, - "loss": 0.7132, + "epoch": 1.2728042925727197, + "grad_norm": 4.519392776891794, + "learning_rate": 1.2897010991121465e-05, + "loss": 0.5467, "step": 9014 }, { - "epoch": 1.8903334032291885, - "grad_norm": 7.204043090405134, - "learning_rate": 6.35823343497541e-06, - "loss": 0.8402, + "epoch": 1.2729454956227055, + "grad_norm": 3.497465990029531, + "learning_rate": 1.2895551877367697e-05, + "loss": 0.5674, "step": 9015 }, { - "epoch": 1.8905430907947158, - "grad_norm": 6.306860209044986, - "learning_rate": 6.356125104926663e-06, - "loss": 0.5375, + "epoch": 1.2730866986726914, + "grad_norm": 3.273343653483845, + "learning_rate": 1.2894092696322554e-05, + "loss": 0.5137, "step": 9016 }, { - "epoch": 1.8907527783602434, - "grad_norm": 6.794073080627332, - "learning_rate": 6.35401696163283e-06, - "loss": 0.77, + "epoch": 1.2732279017226773, + "grad_norm": 3.0075411063392696, + "learning_rate": 1.2892633448019949e-05, + "loss": 0.5048, "step": 9017 }, { - "epoch": 1.8909624659257704, - "grad_norm": 7.1303014602296475, - "learning_rate": 6.351909005201944e-06, - "loss": 0.7385, + "epoch": 1.2733691047726632, + "grad_norm": 3.522618824981709, + "learning_rate": 1.2891174132493792e-05, + "loss": 0.5525, "step": 9018 }, { - "epoch": 1.891172153491298, - "grad_norm": 6.500577542633357, - "learning_rate": 6.349801235742053e-06, - "loss": 0.6575, + "epoch": 1.273510307822649, + "grad_norm": 3.5560233088935638, + "learning_rate": 1.2889714749777991e-05, + "loss": 0.6299, "step": 9019 }, { - "epoch": 1.8913818410568253, - "grad_norm": 5.9824188618803396, - "learning_rate": 6.347693653361176e-06, - "loss": 0.593, + "epoch": 1.273651510872635, + "grad_norm": 3.6976794483143283, + "learning_rate": 1.288825529990647e-05, + "loss": 0.5337, "step": 9020 }, { - "epoch": 1.8915915286223526, - "grad_norm": 7.816785221501974, - "learning_rate": 6.34558625816734e-06, - "loss": 0.7875, + "epoch": 1.2737927139226208, + "grad_norm": 4.089550088145935, + "learning_rate": 1.2886795782913146e-05, + "loss": 0.6999, "step": 9021 }, { - "epoch": 1.89180121618788, - "grad_norm": 7.005855691641357, - "learning_rate": 6.343479050268542e-06, - "loss": 0.6356, + "epoch": 1.2739339169726067, + "grad_norm": 2.963432204737656, + "learning_rate": 1.2885336198831933e-05, + "loss": 0.4643, "step": 9022 }, { - "epoch": 1.8920109037534074, - "grad_norm": 7.156502321273942, - "learning_rate": 6.3413720297727855e-06, - "loss": 0.6618, + "epoch": 1.2740751200225926, + "grad_norm": 3.805806762094766, + "learning_rate": 1.2883876547696752e-05, + "loss": 0.5956, "step": 9023 }, { - "epoch": 1.8922205913189347, - "grad_norm": 6.894294714449833, - "learning_rate": 6.33926519678806e-06, - "loss": 0.802, + "epoch": 1.2742163230725785, + "grad_norm": 3.699386348450627, + "learning_rate": 1.2882416829541526e-05, + "loss": 0.5453, "step": 9024 }, { - "epoch": 1.8924302788844622, - "grad_norm": 7.455603504332197, - "learning_rate": 6.33715855142234e-06, - "loss": 0.7907, + "epoch": 1.2743575261225644, + "grad_norm": 3.982022060237142, + "learning_rate": 1.2880957044400178e-05, + "loss": 0.6503, "step": 9025 }, { - "epoch": 1.8926399664499896, - "grad_norm": 6.271804481529485, - "learning_rate": 6.3350520937835956e-06, - "loss": 0.6858, + "epoch": 1.2744987291725502, + "grad_norm": 3.735658804595784, + "learning_rate": 1.2879497192306634e-05, + "loss": 0.6317, "step": 9026 }, { - "epoch": 1.8928496540155169, - "grad_norm": 5.484111224763741, - "learning_rate": 6.332945823979795e-06, - "loss": 0.62, + "epoch": 1.2746399322225361, + "grad_norm": 2.9418784850845006, + "learning_rate": 1.2878037273294815e-05, + "loss": 0.4802, "step": 9027 }, { - "epoch": 1.8930593415810444, - "grad_norm": 6.78551177892579, - "learning_rate": 6.330839742118873e-06, - "loss": 0.7759, + "epoch": 1.274781135272522, + "grad_norm": 3.484275421998208, + "learning_rate": 1.2876577287398653e-05, + "loss": 0.6382, "step": 9028 }, { - "epoch": 1.8932690291465715, - "grad_norm": 6.993557637116799, - "learning_rate": 6.328733848308781e-06, - "loss": 0.6785, + "epoch": 1.274922338322508, + "grad_norm": 3.3357819990388866, + "learning_rate": 1.2875117234652078e-05, + "loss": 0.576, "step": 9029 }, { - "epoch": 1.893478716712099, - "grad_norm": 5.126076341952603, - "learning_rate": 6.3266281426574495e-06, - "loss": 0.478, + "epoch": 1.2750635413724938, + "grad_norm": 3.8040315235486735, + "learning_rate": 1.2873657115089022e-05, + "loss": 0.5232, "step": 9030 }, { - "epoch": 1.8936884042776263, - "grad_norm": 7.624656798502881, - "learning_rate": 6.324522625272793e-06, - "loss": 0.793, + "epoch": 1.2752047444224797, + "grad_norm": 3.7550747136052554, + "learning_rate": 1.2872196928743415e-05, + "loss": 0.619, "step": 9031 }, { - "epoch": 1.8938980918431536, - "grad_norm": 6.193206999668794, - "learning_rate": 6.322417296262726e-06, - "loss": 0.7363, + "epoch": 1.2753459474724653, + "grad_norm": 4.150598432962877, + "learning_rate": 1.2870736675649192e-05, + "loss": 0.6395, "step": 9032 }, { - "epoch": 1.8941077794086811, - "grad_norm": 6.639816099996433, - "learning_rate": 6.320312155735156e-06, - "loss": 0.6514, + "epoch": 1.2754871505224512, + "grad_norm": 4.946742700535504, + "learning_rate": 1.2869276355840288e-05, + "loss": 0.8017, "step": 9033 }, { - "epoch": 1.8943174669742084, - "grad_norm": 5.321986323911192, - "learning_rate": 6.318207203797967e-06, - "loss": 0.541, + "epoch": 1.275628353572437, + "grad_norm": 3.35923903874186, + "learning_rate": 1.286781596935064e-05, + "loss": 0.5291, "step": 9034 }, { - "epoch": 1.8945271545397357, - "grad_norm": 7.759774728467715, - "learning_rate": 6.316102440559044e-06, - "loss": 0.7734, + "epoch": 1.275769556622423, + "grad_norm": 4.194240938547909, + "learning_rate": 1.2866355516214189e-05, + "loss": 0.7017, "step": 9035 }, { - "epoch": 1.8947368421052633, - "grad_norm": 7.11020066043924, - "learning_rate": 6.3139978661262635e-06, - "loss": 0.5538, + "epoch": 1.2759107596724089, + "grad_norm": 4.221129370964834, + "learning_rate": 1.286489499646487e-05, + "loss": 0.7873, "step": 9036 }, { - "epoch": 1.8949465296707904, - "grad_norm": 7.362698812494282, - "learning_rate": 6.311893480607481e-06, - "loss": 0.7707, + "epoch": 1.2760519627223947, + "grad_norm": 3.946002483998895, + "learning_rate": 1.286343441013663e-05, + "loss": 0.6566, "step": 9037 }, { - "epoch": 1.8951562172363179, - "grad_norm": 5.28953207342459, - "learning_rate": 6.309789284110555e-06, - "loss": 0.5376, + "epoch": 1.2761931657723806, + "grad_norm": 3.8080867091565125, + "learning_rate": 1.2861973757263416e-05, + "loss": 0.6312, "step": 9038 }, { - "epoch": 1.8953659048018454, - "grad_norm": 6.50100437371527, - "learning_rate": 6.3076852767433325e-06, - "loss": 0.6038, + "epoch": 1.2763343688223665, + "grad_norm": 3.511239423643281, + "learning_rate": 1.2860513037879163e-05, + "loss": 0.5464, "step": 9039 }, { - "epoch": 1.8955755923673725, - "grad_norm": 6.21203372925816, - "learning_rate": 6.305581458613641e-06, - "loss": 0.5111, + "epoch": 1.2764755718723524, + "grad_norm": 3.6746233485778075, + "learning_rate": 1.2859052252017824e-05, + "loss": 0.5864, "step": 9040 }, { - "epoch": 1.8957852799329, - "grad_norm": 10.037091138073055, - "learning_rate": 6.303477829829305e-06, - "loss": 1.0874, + "epoch": 1.2766167749223383, + "grad_norm": 3.311392569297727, + "learning_rate": 1.2857591399713343e-05, + "loss": 0.6239, "step": 9041 }, { - "epoch": 1.8959949674984273, - "grad_norm": 6.714104149538097, - "learning_rate": 6.301374390498147e-06, - "loss": 0.8119, + "epoch": 1.2767579779723242, + "grad_norm": 4.298390151375422, + "learning_rate": 1.2856130480999673e-05, + "loss": 0.7454, "step": 9042 }, { - "epoch": 1.8962046550639546, - "grad_norm": 6.127757367450675, - "learning_rate": 6.299271140727962e-06, - "loss": 0.5786, + "epoch": 1.27689918102231, + "grad_norm": 3.1506816768221753, + "learning_rate": 1.2854669495910766e-05, + "loss": 0.5701, "step": 9043 }, { - "epoch": 1.8964143426294822, - "grad_norm": 7.763536760845901, - "learning_rate": 6.29716808062655e-06, - "loss": 0.7182, + "epoch": 1.277040384072296, + "grad_norm": 2.916827332024249, + "learning_rate": 1.2853208444480572e-05, + "loss": 0.4372, "step": 9044 }, { - "epoch": 1.8966240301950095, - "grad_norm": 5.608766144361198, - "learning_rate": 6.295065210301699e-06, - "loss": 0.5903, + "epoch": 1.2771815871222818, + "grad_norm": 3.6272905943783345, + "learning_rate": 1.2851747326743042e-05, + "loss": 0.6946, "step": 9045 }, { - "epoch": 1.8968337177605368, - "grad_norm": 8.022382414718525, - "learning_rate": 6.29296252986118e-06, - "loss": 0.8858, + "epoch": 1.2773227901722677, + "grad_norm": 3.8019574062878827, + "learning_rate": 1.2850286142732141e-05, + "loss": 0.6799, "step": 9046 }, { - "epoch": 1.8970434053260643, - "grad_norm": 6.3978998577802235, - "learning_rate": 6.290860039412758e-06, - "loss": 0.6672, + "epoch": 1.2774639932222536, + "grad_norm": 2.5473892670713205, + "learning_rate": 1.284882489248182e-05, + "loss": 0.4538, "step": 9047 }, { - "epoch": 1.8972530928915914, - "grad_norm": 6.595551143512827, - "learning_rate": 6.2887577390642e-06, - "loss": 0.7484, + "epoch": 1.2776051962722395, + "grad_norm": 3.1407306516241196, + "learning_rate": 1.2847363576026037e-05, + "loss": 0.5421, "step": 9048 }, { - "epoch": 1.897462780457119, - "grad_norm": 6.693288000848857, - "learning_rate": 6.286655628923237e-06, - "loss": 0.7635, + "epoch": 1.2777463993222253, + "grad_norm": 3.879403219381558, + "learning_rate": 1.2845902193398754e-05, + "loss": 0.6871, "step": 9049 }, { - "epoch": 1.8976724680226462, - "grad_norm": 6.703232260131536, - "learning_rate": 6.284553709097616e-06, - "loss": 0.6711, + "epoch": 1.2778876023722112, + "grad_norm": 3.159963253562537, + "learning_rate": 1.2844440744633934e-05, + "loss": 0.5396, "step": 9050 }, { - "epoch": 1.8978821555881735, - "grad_norm": 7.136691085561368, - "learning_rate": 6.282451979695063e-06, - "loss": 0.6758, + "epoch": 1.278028805422197, + "grad_norm": 4.778117223539895, + "learning_rate": 1.2842979229765542e-05, + "loss": 0.8533, "step": 9051 }, { - "epoch": 1.898091843153701, - "grad_norm": 6.155548977241294, - "learning_rate": 6.280350440823292e-06, - "loss": 0.6813, + "epoch": 1.278170008472183, + "grad_norm": 3.0857270106133434, + "learning_rate": 1.2841517648827538e-05, + "loss": 0.5128, "step": 9052 }, { - "epoch": 1.8983015307192284, - "grad_norm": 7.471725647580179, - "learning_rate": 6.278249092590009e-06, - "loss": 0.7158, + "epoch": 1.2783112115221689, + "grad_norm": 4.069958689198129, + "learning_rate": 1.284005600185389e-05, + "loss": 0.8294, "step": 9053 }, { - "epoch": 1.8985112182847557, - "grad_norm": 6.973128331656198, - "learning_rate": 6.276147935102922e-06, - "loss": 0.6108, + "epoch": 1.2784524145721547, + "grad_norm": 3.0031579853538606, + "learning_rate": 1.2838594288878567e-05, + "loss": 0.482, "step": 9054 }, { - "epoch": 1.8987209058502832, - "grad_norm": 5.605320781489347, - "learning_rate": 6.274046968469709e-06, - "loss": 0.5382, + "epoch": 1.2785936176221406, + "grad_norm": 3.7158121329497287, + "learning_rate": 1.283713250993554e-05, + "loss": 0.5901, "step": 9055 }, { - "epoch": 1.8989305934158105, - "grad_norm": 6.120858741148775, - "learning_rate": 6.271946192798051e-06, - "loss": 0.522, + "epoch": 1.2787348206721265, + "grad_norm": 4.463494988961276, + "learning_rate": 1.2835670665058779e-05, + "loss": 0.6481, "step": 9056 }, { - "epoch": 1.8991402809813378, - "grad_norm": 8.199832552751719, - "learning_rate": 6.269845608195617e-06, - "loss": 0.7982, + "epoch": 1.2788760237221124, + "grad_norm": 3.415631213538582, + "learning_rate": 1.2834208754282255e-05, + "loss": 0.6852, "step": 9057 }, { - "epoch": 1.8993499685468653, - "grad_norm": 8.509136923994387, - "learning_rate": 6.267745214770065e-06, - "loss": 0.9397, + "epoch": 1.2790172267720983, + "grad_norm": 4.882143141877675, + "learning_rate": 1.2832746777639947e-05, + "loss": 0.6252, "step": 9058 }, { - "epoch": 1.8995596561123924, - "grad_norm": 6.5282795742878035, - "learning_rate": 6.265645012629042e-06, - "loss": 0.7041, + "epoch": 1.2791584298220842, + "grad_norm": 3.3786757878391533, + "learning_rate": 1.2831284735165822e-05, + "loss": 0.6443, "step": 9059 }, { - "epoch": 1.89976934367792, - "grad_norm": 6.399206347320185, - "learning_rate": 6.263545001880192e-06, - "loss": 0.6985, + "epoch": 1.27929963287207, + "grad_norm": 3.3911009722904453, + "learning_rate": 1.2829822626893867e-05, + "loss": 0.6144, "step": 9060 }, { - "epoch": 1.8999790312434472, - "grad_norm": 7.4663703412715945, - "learning_rate": 6.261445182631141e-06, - "loss": 0.7214, + "epoch": 1.279440835922056, + "grad_norm": 3.345531011189538, + "learning_rate": 1.2828360452858055e-05, + "loss": 0.6042, "step": 9061 }, { - "epoch": 1.9001887188089746, - "grad_norm": 8.085544298811916, - "learning_rate": 6.259345554989507e-06, - "loss": 0.8153, + "epoch": 1.2795820389720418, + "grad_norm": 2.9206185831109264, + "learning_rate": 1.2826898213092365e-05, + "loss": 0.4402, "step": 9062 }, { - "epoch": 1.900398406374502, - "grad_norm": 5.400880713411227, - "learning_rate": 6.257246119062903e-06, - "loss": 0.5791, + "epoch": 1.2797232420220277, + "grad_norm": 3.8689221550079136, + "learning_rate": 1.2825435907630785e-05, + "loss": 0.6051, "step": 9063 }, { - "epoch": 1.9006080939400294, - "grad_norm": 5.738390552435382, - "learning_rate": 6.2551468749589235e-06, - "loss": 0.6487, + "epoch": 1.2798644450720136, + "grad_norm": 4.023273198520075, + "learning_rate": 1.2823973536507295e-05, + "loss": 0.5673, "step": 9064 }, { - "epoch": 1.9008177815055567, - "grad_norm": 7.129336315403556, - "learning_rate": 6.253047822785163e-06, - "loss": 0.8305, + "epoch": 1.2800056481219995, + "grad_norm": 3.7825713302075843, + "learning_rate": 1.2822511099755875e-05, + "loss": 0.5499, "step": 9065 }, { - "epoch": 1.9010274690710842, - "grad_norm": 6.626744924057898, - "learning_rate": 6.250948962649203e-06, - "loss": 0.6631, + "epoch": 1.2801468511719853, + "grad_norm": 3.6008399540236224, + "learning_rate": 1.282104859741052e-05, + "loss": 0.5746, "step": 9066 }, { - "epoch": 1.9012371566366113, - "grad_norm": 6.854769677739551, - "learning_rate": 6.24885029465861e-06, - "loss": 0.7244, + "epoch": 1.2802880542219712, + "grad_norm": 4.178942750261326, + "learning_rate": 1.281958602950521e-05, + "loss": 0.6172, "step": 9067 }, { - "epoch": 1.9014468442021388, - "grad_norm": 9.352684845972671, - "learning_rate": 6.246751818920942e-06, - "loss": 0.7809, + "epoch": 1.280429257271957, + "grad_norm": 3.6290002368569265, + "learning_rate": 1.2818123396073942e-05, + "loss": 0.5913, "step": 9068 }, { - "epoch": 1.9016565317676661, - "grad_norm": 6.71503066118461, - "learning_rate": 6.244653535543761e-06, - "loss": 0.7525, + "epoch": 1.280570460321943, + "grad_norm": 4.014921813384591, + "learning_rate": 1.2816660697150702e-05, + "loss": 0.6248, "step": 9069 }, { - "epoch": 1.9018662193331934, - "grad_norm": 7.055347972244156, - "learning_rate": 6.242555444634592e-06, - "loss": 0.664, + "epoch": 1.2807116633719289, + "grad_norm": 3.3017953832358713, + "learning_rate": 1.2815197932769486e-05, + "loss": 0.5486, "step": 9070 }, { - "epoch": 1.902075906898721, - "grad_norm": 7.641179472018461, - "learning_rate": 6.240457546300975e-06, - "loss": 0.7026, + "epoch": 1.2808528664219148, + "grad_norm": 3.1221616580412603, + "learning_rate": 1.2813735102964281e-05, + "loss": 0.5434, "step": 9071 }, { - "epoch": 1.9022855944642483, - "grad_norm": 6.216153024907154, - "learning_rate": 6.238359840650432e-06, + "epoch": 1.2809940694719006, + "grad_norm": 4.1161508168803405, + "learning_rate": 1.2812272207769092e-05, "loss": 0.6951, "step": 9072 }, { - "epoch": 1.9024952820297756, - "grad_norm": 5.107328948895102, - "learning_rate": 6.23626232779047e-06, - "loss": 0.4799, + "epoch": 1.2811352725218865, + "grad_norm": 3.822547768492929, + "learning_rate": 1.281080924721791e-05, + "loss": 0.7153, "step": 9073 }, { - "epoch": 1.902704969595303, - "grad_norm": 7.679785441145659, - "learning_rate": 6.234165007828587e-06, - "loss": 0.9029, + "epoch": 1.2812764755718724, + "grad_norm": 4.32537764969359, + "learning_rate": 1.2809346221344734e-05, + "loss": 0.7702, "step": 9074 }, { - "epoch": 1.9029146571608304, - "grad_norm": 8.234386008190128, - "learning_rate": 6.232067880872288e-06, - "loss": 0.6897, + "epoch": 1.2814176786218583, + "grad_norm": 3.82402094964537, + "learning_rate": 1.2807883130183565e-05, + "loss": 0.7318, "step": 9075 }, { - "epoch": 1.9031243447263577, - "grad_norm": 5.983965735223116, - "learning_rate": 6.229970947029038e-06, - "loss": 0.583, + "epoch": 1.2815588816718442, + "grad_norm": 3.8540534164318228, + "learning_rate": 1.2806419973768407e-05, + "loss": 0.6531, "step": 9076 }, { - "epoch": 1.9033340322918852, - "grad_norm": 6.5115682304780185, - "learning_rate": 6.2278742064063166e-06, - "loss": 0.5923, + "epoch": 1.28170008472183, + "grad_norm": 3.1890766880969954, + "learning_rate": 1.280495675213326e-05, + "loss": 0.5015, "step": 9077 }, { - "epoch": 1.9035437198574123, - "grad_norm": 8.604485664282944, - "learning_rate": 6.225777659111589e-06, - "loss": 0.8574, + "epoch": 1.281841287771816, + "grad_norm": 3.642016654278224, + "learning_rate": 1.280349346531213e-05, + "loss": 0.6078, "step": 9078 }, { - "epoch": 1.9037534074229399, - "grad_norm": 4.553535797560089, - "learning_rate": 6.2236813052522986e-06, - "loss": 0.4361, + "epoch": 1.2819824908218018, + "grad_norm": 4.863419029139658, + "learning_rate": 1.2802030113339016e-05, + "loss": 0.9011, "step": 9079 }, { - "epoch": 1.9039630949884672, - "grad_norm": 7.746790973335353, - "learning_rate": 6.221585144935889e-06, - "loss": 0.6398, + "epoch": 1.2821236938717877, + "grad_norm": 3.721077428242094, + "learning_rate": 1.2800566696247943e-05, + "loss": 0.6479, "step": 9080 }, { - "epoch": 1.9041727825539945, - "grad_norm": 6.303396338421388, - "learning_rate": 6.219489178269798e-06, - "loss": 0.7479, + "epoch": 1.2822648969217736, + "grad_norm": 3.832086077215903, + "learning_rate": 1.2799103214072902e-05, + "loss": 0.5806, "step": 9081 }, { - "epoch": 1.904382470119522, - "grad_norm": 6.791186542845121, - "learning_rate": 6.217393405361443e-06, - "loss": 0.7714, + "epoch": 1.2824060999717595, + "grad_norm": 3.8781451452458637, + "learning_rate": 1.2797639666847912e-05, + "loss": 0.657, "step": 9082 }, { - "epoch": 1.9045921576850493, - "grad_norm": 6.06417220320398, - "learning_rate": 6.215297826318236e-06, - "loss": 0.5004, + "epoch": 1.2825473030217451, + "grad_norm": 3.6559326162855412, + "learning_rate": 1.2796176054606987e-05, + "loss": 0.6045, "step": 9083 }, { - "epoch": 1.9048018452505766, - "grad_norm": 8.237523514351546, - "learning_rate": 6.213202441247577e-06, - "loss": 0.7543, + "epoch": 1.282688506071731, + "grad_norm": 3.635084095363264, + "learning_rate": 1.2794712377384135e-05, + "loss": 0.4881, "step": 9084 }, { - "epoch": 1.9050115328161041, - "grad_norm": 7.2654754123363094, - "learning_rate": 6.211107250256865e-06, - "loss": 0.7512, + "epoch": 1.282829709121717, + "grad_norm": 3.6237142931407873, + "learning_rate": 1.2793248635213378e-05, + "loss": 0.6144, "step": 9085 }, { - "epoch": 1.9052212203816312, - "grad_norm": 8.224161068590927, - "learning_rate": 6.2090122534534765e-06, - "loss": 0.8016, + "epoch": 1.2829709121717028, + "grad_norm": 3.7376196314230072, + "learning_rate": 1.2791784828128727e-05, + "loss": 0.5813, "step": 9086 }, { - "epoch": 1.9054309079471587, - "grad_norm": 7.429953410677938, - "learning_rate": 6.206917450944785e-06, - "loss": 0.924, + "epoch": 1.2831121152216887, + "grad_norm": 4.337537177584381, + "learning_rate": 1.2790320956164201e-05, + "loss": 0.7707, "step": 9087 }, { - "epoch": 1.905640595512686, - "grad_norm": 6.927919649637011, - "learning_rate": 6.204822842838154e-06, - "loss": 0.7153, + "epoch": 1.2832533182716745, + "grad_norm": 3.8976983203461235, + "learning_rate": 1.2788857019353821e-05, + "loss": 0.6553, "step": 9088 }, { - "epoch": 1.9058502830782134, - "grad_norm": 8.425167478977086, - "learning_rate": 6.202728429240931e-06, - "loss": 0.8736, + "epoch": 1.2833945213216604, + "grad_norm": 3.551887662402904, + "learning_rate": 1.2787393017731611e-05, + "loss": 0.6227, "step": 9089 }, { - "epoch": 1.9060599706437409, - "grad_norm": 7.377057161625221, - "learning_rate": 6.200634210260464e-06, - "loss": 0.8848, + "epoch": 1.2835357243716463, + "grad_norm": 4.93692179457243, + "learning_rate": 1.278592895133159e-05, + "loss": 0.587, "step": 9090 }, { - "epoch": 1.9062696582092682, - "grad_norm": 6.766126372115526, - "learning_rate": 6.198540186004086e-06, - "loss": 0.6415, + "epoch": 1.2836769274216322, + "grad_norm": 3.3096639248984863, + "learning_rate": 1.278446482018778e-05, + "loss": 0.6119, "step": 9091 }, { - "epoch": 1.9064793457747955, - "grad_norm": 8.728347245976163, - "learning_rate": 6.196446356579115e-06, - "loss": 1.0288, + "epoch": 1.283818130471618, + "grad_norm": 3.5142085273817085, + "learning_rate": 1.2783000624334214e-05, + "loss": 0.5337, "step": 9092 }, { - "epoch": 1.906689033340323, - "grad_norm": 6.619654272517235, - "learning_rate": 6.194352722092864e-06, - "loss": 0.7075, + "epoch": 1.283959333521604, + "grad_norm": 3.561899909836545, + "learning_rate": 1.2781536363804916e-05, + "loss": 0.6693, "step": 9093 }, { - "epoch": 1.9068987209058503, - "grad_norm": 6.564929765525703, - "learning_rate": 6.192259282652639e-06, - "loss": 0.7521, + "epoch": 1.2841005365715898, + "grad_norm": 3.134394405928417, + "learning_rate": 1.2780072038633913e-05, + "loss": 0.5643, "step": 9094 }, { - "epoch": 1.9071084084713776, - "grad_norm": 6.853229650318318, - "learning_rate": 6.190166038365725e-06, - "loss": 0.6715, + "epoch": 1.2842417396215757, + "grad_norm": 3.7054041950743053, + "learning_rate": 1.2778607648855234e-05, + "loss": 0.6389, "step": 9095 }, { - "epoch": 1.9073180960369052, - "grad_norm": 6.357485968036707, - "learning_rate": 6.188072989339414e-06, - "loss": 0.6732, + "epoch": 1.2843829426715616, + "grad_norm": 3.1891706803151396, + "learning_rate": 1.2777143194502915e-05, + "loss": 0.6225, "step": 9096 }, { - "epoch": 1.9075277836024322, - "grad_norm": 7.76001776586375, - "learning_rate": 6.185980135680975e-06, - "loss": 0.9084, + "epoch": 1.2845241457215475, + "grad_norm": 3.7730931467218536, + "learning_rate": 1.277567867561099e-05, + "loss": 0.6012, "step": 9097 }, { - "epoch": 1.9077374711679598, - "grad_norm": 6.895661462428787, - "learning_rate": 6.183887477497665e-06, - "loss": 0.6244, + "epoch": 1.2846653487715334, + "grad_norm": 3.5183364941382917, + "learning_rate": 1.2774214092213489e-05, + "loss": 0.6552, "step": 9098 }, { - "epoch": 1.907947158733487, - "grad_norm": 6.450003192705835, - "learning_rate": 6.181795014896741e-06, - "loss": 0.5019, + "epoch": 1.2848065518215193, + "grad_norm": 3.874592950812235, + "learning_rate": 1.2772749444344448e-05, + "loss": 0.5055, "step": 9099 }, { - "epoch": 1.9081568462990144, - "grad_norm": 5.571363813014145, - "learning_rate": 6.17970274798545e-06, - "loss": 0.4562, + "epoch": 1.2849477548715051, + "grad_norm": 3.255264845768958, + "learning_rate": 1.2771284732037912e-05, + "loss": 0.6486, "step": 9100 }, { - "epoch": 1.908366533864542, - "grad_norm": 7.382282277145855, - "learning_rate": 6.177610676871012e-06, - "loss": 0.823, + "epoch": 1.285088957921491, + "grad_norm": 3.877770314912597, + "learning_rate": 1.2769819955327913e-05, + "loss": 0.6116, "step": 9101 }, { - "epoch": 1.9085762214300692, - "grad_norm": 6.927303336290081, - "learning_rate": 6.175518801660659e-06, - "loss": 0.6553, + "epoch": 1.285230160971477, + "grad_norm": 3.612740206367866, + "learning_rate": 1.2768355114248493e-05, + "loss": 0.5899, "step": 9102 }, { - "epoch": 1.9087859089955965, - "grad_norm": 6.114409144164231, - "learning_rate": 6.173427122461604e-06, - "loss": 0.6102, + "epoch": 1.2853713640214628, + "grad_norm": 3.2431648960275097, + "learning_rate": 1.27668902088337e-05, + "loss": 0.5624, "step": 9103 }, { - "epoch": 1.908995596561124, - "grad_norm": 6.386235760732453, - "learning_rate": 6.171335639381042e-06, - "loss": 0.6282, + "epoch": 1.2855125670714487, + "grad_norm": 3.2387335645328728, + "learning_rate": 1.2765425239117572e-05, + "loss": 0.5617, "step": 9104 }, { - "epoch": 1.9092052841266511, - "grad_norm": 7.410881420993415, - "learning_rate": 6.169244352526167e-06, - "loss": 0.7806, + "epoch": 1.2856537701214346, + "grad_norm": 3.236788266527372, + "learning_rate": 1.2763960205134154e-05, + "loss": 0.478, "step": 9105 }, { - "epoch": 1.9094149716921787, - "grad_norm": 7.0515819374956, - "learning_rate": 6.167153262004171e-06, - "loss": 0.7283, + "epoch": 1.2857949731714204, + "grad_norm": 3.750706132297012, + "learning_rate": 1.2762495106917496e-05, + "loss": 0.6337, "step": 9106 }, { - "epoch": 1.909624659257706, - "grad_norm": 7.802412724221096, - "learning_rate": 6.16506236792221e-06, - "loss": 0.683, + "epoch": 1.2859361762214063, + "grad_norm": 4.048985172899322, + "learning_rate": 1.2761029944501646e-05, + "loss": 0.6661, "step": 9107 }, { - "epoch": 1.9098343468232333, - "grad_norm": 6.207539766096078, - "learning_rate": 6.162971670387457e-06, - "loss": 0.5298, + "epoch": 1.2860773792713922, + "grad_norm": 3.8888052252130394, + "learning_rate": 1.2759564717920649e-05, + "loss": 0.5669, "step": 9108 }, { - "epoch": 1.9100440343887608, - "grad_norm": 5.405935969148804, - "learning_rate": 6.160881169507062e-06, - "loss": 0.5624, + "epoch": 1.286218582321378, + "grad_norm": 2.915494251355418, + "learning_rate": 1.2758099427208561e-05, + "loss": 0.5163, "step": 9109 }, { - "epoch": 1.910253721954288, - "grad_norm": 6.652201333484288, - "learning_rate": 6.158790865388164e-06, - "loss": 0.6425, + "epoch": 1.286359785371364, + "grad_norm": 4.285217129704498, + "learning_rate": 1.2756634072399434e-05, + "loss": 0.6707, "step": 9110 }, { - "epoch": 1.9104634095198154, - "grad_norm": 6.6782549269991875, - "learning_rate": 6.156700758137895e-06, - "loss": 0.6581, + "epoch": 1.2865009884213499, + "grad_norm": 3.512441839670131, + "learning_rate": 1.275516865352732e-05, + "loss": 0.6144, "step": 9111 }, { - "epoch": 1.910673097085343, - "grad_norm": 7.728669079965136, - "learning_rate": 6.154610847863383e-06, - "loss": 0.7246, + "epoch": 1.2866421914713357, + "grad_norm": 3.2951732486540566, + "learning_rate": 1.2753703170626279e-05, + "loss": 0.5363, "step": 9112 }, { - "epoch": 1.9108827846508702, - "grad_norm": 6.87586869730982, - "learning_rate": 6.15252113467173e-06, - "loss": 0.6548, + "epoch": 1.2867833945213216, + "grad_norm": 2.729260865312269, + "learning_rate": 1.2752237623730365e-05, + "loss": 0.4523, "step": 9113 }, { - "epoch": 1.9110924722163976, - "grad_norm": 6.60880623956218, - "learning_rate": 6.150431618670043e-06, - "loss": 0.7322, + "epoch": 1.2869245975713075, + "grad_norm": 4.1942233778185445, + "learning_rate": 1.2750772012873637e-05, + "loss": 0.6665, "step": 9114 }, { - "epoch": 1.911302159781925, - "grad_norm": 6.818183484154013, - "learning_rate": 6.148342299965415e-06, - "loss": 0.9061, + "epoch": 1.2870658006212934, + "grad_norm": 3.7468447817708204, + "learning_rate": 1.2749306338090155e-05, + "loss": 0.5828, "step": 9115 }, { - "epoch": 1.9115118473474522, - "grad_norm": 7.52681811926945, - "learning_rate": 6.14625317866492e-06, - "loss": 0.7357, + "epoch": 1.2872070036712793, + "grad_norm": 3.7043649426765146, + "learning_rate": 1.2747840599413981e-05, + "loss": 0.5242, "step": 9116 }, { - "epoch": 1.9117215349129797, - "grad_norm": 6.348985101548476, - "learning_rate": 6.144164254875635e-06, - "loss": 0.7264, + "epoch": 1.2873482067212652, + "grad_norm": 3.5891298501327373, + "learning_rate": 1.2746374796879177e-05, + "loss": 0.6133, "step": 9117 }, { - "epoch": 1.911931222478507, - "grad_norm": 6.527068126025374, - "learning_rate": 6.142075528704623e-06, - "loss": 0.6643, + "epoch": 1.287489409771251, + "grad_norm": 4.172448338957914, + "learning_rate": 1.274490893051981e-05, + "loss": 0.6337, "step": 9118 }, { - "epoch": 1.9121409100440343, - "grad_norm": 8.975990707763494, - "learning_rate": 6.139987000258928e-06, - "loss": 0.8361, + "epoch": 1.287630612821237, + "grad_norm": 4.017832984296004, + "learning_rate": 1.2743443000369947e-05, + "loss": 0.7282, "step": 9119 }, { - "epoch": 1.9123505976095618, - "grad_norm": 10.790468247628679, - "learning_rate": 6.137898669645592e-06, - "loss": 1.1709, + "epoch": 1.2877718158712228, + "grad_norm": 3.138434748723435, + "learning_rate": 1.274197700646365e-05, + "loss": 0.5969, "step": 9120 }, { - "epoch": 1.9125602851750891, - "grad_norm": 6.688955881057216, - "learning_rate": 6.135810536971655e-06, - "loss": 0.7099, + "epoch": 1.2879130189212087, + "grad_norm": 3.2319800607921234, + "learning_rate": 1.2740510948834995e-05, + "loss": 0.5734, "step": 9121 }, { - "epoch": 1.9127699727406164, - "grad_norm": 7.025398559290096, - "learning_rate": 6.133722602344123e-06, - "loss": 0.8218, + "epoch": 1.2880542219711946, + "grad_norm": 3.5038270449090345, + "learning_rate": 1.2739044827518043e-05, + "loss": 0.6379, "step": 9122 }, { - "epoch": 1.912979660306144, - "grad_norm": 6.929604564781155, - "learning_rate": 6.131634865870015e-06, - "loss": 0.7349, + "epoch": 1.2881954250211805, + "grad_norm": 3.4222816116387684, + "learning_rate": 1.273757864254688e-05, + "loss": 0.6449, "step": 9123 }, { - "epoch": 1.913189347871671, - "grad_norm": 6.135038794538746, - "learning_rate": 6.129547327656333e-06, - "loss": 0.5375, + "epoch": 1.2883366280711663, + "grad_norm": 4.2249335261196235, + "learning_rate": 1.2736112393955567e-05, + "loss": 0.5842, "step": 9124 }, { - "epoch": 1.9133990354371986, - "grad_norm": 6.986841285048638, - "learning_rate": 6.127459987810061e-06, - "loss": 0.8427, + "epoch": 1.2884778311211522, + "grad_norm": 3.461448309290748, + "learning_rate": 1.273464608177818e-05, + "loss": 0.5505, "step": 9125 }, { - "epoch": 1.9136087230027259, - "grad_norm": 9.019622473092278, - "learning_rate": 6.1253728464381776e-06, - "loss": 0.7606, + "epoch": 1.288619034171138, + "grad_norm": 3.9934403814322406, + "learning_rate": 1.2733179706048805e-05, + "loss": 0.5846, "step": 9126 }, { - "epoch": 1.9138184105682532, - "grad_norm": 5.529049538362865, - "learning_rate": 6.123285903647665e-06, - "loss": 0.4932, + "epoch": 1.288760237221124, + "grad_norm": 3.4278466965991843, + "learning_rate": 1.2731713266801515e-05, + "loss": 0.5523, "step": 9127 }, { - "epoch": 1.9140280981337807, - "grad_norm": 5.395914430097848, - "learning_rate": 6.121199159545465e-06, - "loss": 0.4906, + "epoch": 1.2889014402711099, + "grad_norm": 3.8982328172273304, + "learning_rate": 1.2730246764070385e-05, + "loss": 0.6666, "step": 9128 }, { - "epoch": 1.914237785699308, - "grad_norm": 5.139612158245559, - "learning_rate": 6.119112614238538e-06, - "loss": 0.6098, + "epoch": 1.2890426433210957, + "grad_norm": 3.2968420413702773, + "learning_rate": 1.2728780197889502e-05, + "loss": 0.4832, "step": 9129 }, { - "epoch": 1.9144474732648353, - "grad_norm": 6.256769229062346, - "learning_rate": 6.117026267833823e-06, - "loss": 0.6793, + "epoch": 1.2891838463710816, + "grad_norm": 2.749232950974371, + "learning_rate": 1.2727313568292942e-05, + "loss": 0.408, "step": 9130 }, { - "epoch": 1.9146571608303629, - "grad_norm": 6.025461317973854, - "learning_rate": 6.114940120438244e-06, - "loss": 0.735, + "epoch": 1.2893250494210675, + "grad_norm": 3.4603073480395588, + "learning_rate": 1.2725846875314796e-05, + "loss": 0.703, "step": 9131 }, { - "epoch": 1.9148668483958902, - "grad_norm": 8.668052171174216, - "learning_rate": 6.112854172158721e-06, - "loss": 0.7462, + "epoch": 1.2894662524710534, + "grad_norm": 3.303766060637883, + "learning_rate": 1.2724380118989144e-05, + "loss": 0.5756, "step": 9132 }, { - "epoch": 1.9150765359614175, - "grad_norm": 6.422776824565058, - "learning_rate": 6.110768423102167e-06, - "loss": 0.668, + "epoch": 1.2896074555210393, + "grad_norm": 3.56451770569294, + "learning_rate": 1.2722913299350072e-05, + "loss": 0.5995, "step": 9133 }, { - "epoch": 1.915286223526945, - "grad_norm": 4.580057136796635, - "learning_rate": 6.108682873375476e-06, - "loss": 0.4537, + "epoch": 1.2897486585710252, + "grad_norm": 4.361508179535617, + "learning_rate": 1.2721446416431676e-05, + "loss": 0.7199, "step": 9134 }, { - "epoch": 1.915495911092472, - "grad_norm": 6.231359392442732, - "learning_rate": 6.106597523085535e-06, - "loss": 0.6518, + "epoch": 1.289889861621011, + "grad_norm": 3.6834623748311115, + "learning_rate": 1.271997947026804e-05, + "loss": 0.5984, "step": 9135 }, { - "epoch": 1.9157055986579996, - "grad_norm": 7.056009146623417, - "learning_rate": 6.104512372339227e-06, - "loss": 0.6164, + "epoch": 1.290031064670997, + "grad_norm": 3.3743725090140515, + "learning_rate": 1.271851246089325e-05, + "loss": 0.543, "step": 9136 }, { - "epoch": 1.915915286223527, - "grad_norm": 7.517923545632127, - "learning_rate": 6.102427421243411e-06, - "loss": 0.8008, + "epoch": 1.2901722677209828, + "grad_norm": 3.0500344111849484, + "learning_rate": 1.271704538834141e-05, + "loss": 0.5708, "step": 9137 }, { - "epoch": 1.9161249737890542, - "grad_norm": 6.809358798940432, - "learning_rate": 6.100342669904953e-06, - "loss": 0.6515, + "epoch": 1.2903134707709687, + "grad_norm": 2.9109490139650513, + "learning_rate": 1.2715578252646608e-05, + "loss": 0.4338, "step": 9138 }, { - "epoch": 1.9163346613545817, - "grad_norm": 6.937425089981554, - "learning_rate": 6.098258118430698e-06, - "loss": 0.7662, + "epoch": 1.2904546738209546, + "grad_norm": 3.0943686708591365, + "learning_rate": 1.2714111053842934e-05, + "loss": 0.5301, "step": 9139 }, { - "epoch": 1.916544348920109, - "grad_norm": 6.123212770255885, - "learning_rate": 6.09617376692748e-06, - "loss": 0.6688, + "epoch": 1.2905958768709405, + "grad_norm": 3.4863447653048922, + "learning_rate": 1.2712643791964501e-05, + "loss": 0.7131, "step": 9140 }, { - "epoch": 1.9167540364856364, - "grad_norm": 5.599001937833183, - "learning_rate": 6.094089615502127e-06, - "loss": 0.6122, + "epoch": 1.2907370799209263, + "grad_norm": 3.041717308363255, + "learning_rate": 1.2711176467045392e-05, + "loss": 0.547, "step": 9141 }, { - "epoch": 1.9169637240511639, - "grad_norm": 6.732902342326816, - "learning_rate": 6.092005664261453e-06, - "loss": 0.6461, + "epoch": 1.2908782829709122, + "grad_norm": 3.4656368821398758, + "learning_rate": 1.2709709079119712e-05, + "loss": 0.6446, "step": 9142 }, { - "epoch": 1.917173411616691, - "grad_norm": 7.902813312695726, - "learning_rate": 6.089921913312272e-06, - "loss": 0.8317, + "epoch": 1.291019486020898, + "grad_norm": 3.947719585980999, + "learning_rate": 1.2708241628221567e-05, + "loss": 0.5806, "step": 9143 }, { - "epoch": 1.9173830991822185, - "grad_norm": 6.641439167348805, - "learning_rate": 6.087838362761372e-06, - "loss": 0.6143, + "epoch": 1.291160689070884, + "grad_norm": 3.2697595091787504, + "learning_rate": 1.2706774114385054e-05, + "loss": 0.5626, "step": 9144 }, { - "epoch": 1.9175927867477458, - "grad_norm": 6.354099916116643, - "learning_rate": 6.085755012715542e-06, - "loss": 0.7456, + "epoch": 1.2913018921208699, + "grad_norm": 4.377270108459941, + "learning_rate": 1.270530653764428e-05, + "loss": 0.6703, "step": 9145 }, { - "epoch": 1.917802474313273, - "grad_norm": 7.355514843776557, - "learning_rate": 6.083671863281557e-06, - "loss": 0.6493, + "epoch": 1.2914430951708558, + "grad_norm": 4.202886227512236, + "learning_rate": 1.270383889803335e-05, + "loss": 0.6021, "step": 9146 }, { - "epoch": 1.9180121618788006, - "grad_norm": 7.357720894809341, - "learning_rate": 6.081588914566179e-06, - "loss": 0.7073, + "epoch": 1.2915842982208416, + "grad_norm": 3.7745778135809793, + "learning_rate": 1.2702371195586373e-05, + "loss": 0.6119, "step": 9147 }, { - "epoch": 1.918221849444328, - "grad_norm": 6.268968389622958, - "learning_rate": 6.079506166676168e-06, - "loss": 0.7148, + "epoch": 1.2917255012708275, + "grad_norm": 3.009342510453179, + "learning_rate": 1.2700903430337456e-05, + "loss": 0.4995, "step": 9148 }, { - "epoch": 1.9184315370098552, - "grad_norm": 8.406200567218905, - "learning_rate": 6.077423619718266e-06, - "loss": 0.8671, + "epoch": 1.2918667043208134, + "grad_norm": 3.900364454571682, + "learning_rate": 1.2699435602320709e-05, + "loss": 0.7363, "step": 9149 }, { - "epoch": 1.9186412245753828, - "grad_norm": 6.133066460945635, - "learning_rate": 6.075341273799205e-06, - "loss": 0.5847, + "epoch": 1.2920079073707993, + "grad_norm": 2.727029227241306, + "learning_rate": 1.2697967711570243e-05, + "loss": 0.4824, "step": 9150 }, { - "epoch": 1.91885091214091, - "grad_norm": 5.655222529902011, - "learning_rate": 6.073259129025709e-06, - "loss": 0.5088, + "epoch": 1.2921491104207852, + "grad_norm": 3.9514071111954103, + "learning_rate": 1.2696499758120173e-05, + "loss": 0.6633, "step": 9151 }, { - "epoch": 1.9190605997064374, - "grad_norm": 7.623272778112802, - "learning_rate": 6.0711771855045e-06, - "loss": 0.5804, + "epoch": 1.292290313470771, + "grad_norm": 3.9017390315751688, + "learning_rate": 1.2695031742004617e-05, + "loss": 0.6377, "step": 9152 }, { - "epoch": 1.919270287271965, - "grad_norm": 6.80615048826173, - "learning_rate": 6.069095443342268e-06, - "loss": 0.7016, + "epoch": 1.292431516520757, + "grad_norm": 3.7045476575352314, + "learning_rate": 1.2693563663257684e-05, + "loss": 0.6663, "step": 9153 }, { - "epoch": 1.919479974837492, - "grad_norm": 6.442726991262663, - "learning_rate": 6.067013902645713e-06, - "loss": 0.6521, + "epoch": 1.2925727195707428, + "grad_norm": 3.923140023803046, + "learning_rate": 1.2692095521913494e-05, + "loss": 0.7567, "step": 9154 }, { - "epoch": 1.9196896624030195, - "grad_norm": 8.394638438048586, - "learning_rate": 6.06493256352152e-06, - "loss": 0.7845, + "epoch": 1.2927139226207287, + "grad_norm": 3.2456361405594985, + "learning_rate": 1.2690627318006171e-05, + "loss": 0.5535, "step": 9155 }, { - "epoch": 1.9198993499685468, - "grad_norm": 6.531901041349821, - "learning_rate": 6.062851426076356e-06, - "loss": 0.7459, + "epoch": 1.2928551256707146, + "grad_norm": 4.517974292635989, + "learning_rate": 1.2689159051569824e-05, + "loss": 0.7345, "step": 9156 }, { - "epoch": 1.9201090375340741, - "grad_norm": 7.287788582350326, - "learning_rate": 6.060770490416883e-06, - "loss": 0.7302, + "epoch": 1.2929963287207005, + "grad_norm": 5.341343923925911, + "learning_rate": 1.2687690722638588e-05, + "loss": 0.8875, "step": 9157 }, { - "epoch": 1.9203187250996017, - "grad_norm": 6.288854307673709, - "learning_rate": 6.0586897566497605e-06, - "loss": 0.568, + "epoch": 1.2931375317706864, + "grad_norm": 3.649420995069708, + "learning_rate": 1.268622233124658e-05, + "loss": 0.6293, "step": 9158 }, { - "epoch": 1.920528412665129, - "grad_norm": 7.136806794945663, - "learning_rate": 6.056609224881619e-06, - "loss": 0.7276, + "epoch": 1.2932787348206722, + "grad_norm": 4.110481779041412, + "learning_rate": 1.2684753877427922e-05, + "loss": 0.9144, "step": 9159 }, { - "epoch": 1.9207381002306563, - "grad_norm": 6.2292382259840915, - "learning_rate": 6.054528895219095e-06, - "loss": 0.6155, + "epoch": 1.2934199378706581, + "grad_norm": 2.830973213421061, + "learning_rate": 1.2683285361216745e-05, + "loss": 0.4555, "step": 9160 }, { - "epoch": 1.9209477877961838, - "grad_norm": 7.176081663970161, - "learning_rate": 6.0524487677688106e-06, - "loss": 0.7813, + "epoch": 1.293561140920644, + "grad_norm": 3.402516639997098, + "learning_rate": 1.2681816782647176e-05, + "loss": 0.5348, "step": 9161 }, { - "epoch": 1.9211574753617109, - "grad_norm": 6.875262491677104, - "learning_rate": 6.050368842637371e-06, - "loss": 0.7492, + "epoch": 1.2937023439706299, + "grad_norm": 2.7525871910766337, + "learning_rate": 1.268034814175334e-05, + "loss": 0.4001, "step": 9162 }, { - "epoch": 1.9213671629272384, - "grad_norm": 6.00566162621194, - "learning_rate": 6.0482891199313765e-06, - "loss": 0.5621, + "epoch": 1.2938435470206158, + "grad_norm": 4.019184744946877, + "learning_rate": 1.2678879438569373e-05, + "loss": 0.6721, "step": 9163 }, { - "epoch": 1.921576850492766, - "grad_norm": 6.856406708752517, - "learning_rate": 6.046209599757423e-06, - "loss": 0.5873, + "epoch": 1.2939847500706017, + "grad_norm": 3.9479831659107307, + "learning_rate": 1.2677410673129406e-05, + "loss": 0.6557, "step": 9164 }, { - "epoch": 1.921786538058293, - "grad_norm": 7.977876291141394, - "learning_rate": 6.0441302822220825e-06, - "loss": 0.8558, + "epoch": 1.2941259531205875, + "grad_norm": 3.162731753157139, + "learning_rate": 1.267594184546757e-05, + "loss": 0.5178, "step": 9165 }, { - "epoch": 1.9219962256238206, - "grad_norm": 6.498539129163264, - "learning_rate": 6.042051167431928e-06, - "loss": 0.6293, + "epoch": 1.2942671561705734, + "grad_norm": 3.367463749031155, + "learning_rate": 1.2674472955618001e-05, + "loss": 0.5941, "step": 9166 }, { - "epoch": 1.9222059131893479, - "grad_norm": 9.38095639710982, - "learning_rate": 6.039972255493516e-06, - "loss": 0.8867, + "epoch": 1.2944083592205593, + "grad_norm": 3.5275740142043266, + "learning_rate": 1.2673004003614835e-05, + "loss": 0.5644, "step": 9167 }, { - "epoch": 1.9224156007548752, - "grad_norm": 8.256527493500366, - "learning_rate": 6.03789354651339e-06, - "loss": 0.8981, + "epoch": 1.294549562270545, + "grad_norm": 3.7123388426804573, + "learning_rate": 1.2671534989492209e-05, + "loss": 0.6172, "step": 9168 }, { - "epoch": 1.9226252883204027, - "grad_norm": 6.636979131940089, - "learning_rate": 6.0358150405980944e-06, - "loss": 0.7353, + "epoch": 1.2946907653205308, + "grad_norm": 3.4731283051624553, + "learning_rate": 1.2670065913284268e-05, + "loss": 0.5004, "step": 9169 }, { - "epoch": 1.92283497588593, - "grad_norm": 8.909903698117736, - "learning_rate": 6.033736737854157e-06, - "loss": 0.8786, + "epoch": 1.2948319683705167, + "grad_norm": 3.9616396078989093, + "learning_rate": 1.2668596775025143e-05, + "loss": 0.5446, "step": 9170 }, { - "epoch": 1.9230446634514573, - "grad_norm": 8.225122566219394, - "learning_rate": 6.031658638388087e-06, - "loss": 0.7735, + "epoch": 1.2949731714205026, + "grad_norm": 3.198145166284306, + "learning_rate": 1.2667127574748985e-05, + "loss": 0.5033, "step": 9171 }, { - "epoch": 1.9232543510169848, - "grad_norm": 6.640383690488738, - "learning_rate": 6.0295807423063935e-06, - "loss": 0.6074, + "epoch": 1.2951143744704885, + "grad_norm": 3.6393317998630703, + "learning_rate": 1.2665658312489932e-05, + "loss": 0.6497, "step": 9172 }, { - "epoch": 1.923464038582512, - "grad_norm": 7.302975957444926, - "learning_rate": 6.0275030497155775e-06, - "loss": 0.6412, + "epoch": 1.2952555775204744, + "grad_norm": 3.4451994435918323, + "learning_rate": 1.2664188988282134e-05, + "loss": 0.6279, "step": 9173 }, { - "epoch": 1.9236737261480394, - "grad_norm": 5.311622440560782, - "learning_rate": 6.025425560722115e-06, - "loss": 0.4873, + "epoch": 1.2953967805704603, + "grad_norm": 3.0543916530372415, + "learning_rate": 1.2662719602159732e-05, + "loss": 0.5374, "step": 9174 }, { - "epoch": 1.9238834137135667, - "grad_norm": 8.505035951352397, - "learning_rate": 6.023348275432488e-06, - "loss": 0.7454, + "epoch": 1.2955379836204461, + "grad_norm": 3.995415561467427, + "learning_rate": 1.2661250154156876e-05, + "loss": 0.6717, "step": 9175 }, { - "epoch": 1.924093101279094, - "grad_norm": 7.182864645319939, - "learning_rate": 6.021271193953162e-06, - "loss": 0.5788, + "epoch": 1.295679186670432, + "grad_norm": 3.3638788769976866, + "learning_rate": 1.2659780644307718e-05, + "loss": 0.5538, "step": 9176 }, { - "epoch": 1.9243027888446216, - "grad_norm": 7.365906933789078, - "learning_rate": 6.019194316390586e-06, - "loss": 0.7115, + "epoch": 1.295820389720418, + "grad_norm": 3.754253857857848, + "learning_rate": 1.2658311072646403e-05, + "loss": 0.5385, "step": 9177 }, { - "epoch": 1.9245124764101489, - "grad_norm": 6.874371495020125, - "learning_rate": 6.017117642851203e-06, - "loss": 0.6019, + "epoch": 1.2959615927704038, + "grad_norm": 3.747825199381283, + "learning_rate": 1.2656841439207093e-05, + "loss": 0.7271, "step": 9178 }, { - "epoch": 1.9247221639756762, - "grad_norm": 5.652872581992205, - "learning_rate": 6.015041173441455e-06, - "loss": 0.5893, + "epoch": 1.2961027958203897, + "grad_norm": 4.380742937847543, + "learning_rate": 1.2655371744023927e-05, + "loss": 0.7049, "step": 9179 }, { - "epoch": 1.9249318515412037, - "grad_norm": 8.205954617252523, - "learning_rate": 6.012964908267752e-06, - "loss": 0.8125, + "epoch": 1.2962439988703756, + "grad_norm": 3.0658416465473817, + "learning_rate": 1.2653901987131074e-05, + "loss": 0.4793, "step": 9180 }, { - "epoch": 1.925141539106731, - "grad_norm": 6.051682343565646, - "learning_rate": 6.010888847436516e-06, - "loss": 0.5219, + "epoch": 1.2963852019203614, + "grad_norm": 3.7528761518649056, + "learning_rate": 1.2652432168562686e-05, + "loss": 0.6789, "step": 9181 }, { - "epoch": 1.9253512266722583, - "grad_norm": 6.202107529977549, - "learning_rate": 6.008812991054145e-06, - "loss": 0.7788, + "epoch": 1.2965264049703473, + "grad_norm": 4.1413207492653, + "learning_rate": 1.2650962288352916e-05, + "loss": 0.7074, "step": 9182 }, { - "epoch": 1.9255609142377859, - "grad_norm": 8.115459329767129, - "learning_rate": 6.00673733922703e-06, - "loss": 0.8739, + "epoch": 1.2966676080203332, + "grad_norm": 3.346290887816353, + "learning_rate": 1.264949234653593e-05, + "loss": 0.4921, "step": 9183 }, { - "epoch": 1.925770601803313, - "grad_norm": 5.62990360487765, - "learning_rate": 6.004661892061549e-06, - "loss": 0.5218, + "epoch": 1.296808811070319, + "grad_norm": 3.0799745957867244, + "learning_rate": 1.2648022343145885e-05, + "loss": 0.5031, "step": 9184 }, { - "epoch": 1.9259802893688405, - "grad_norm": 6.9834884602544625, - "learning_rate": 6.002586649664084e-06, - "loss": 0.7865, + "epoch": 1.296950014120305, + "grad_norm": 4.614755748817048, + "learning_rate": 1.2646552278216944e-05, + "loss": 0.6786, "step": 9185 }, { - "epoch": 1.9261899769343678, - "grad_norm": 7.1938287846980495, - "learning_rate": 6.000511612140979e-06, - "loss": 0.6954, + "epoch": 1.2970912171702909, + "grad_norm": 3.9530908809668714, + "learning_rate": 1.2645082151783272e-05, + "loss": 0.7993, "step": 9186 }, { - "epoch": 1.926399664499895, - "grad_norm": 6.332940468764646, - "learning_rate": 5.998436779598593e-06, - "loss": 0.6354, + "epoch": 1.2972324202202767, + "grad_norm": 3.799183263641042, + "learning_rate": 1.264361196387903e-05, + "loss": 0.5602, "step": 9187 }, { - "epoch": 1.9266093520654226, - "grad_norm": 7.798446494274803, - "learning_rate": 5.996362152143264e-06, - "loss": 0.6095, + "epoch": 1.2973736232702626, + "grad_norm": 3.735304362747171, + "learning_rate": 1.2642141714538391e-05, + "loss": 0.5967, "step": 9188 }, { - "epoch": 1.92681903963095, - "grad_norm": 9.065076775109011, - "learning_rate": 5.994287729881316e-06, - "loss": 1.0213, + "epoch": 1.2975148263202485, + "grad_norm": 3.35732043957424, + "learning_rate": 1.2640671403795519e-05, + "loss": 0.6186, "step": 9189 }, { - "epoch": 1.9270287271964772, - "grad_norm": 9.302297919705035, - "learning_rate": 5.99221351291907e-06, - "loss": 0.8142, + "epoch": 1.2976560293702344, + "grad_norm": 3.4756560697122523, + "learning_rate": 1.2639201031684584e-05, + "loss": 0.6093, "step": 9190 }, { - "epoch": 1.9272384147620047, - "grad_norm": 6.5626044903679, - "learning_rate": 5.990139501362834e-06, - "loss": 0.5153, + "epoch": 1.2977972324202203, + "grad_norm": 3.4846579296438693, + "learning_rate": 1.2637730598239756e-05, + "loss": 0.6051, "step": 9191 }, { - "epoch": 1.9274481023275318, - "grad_norm": 6.240116425117373, - "learning_rate": 5.988065695318902e-06, - "loss": 0.5713, + "epoch": 1.2979384354702062, + "grad_norm": 3.241275930240864, + "learning_rate": 1.2636260103495209e-05, + "loss": 0.5586, "step": 9192 }, { - "epoch": 1.9276577898930594, - "grad_norm": 6.130376204925679, - "learning_rate": 5.9859920948935624e-06, - "loss": 0.5022, + "epoch": 1.298079638520192, + "grad_norm": 3.0768233031250234, + "learning_rate": 1.2634789547485114e-05, + "loss": 0.4239, "step": 9193 }, { - "epoch": 1.9278674774585867, - "grad_norm": 7.957710147729744, - "learning_rate": 5.98391870019309e-06, - "loss": 0.5867, + "epoch": 1.298220841570178, + "grad_norm": 3.1098997348543573, + "learning_rate": 1.2633318930243647e-05, + "loss": 0.5366, "step": 9194 }, { - "epoch": 1.928077165024114, - "grad_norm": 7.014091963790038, - "learning_rate": 5.9818455113237475e-06, - "loss": 0.5082, + "epoch": 1.2983620446201638, + "grad_norm": 3.9755980212009345, + "learning_rate": 1.2631848251804992e-05, + "loss": 0.6447, "step": 9195 }, { - "epoch": 1.9282868525896415, - "grad_norm": 7.8688242984343955, - "learning_rate": 5.979772528391793e-06, - "loss": 0.909, + "epoch": 1.2985032476701497, + "grad_norm": 3.9644059849144475, + "learning_rate": 1.2630377512203314e-05, + "loss": 0.7631, "step": 9196 }, { - "epoch": 1.9284965401551688, - "grad_norm": 9.609443192317569, - "learning_rate": 5.9776997515034705e-06, - "loss": 0.8842, + "epoch": 1.2986444507201356, + "grad_norm": 3.22762014733013, + "learning_rate": 1.26289067114728e-05, + "loss": 0.5078, "step": 9197 }, { - "epoch": 1.928706227720696, - "grad_norm": 4.919427128259844, - "learning_rate": 5.975627180765011e-06, - "loss": 0.4792, + "epoch": 1.2987856537701215, + "grad_norm": 2.6559142763370303, + "learning_rate": 1.2627435849647629e-05, + "loss": 0.4428, "step": 9198 }, { - "epoch": 1.9289159152862236, - "grad_norm": 6.855620883275216, - "learning_rate": 5.9735548162826365e-06, - "loss": 0.5523, + "epoch": 1.2989268568201073, + "grad_norm": 4.249009344261088, + "learning_rate": 1.2625964926761984e-05, + "loss": 0.6148, "step": 9199 }, { - "epoch": 1.929125602851751, - "grad_norm": 8.82945972792503, - "learning_rate": 5.971482658162566e-06, - "loss": 0.8178, + "epoch": 1.2990680598700932, + "grad_norm": 3.423248820356446, + "learning_rate": 1.262449394285005e-05, + "loss": 0.5887, "step": 9200 }, { - "epoch": 1.9293352904172782, - "grad_norm": 6.8894152690381425, - "learning_rate": 5.969410706510992e-06, - "loss": 0.6986, + "epoch": 1.299209262920079, + "grad_norm": 4.102646208948321, + "learning_rate": 1.2623022897946008e-05, + "loss": 0.6299, "step": 9201 }, { - "epoch": 1.9295449779828058, - "grad_norm": 6.184587592156968, - "learning_rate": 5.96733896143411e-06, - "loss": 0.5876, + "epoch": 1.299350465970065, + "grad_norm": 3.187380673913972, + "learning_rate": 1.2621551792084045e-05, + "loss": 0.5114, "step": 9202 }, { - "epoch": 1.9297546655483329, - "grad_norm": 5.809723403344811, - "learning_rate": 5.965267423038103e-06, - "loss": 0.5792, + "epoch": 1.2994916690200509, + "grad_norm": 3.753509560109683, + "learning_rate": 1.2620080625298355e-05, + "loss": 0.5615, "step": 9203 }, { - "epoch": 1.9299643531138604, - "grad_norm": 6.877603480359358, - "learning_rate": 5.96319609142914e-06, - "loss": 0.6381, + "epoch": 1.2996328720700367, + "grad_norm": 3.608135438769864, + "learning_rate": 1.2618609397623121e-05, + "loss": 0.6428, "step": 9204 }, { - "epoch": 1.9301740406793877, - "grad_norm": 9.635570910052625, - "learning_rate": 5.961124966713373e-06, - "loss": 0.8593, + "epoch": 1.2997740751200226, + "grad_norm": 3.3244461554486846, + "learning_rate": 1.2617138109092533e-05, + "loss": 0.4924, "step": 9205 }, { - "epoch": 1.930383728244915, - "grad_norm": 8.527611782223218, - "learning_rate": 5.95905404899696e-06, - "loss": 0.7332, + "epoch": 1.2999152781700085, + "grad_norm": 3.524588630677718, + "learning_rate": 1.2615666759740788e-05, + "loss": 0.5365, "step": 9206 }, { - "epoch": 1.9305934158104425, - "grad_norm": 7.99365304815592, - "learning_rate": 5.956983338386036e-06, - "loss": 0.8465, + "epoch": 1.3000564812199944, + "grad_norm": 3.762455366243628, + "learning_rate": 1.2614195349602076e-05, + "loss": 0.6073, "step": 9207 }, { - "epoch": 1.9308031033759698, - "grad_norm": 5.610793084051084, - "learning_rate": 5.954912834986728e-06, - "loss": 0.5189, + "epoch": 1.3001976842699803, + "grad_norm": 4.182315209182354, + "learning_rate": 1.2612723878710594e-05, + "loss": 0.5632, "step": 9208 }, { - "epoch": 1.9310127909414971, - "grad_norm": 5.853094618494927, - "learning_rate": 5.95284253890515e-06, - "loss": 0.4658, + "epoch": 1.3003388873199662, + "grad_norm": 3.427077512354995, + "learning_rate": 1.2611252347100538e-05, + "loss": 0.5562, "step": 9209 }, { - "epoch": 1.9312224785070247, - "grad_norm": 6.546092581967956, - "learning_rate": 5.950772450247417e-06, - "loss": 0.694, + "epoch": 1.300480090369952, + "grad_norm": 4.103940312370206, + "learning_rate": 1.2609780754806105e-05, + "loss": 0.6116, "step": 9210 }, { - "epoch": 1.9314321660725517, - "grad_norm": 7.602615479787789, - "learning_rate": 5.948702569119615e-06, - "loss": 0.7681, + "epoch": 1.300621293419938, + "grad_norm": 3.5965243412444896, + "learning_rate": 1.2608309101861491e-05, + "loss": 0.5912, "step": 9211 }, { - "epoch": 1.9316418536380793, - "grad_norm": 7.713090827354776, - "learning_rate": 5.946632895627833e-06, - "loss": 0.7491, + "epoch": 1.3007624964699238, + "grad_norm": 3.547172590395529, + "learning_rate": 1.2606837388300905e-05, + "loss": 0.6037, "step": 9212 }, { - "epoch": 1.9318515412036066, - "grad_norm": 6.00339158010869, - "learning_rate": 5.944563429878147e-06, - "loss": 0.5661, + "epoch": 1.3009036995199097, + "grad_norm": 3.3507083359123646, + "learning_rate": 1.260536561415854e-05, + "loss": 0.5831, "step": 9213 }, { - "epoch": 1.9320612287691339, - "grad_norm": 8.213204873385854, - "learning_rate": 5.942494171976619e-06, - "loss": 0.8473, + "epoch": 1.3010449025698956, + "grad_norm": 3.3867300675965653, + "learning_rate": 1.2603893779468604e-05, + "loss": 0.5604, "step": 9214 }, { - "epoch": 1.9322709163346614, - "grad_norm": 8.983965592800304, - "learning_rate": 5.940425122029298e-06, - "loss": 0.9672, + "epoch": 1.3011861056198815, + "grad_norm": 3.8621106543325463, + "learning_rate": 1.2602421884265304e-05, + "loss": 0.7, "step": 9215 }, { - "epoch": 1.9324806039001887, - "grad_norm": 6.871460281070904, - "learning_rate": 5.938356280142237e-06, - "loss": 0.7107, + "epoch": 1.3013273086698673, + "grad_norm": 3.3095964738495267, + "learning_rate": 1.2600949928582839e-05, + "loss": 0.5302, "step": 9216 }, { - "epoch": 1.932690291465716, - "grad_norm": 6.421179836039351, - "learning_rate": 5.936287646421457e-06, - "loss": 0.7952, + "epoch": 1.3014685117198532, + "grad_norm": 3.230380849409727, + "learning_rate": 1.2599477912455425e-05, + "loss": 0.5565, "step": 9217 }, { - "epoch": 1.9328999790312436, - "grad_norm": 6.965772093420556, - "learning_rate": 5.934219220972984e-06, - "loss": 0.664, + "epoch": 1.301609714769839, + "grad_norm": 3.9254749244011684, + "learning_rate": 1.2598005835917263e-05, + "loss": 0.6512, "step": 9218 }, { - "epoch": 1.9331096665967709, - "grad_norm": 7.618222413794964, - "learning_rate": 5.9321510039028295e-06, - "loss": 0.8827, + "epoch": 1.3017509178198248, + "grad_norm": 3.148232914471939, + "learning_rate": 1.2596533699002568e-05, + "loss": 0.5626, "step": 9219 }, { - "epoch": 1.9333193541622982, - "grad_norm": 6.847746469688917, - "learning_rate": 5.930082995316991e-06, - "loss": 0.7411, + "epoch": 1.3018921208698107, + "grad_norm": 4.263337419451311, + "learning_rate": 1.2595061501745556e-05, + "loss": 0.6361, "step": 9220 }, { - "epoch": 1.9335290417278257, - "grad_norm": 7.2907641957869345, - "learning_rate": 5.928015195321452e-06, - "loss": 0.699, + "epoch": 1.3020333239197965, + "grad_norm": 2.611021097978707, + "learning_rate": 1.2593589244180431e-05, + "loss": 0.4608, "step": 9221 }, { - "epoch": 1.9337387292933528, - "grad_norm": 5.128945889977109, - "learning_rate": 5.925947604022204e-06, - "loss": 0.4736, + "epoch": 1.3021745269697824, + "grad_norm": 3.668163325454977, + "learning_rate": 1.2592116926341411e-05, + "loss": 0.6532, "step": 9222 }, { - "epoch": 1.9339484168588803, - "grad_norm": 6.745419119802339, - "learning_rate": 5.923880221525205e-06, - "loss": 0.7443, + "epoch": 1.3023157300197683, + "grad_norm": 4.155956454463068, + "learning_rate": 1.2590644548262715e-05, + "loss": 0.6765, "step": 9223 }, { - "epoch": 1.9341581044244076, - "grad_norm": 6.944667290314115, - "learning_rate": 5.921813047936413e-06, - "loss": 0.6644, + "epoch": 1.3024569330697542, + "grad_norm": 3.545076840440909, + "learning_rate": 1.258917210997856e-05, + "loss": 0.609, "step": 9224 }, { - "epoch": 1.934367791989935, - "grad_norm": 7.084972351611858, - "learning_rate": 5.9197460833617794e-06, - "loss": 0.7261, + "epoch": 1.30259813611974, + "grad_norm": 4.126756638443326, + "learning_rate": 1.2587699611523162e-05, + "loss": 0.7125, "step": 9225 }, { - "epoch": 1.9345774795554624, - "grad_norm": 7.819354598503194, - "learning_rate": 5.917679327907231e-06, - "loss": 0.6477, + "epoch": 1.302739339169726, + "grad_norm": 3.633539083708262, + "learning_rate": 1.2586227052930742e-05, + "loss": 0.7023, "step": 9226 }, { - "epoch": 1.9347871671209897, - "grad_norm": 7.209069175810552, - "learning_rate": 5.915612781678699e-06, - "loss": 0.6908, + "epoch": 1.3028805422197118, + "grad_norm": 3.9478574104552884, + "learning_rate": 1.2584754434235524e-05, + "loss": 0.6645, "step": 9227 }, { - "epoch": 1.934996854686517, - "grad_norm": 7.757144290188985, - "learning_rate": 5.913546444782099e-06, - "loss": 1.0458, + "epoch": 1.3030217452696977, + "grad_norm": 3.2569735468147294, + "learning_rate": 1.2583281755471726e-05, + "loss": 0.5555, "step": 9228 }, { - "epoch": 1.9352065422520446, - "grad_norm": 7.267495411671543, - "learning_rate": 5.911480317323328e-06, - "loss": 0.6362, + "epoch": 1.3031629483196836, + "grad_norm": 3.90011549331362, + "learning_rate": 1.2581809016673581e-05, + "loss": 0.6465, "step": 9229 }, { - "epoch": 1.9354162298175717, - "grad_norm": 6.449304460548617, - "learning_rate": 5.90941439940828e-06, - "loss": 0.7882, + "epoch": 1.3033041513696695, + "grad_norm": 3.060315708706385, + "learning_rate": 1.2580336217875303e-05, + "loss": 0.5537, "step": 9230 }, { - "epoch": 1.9356259173830992, - "grad_norm": 5.854659580799673, - "learning_rate": 5.907348691142846e-06, - "loss": 0.4732, + "epoch": 1.3034453544196554, + "grad_norm": 4.196938659517449, + "learning_rate": 1.2578863359111129e-05, + "loss": 0.6245, "step": 9231 }, { - "epoch": 1.9358356049486265, - "grad_norm": 7.108478836022061, - "learning_rate": 5.905283192632883e-06, - "loss": 0.8041, + "epoch": 1.3035865574696412, + "grad_norm": 4.37189937633729, + "learning_rate": 1.2577390440415284e-05, + "loss": 0.6887, "step": 9232 }, { - "epoch": 1.9360452925141538, - "grad_norm": 6.318351076259204, - "learning_rate": 5.90321790398426e-06, - "loss": 0.5548, + "epoch": 1.3037277605196271, + "grad_norm": 3.666958479537359, + "learning_rate": 1.2575917461821998e-05, + "loss": 0.6331, "step": 9233 }, { - "epoch": 1.9362549800796813, - "grad_norm": 5.040833899471434, - "learning_rate": 5.901152825302827e-06, - "loss": 0.6743, + "epoch": 1.303868963569613, + "grad_norm": 3.4969768392521114, + "learning_rate": 1.2574444423365503e-05, + "loss": 0.5452, "step": 9234 }, { - "epoch": 1.9364646676452086, - "grad_norm": 7.828821237424376, - "learning_rate": 5.89908795669442e-06, - "loss": 0.8771, + "epoch": 1.304010166619599, + "grad_norm": 5.156653102349881, + "learning_rate": 1.2572971325080033e-05, + "loss": 0.6953, "step": 9235 }, { - "epoch": 1.936674355210736, - "grad_norm": 6.578201393173153, - "learning_rate": 5.8970232982648656e-06, - "loss": 0.702, + "epoch": 1.3041513696695848, + "grad_norm": 2.811542306016432, + "learning_rate": 1.2571498166999816e-05, + "loss": 0.4188, "step": 9236 }, { - "epoch": 1.9368840427762635, - "grad_norm": 5.794155517280722, - "learning_rate": 5.894958850119988e-06, - "loss": 0.6955, + "epoch": 1.3042925727195707, + "grad_norm": 2.7427089789373986, + "learning_rate": 1.2570024949159097e-05, + "loss": 0.3697, "step": 9237 }, { - "epoch": 1.9370937303417908, - "grad_norm": 5.915526856954934, - "learning_rate": 5.892894612365585e-06, - "loss": 0.7092, + "epoch": 1.3044337757695565, + "grad_norm": 4.156514052944761, + "learning_rate": 1.2568551671592106e-05, + "loss": 0.6854, "step": 9238 }, { - "epoch": 1.937303417907318, - "grad_norm": 6.153325669480489, - "learning_rate": 5.890830585107456e-06, - "loss": 0.5228, + "epoch": 1.3045749788195424, + "grad_norm": 3.0204778903150302, + "learning_rate": 1.2567078334333083e-05, + "loss": 0.5371, "step": 9239 }, { - "epoch": 1.9375131054728456, - "grad_norm": 5.805033618586204, - "learning_rate": 5.888766768451391e-06, - "loss": 0.5226, + "epoch": 1.3047161818695283, + "grad_norm": 3.185404891906232, + "learning_rate": 1.2565604937416267e-05, + "loss": 0.5115, "step": 9240 }, { - "epoch": 1.9377227930383727, - "grad_norm": 8.544604226883976, - "learning_rate": 5.886703162503158e-06, - "loss": 1.0777, + "epoch": 1.3048573849195142, + "grad_norm": 3.0288109667028373, + "learning_rate": 1.2564131480875905e-05, + "loss": 0.465, "step": 9241 }, { - "epoch": 1.9379324806039002, - "grad_norm": 7.108878802967099, - "learning_rate": 5.884639767368518e-06, - "loss": 0.7181, + "epoch": 1.3049985879695, + "grad_norm": 3.1489080873078956, + "learning_rate": 1.256265796474623e-05, + "loss": 0.5965, "step": 9242 }, { - "epoch": 1.9381421681694275, - "grad_norm": 8.500185130010607, - "learning_rate": 5.882576583153232e-06, - "loss": 0.9749, + "epoch": 1.305139791019486, + "grad_norm": 3.1535011919011833, + "learning_rate": 1.2561184389061491e-05, + "loss": 0.5258, "step": 9243 }, { - "epoch": 1.9383518557349548, - "grad_norm": 5.7689104669556075, - "learning_rate": 5.880513609963037e-06, - "loss": 0.5329, + "epoch": 1.3052809940694718, + "grad_norm": 4.106568400784383, + "learning_rate": 1.2559710753855937e-05, + "loss": 0.6012, "step": 9244 }, { - "epoch": 1.9385615433004824, - "grad_norm": 5.938595326934034, - "learning_rate": 5.878450847903663e-06, - "loss": 0.6059, + "epoch": 1.3054221971194577, + "grad_norm": 2.919580928387219, + "learning_rate": 1.2558237059163805e-05, + "loss": 0.5206, "step": 9245 }, { - "epoch": 1.9387712308660097, - "grad_norm": 7.310343659542488, - "learning_rate": 5.876388297080834e-06, - "loss": 0.7572, + "epoch": 1.3055634001694436, + "grad_norm": 3.6481958167009703, + "learning_rate": 1.2556763305019353e-05, + "loss": 0.6156, "step": 9246 }, { - "epoch": 1.938980918431537, - "grad_norm": 6.459458859136461, - "learning_rate": 5.874325957600253e-06, - "loss": 0.4941, + "epoch": 1.3057046032194295, + "grad_norm": 3.825560218440628, + "learning_rate": 1.2555289491456822e-05, + "loss": 0.6533, "step": 9247 }, { - "epoch": 1.9391906059970645, - "grad_norm": 6.409904485497111, - "learning_rate": 5.8722638295676235e-06, - "loss": 0.4399, + "epoch": 1.3058458062694154, + "grad_norm": 3.415095554042437, + "learning_rate": 1.255381561851047e-05, + "loss": 0.5388, "step": 9248 }, { - "epoch": 1.9394002935625916, - "grad_norm": 7.121771038556411, - "learning_rate": 5.870201913088634e-06, - "loss": 0.812, + "epoch": 1.3059870093194013, + "grad_norm": 3.323744061224734, + "learning_rate": 1.2552341686214544e-05, + "loss": 0.5352, "step": 9249 }, { - "epoch": 1.939609981128119, - "grad_norm": 7.164893122717032, - "learning_rate": 5.8681402082689575e-06, - "loss": 0.6704, + "epoch": 1.3061282123693871, + "grad_norm": 3.9175071418402685, + "learning_rate": 1.2550867694603302e-05, + "loss": 0.6332, "step": 9250 }, { - "epoch": 1.9398196686936464, - "grad_norm": 7.410330048570561, - "learning_rate": 5.8660787152142625e-06, - "loss": 0.5786, + "epoch": 1.306269415419373, + "grad_norm": 3.6822575051325055, + "learning_rate": 1.2549393643710994e-05, + "loss": 0.6342, "step": 9251 }, { - "epoch": 1.9400293562591737, - "grad_norm": 7.087249885624948, - "learning_rate": 5.864017434030205e-06, - "loss": 0.7156, + "epoch": 1.306410618469359, + "grad_norm": 3.87837085093127, + "learning_rate": 1.2547919533571879e-05, + "loss": 0.552, "step": 9252 }, { - "epoch": 1.9402390438247012, - "grad_norm": 8.665001107189395, - "learning_rate": 5.861956364822422e-06, - "loss": 0.921, + "epoch": 1.3065518215193448, + "grad_norm": 3.629248648469618, + "learning_rate": 1.2546445364220214e-05, + "loss": 0.5717, "step": 9253 }, { - "epoch": 1.9404487313902286, - "grad_norm": 5.720479568659965, - "learning_rate": 5.859895507696556e-06, - "loss": 0.4832, + "epoch": 1.3066930245693307, + "grad_norm": 4.312863166428563, + "learning_rate": 1.2544971135690263e-05, + "loss": 0.7042, "step": 9254 }, { - "epoch": 1.9406584189557559, - "grad_norm": 7.731598221467886, - "learning_rate": 5.857834862758227e-06, - "loss": 0.7504, + "epoch": 1.3068342276193166, + "grad_norm": 4.736792516158595, + "learning_rate": 1.2543496848016278e-05, + "loss": 0.723, "step": 9255 }, { - "epoch": 1.9408681065212834, - "grad_norm": 5.8858587203793, - "learning_rate": 5.855774430113044e-06, - "loss": 0.4916, + "epoch": 1.3069754306693024, + "grad_norm": 3.3879786809211945, + "learning_rate": 1.2542022501232522e-05, + "loss": 0.5851, "step": 9256 }, { - "epoch": 1.9410777940868107, - "grad_norm": 7.611795451936627, - "learning_rate": 5.853714209866608e-06, - "loss": 0.6805, + "epoch": 1.3071166337192883, + "grad_norm": 3.1562943805964165, + "learning_rate": 1.2540548095373266e-05, + "loss": 0.5518, "step": 9257 }, { - "epoch": 1.941287481652338, - "grad_norm": 6.872140220252903, - "learning_rate": 5.851654202124515e-06, - "loss": 0.7804, + "epoch": 1.3072578367692742, + "grad_norm": 4.253654238411628, + "learning_rate": 1.2539073630472768e-05, + "loss": 0.7056, "step": 9258 }, { - "epoch": 1.9414971692178655, - "grad_norm": 6.3589864748054925, - "learning_rate": 5.849594406992331e-06, - "loss": 0.784, + "epoch": 1.30739903981926, + "grad_norm": 3.591993407644833, + "learning_rate": 1.2537599106565295e-05, + "loss": 0.6348, "step": 9259 }, { - "epoch": 1.9417068567833926, - "grad_norm": 7.527512992625092, - "learning_rate": 5.847534824575638e-06, - "loss": 0.7265, + "epoch": 1.307540242869246, + "grad_norm": 4.236035072447704, + "learning_rate": 1.2536124523685114e-05, + "loss": 0.7128, "step": 9260 }, { - "epoch": 1.9419165443489201, - "grad_norm": 6.626663692999666, - "learning_rate": 5.845475454979988e-06, - "loss": 0.6825, + "epoch": 1.3076814459192319, + "grad_norm": 3.114823931997039, + "learning_rate": 1.2534649881866494e-05, + "loss": 0.5183, "step": 9261 }, { - "epoch": 1.9421262319144474, - "grad_norm": 6.604748541123709, - "learning_rate": 5.843416298310924e-06, - "loss": 0.6192, + "epoch": 1.3078226489692177, + "grad_norm": 2.817984018560341, + "learning_rate": 1.2533175181143704e-05, + "loss": 0.3875, "step": 9262 }, { - "epoch": 1.9423359194799747, - "grad_norm": 7.740332536414257, - "learning_rate": 5.841357354673982e-06, - "loss": 0.6147, + "epoch": 1.3079638520192036, + "grad_norm": 3.8205400884775593, + "learning_rate": 1.253170042155102e-05, + "loss": 0.597, "step": 9263 }, { - "epoch": 1.9425456070455023, - "grad_norm": 6.724110318904424, - "learning_rate": 5.839298624174691e-06, - "loss": 0.6427, + "epoch": 1.3081050550691895, + "grad_norm": 3.1081068623405463, + "learning_rate": 1.2530225603122713e-05, + "loss": 0.5262, "step": 9264 }, { - "epoch": 1.9427552946110296, - "grad_norm": 7.046734516927857, - "learning_rate": 5.837240106918563e-06, - "loss": 0.6331, + "epoch": 1.3082462581191754, + "grad_norm": 3.3608363563632233, + "learning_rate": 1.252875072589305e-05, + "loss": 0.678, "step": 9265 }, { - "epoch": 1.9429649821765569, - "grad_norm": 6.946113395963471, - "learning_rate": 5.835181803011096e-06, - "loss": 0.6876, + "epoch": 1.3083874611691613, + "grad_norm": 3.1112980915399957, + "learning_rate": 1.2527275789896315e-05, + "loss": 0.5677, "step": 9266 }, { - "epoch": 1.9431746697420844, - "grad_norm": 7.581627960819508, - "learning_rate": 5.833123712557789e-06, - "loss": 0.6814, + "epoch": 1.3085286642191472, + "grad_norm": 3.5240580591515442, + "learning_rate": 1.2525800795166783e-05, + "loss": 0.5247, "step": 9267 }, { - "epoch": 1.9433843573076115, - "grad_norm": 6.150674179847868, - "learning_rate": 5.831065835664119e-06, - "loss": 0.6461, + "epoch": 1.308669867269133, + "grad_norm": 4.298886661569598, + "learning_rate": 1.2524325741738732e-05, + "loss": 0.6677, "step": 9268 }, { - "epoch": 1.943594044873139, - "grad_norm": 9.355387978414148, - "learning_rate": 5.829008172435552e-06, - "loss": 0.7282, + "epoch": 1.308811070319119, + "grad_norm": 3.5933587103455475, + "learning_rate": 1.2522850629646439e-05, + "loss": 0.558, "step": 9269 }, { - "epoch": 1.9438037324386663, - "grad_norm": 8.010429471087958, - "learning_rate": 5.826950722977549e-06, - "loss": 0.5761, + "epoch": 1.3089522733691048, + "grad_norm": 3.7607720814874903, + "learning_rate": 1.2521375458924187e-05, + "loss": 0.6326, "step": 9270 }, { - "epoch": 1.9440134200041936, - "grad_norm": 5.61438119145265, - "learning_rate": 5.824893487395564e-06, - "loss": 0.648, + "epoch": 1.3090934764190907, + "grad_norm": 3.2957694209007817, + "learning_rate": 1.2519900229606261e-05, + "loss": 0.4539, "step": 9271 }, { - "epoch": 1.9442231075697212, - "grad_norm": 7.487553321826831, - "learning_rate": 5.82283646579503e-06, - "loss": 0.6758, + "epoch": 1.3092346794690766, + "grad_norm": 3.774286912841949, + "learning_rate": 1.2518424941726939e-05, + "loss": 0.504, "step": 9272 }, { - "epoch": 1.9444327951352485, - "grad_norm": 7.0716788083506135, - "learning_rate": 5.820779658281368e-06, - "loss": 0.7614, + "epoch": 1.3093758825190625, + "grad_norm": 3.1098028550188657, + "learning_rate": 1.251694959532051e-05, + "loss": 0.5128, "step": 9273 }, { - "epoch": 1.9446424827007758, - "grad_norm": 7.927492651645763, - "learning_rate": 5.818723064959999e-06, - "loss": 0.8617, + "epoch": 1.3095170855690483, + "grad_norm": 3.0696725368822406, + "learning_rate": 1.2515474190421258e-05, + "loss": 0.4844, "step": 9274 }, { - "epoch": 1.9448521702663033, - "grad_norm": 6.242842619586923, - "learning_rate": 5.816666685936327e-06, - "loss": 0.5955, + "epoch": 1.3096582886190342, + "grad_norm": 2.988075346302314, + "learning_rate": 1.2513998727063475e-05, + "loss": 0.5383, "step": 9275 }, { - "epoch": 1.9450618578318306, - "grad_norm": 6.785903902923679, - "learning_rate": 5.814610521315738e-06, - "loss": 0.6331, + "epoch": 1.30979949166902, + "grad_norm": 3.682406744798175, + "learning_rate": 1.2512523205281444e-05, + "loss": 0.6115, "step": 9276 }, { - "epoch": 1.945271545397358, - "grad_norm": 6.107795187285666, - "learning_rate": 5.812554571203624e-06, - "loss": 0.6826, + "epoch": 1.309940694719006, + "grad_norm": 3.8131474272886754, + "learning_rate": 1.251104762510946e-05, + "loss": 0.5554, "step": 9277 }, { - "epoch": 1.9454812329628854, - "grad_norm": 6.040836956269254, - "learning_rate": 5.810498835705345e-06, - "loss": 0.6376, + "epoch": 1.3100818977689919, + "grad_norm": 3.8331445825039947, + "learning_rate": 1.2509571986581814e-05, + "loss": 0.6753, "step": 9278 }, { - "epoch": 1.9456909205284125, - "grad_norm": 5.780480782298735, - "learning_rate": 5.808443314926273e-06, - "loss": 0.5165, + "epoch": 1.3102231008189777, + "grad_norm": 3.852097333624089, + "learning_rate": 1.2508096289732799e-05, + "loss": 0.7472, "step": 9279 }, { - "epoch": 1.94590060809394, - "grad_norm": 6.8258817826654985, - "learning_rate": 5.806388008971751e-06, - "loss": 0.6663, + "epoch": 1.3103643038689636, + "grad_norm": 3.5665765023083065, + "learning_rate": 1.2506620534596711e-05, + "loss": 0.5941, "step": 9280 }, { - "epoch": 1.9461102956594674, - "grad_norm": 6.690171162849062, - "learning_rate": 5.804332917947111e-06, - "loss": 0.696, + "epoch": 1.3105055069189495, + "grad_norm": 2.9934552756948736, + "learning_rate": 1.2505144721207843e-05, + "loss": 0.4984, "step": 9281 }, { - "epoch": 1.9463199832249947, - "grad_norm": 6.12625218474809, - "learning_rate": 5.802278041957687e-06, - "loss": 0.6677, + "epoch": 1.3106467099689354, + "grad_norm": 3.512662329711256, + "learning_rate": 1.250366884960049e-05, + "loss": 0.4728, "step": 9282 }, { - "epoch": 1.9465296707905222, - "grad_norm": 8.743377118394523, - "learning_rate": 5.800223381108798e-06, - "loss": 0.9713, + "epoch": 1.3107879130189213, + "grad_norm": 3.9225558091451442, + "learning_rate": 1.2502192919808958e-05, + "loss": 0.5928, "step": 9283 }, { - "epoch": 1.9467393583560495, - "grad_norm": 7.910693649827986, - "learning_rate": 5.798168935505746e-06, - "loss": 0.8692, + "epoch": 1.3109291160689072, + "grad_norm": 3.3510030423618358, + "learning_rate": 1.2500716931867543e-05, + "loss": 0.5689, "step": 9284 }, { - "epoch": 1.9469490459215768, - "grad_norm": 6.457286587355363, - "learning_rate": 5.796114705253818e-06, - "loss": 0.545, + "epoch": 1.311070319118893, + "grad_norm": 4.2268187826796515, + "learning_rate": 1.2499240885810546e-05, + "loss": 0.7237, "step": 9285 }, { - "epoch": 1.9471587334871043, - "grad_norm": 8.304300458453238, - "learning_rate": 5.794060690458307e-06, - "loss": 0.7866, + "epoch": 1.311211522168879, + "grad_norm": 3.680876323663695, + "learning_rate": 1.249776478167227e-05, + "loss": 0.605, "step": 9286 }, { - "epoch": 1.9473684210526314, - "grad_norm": 6.86688387061758, - "learning_rate": 5.792006891224478e-06, - "loss": 0.7037, + "epoch": 1.3113527252188648, + "grad_norm": 4.002694119795089, + "learning_rate": 1.249628861948702e-05, + "loss": 0.6596, "step": 9287 }, { - "epoch": 1.947578108618159, - "grad_norm": 5.957818819577157, - "learning_rate": 5.789953307657596e-06, - "loss": 0.5539, + "epoch": 1.3114939282688507, + "grad_norm": 3.1645928399905494, + "learning_rate": 1.24948123992891e-05, + "loss": 0.5867, "step": 9288 }, { - "epoch": 1.9477877961836865, - "grad_norm": 6.766338458887155, - "learning_rate": 5.78789993986291e-06, - "loss": 0.7346, + "epoch": 1.3116351313188366, + "grad_norm": 3.8299127374595243, + "learning_rate": 1.2493336121112818e-05, + "loss": 0.7085, "step": 9289 }, { - "epoch": 1.9479974837492136, - "grad_norm": 8.245215517592019, - "learning_rate": 5.785846787945654e-06, - "loss": 0.8964, + "epoch": 1.3117763343688225, + "grad_norm": 3.255832351809516, + "learning_rate": 1.2491859784992477e-05, + "loss": 0.6707, "step": 9290 }, { - "epoch": 1.948207171314741, - "grad_norm": 7.5080481369699275, - "learning_rate": 5.783793852011058e-06, - "loss": 0.7699, + "epoch": 1.3119175374188083, + "grad_norm": 3.7400629711008397, + "learning_rate": 1.2490383390962395e-05, + "loss": 0.5892, "step": 9291 }, { - "epoch": 1.9484168588802684, - "grad_norm": 6.883068163996783, - "learning_rate": 5.781741132164349e-06, - "loss": 0.6528, + "epoch": 1.3120587404687942, + "grad_norm": 2.9668270687012286, + "learning_rate": 1.248890693905688e-05, + "loss": 0.5873, "step": 9292 }, { - "epoch": 1.9486265464457957, - "grad_norm": 5.283084538234154, - "learning_rate": 5.779688628510713e-06, - "loss": 0.344, + "epoch": 1.31219994351878, + "grad_norm": 3.719476391783897, + "learning_rate": 1.2487430429310239e-05, + "loss": 0.5367, "step": 9293 }, { - "epoch": 1.9488362340113232, - "grad_norm": 7.53107655912143, - "learning_rate": 5.777636341155357e-06, - "loss": 0.8135, + "epoch": 1.312341146568766, + "grad_norm": 4.012833342172237, + "learning_rate": 1.248595386175679e-05, + "loss": 0.5228, "step": 9294 }, { - "epoch": 1.9490459215768505, - "grad_norm": 6.193966469472814, - "learning_rate": 5.775584270203465e-06, - "loss": 0.4428, + "epoch": 1.3124823496187519, + "grad_norm": 3.5696478412825616, + "learning_rate": 1.2484477236430847e-05, + "loss": 0.5924, "step": 9295 }, { - "epoch": 1.9492556091423778, - "grad_norm": 5.562705661522564, - "learning_rate": 5.7735324157602055e-06, - "loss": 0.6155, + "epoch": 1.3126235526687378, + "grad_norm": 3.815641626023018, + "learning_rate": 1.2483000553366727e-05, + "loss": 0.6269, "step": 9296 }, { - "epoch": 1.9494652967079054, - "grad_norm": 6.83694466746269, - "learning_rate": 5.771480777930738e-06, - "loss": 0.5822, + "epoch": 1.3127647557187236, + "grad_norm": 4.166165230315048, + "learning_rate": 1.2481523812598746e-05, + "loss": 0.8467, "step": 9297 }, { - "epoch": 1.9496749842734324, - "grad_norm": 6.3695744723782735, - "learning_rate": 5.769429356820218e-06, - "loss": 0.5949, + "epoch": 1.3129059587687095, + "grad_norm": 3.705751989958468, + "learning_rate": 1.2480047014161223e-05, + "loss": 0.6672, "step": 9298 }, { - "epoch": 1.94988467183896, - "grad_norm": 6.262326135822616, - "learning_rate": 5.767378152533776e-06, - "loss": 0.5755, + "epoch": 1.3130471618186954, + "grad_norm": 3.405347242669965, + "learning_rate": 1.2478570158088477e-05, + "loss": 0.5666, "step": 9299 }, { - "epoch": 1.9500943594044873, - "grad_norm": 5.654409756464303, - "learning_rate": 5.765327165176549e-06, - "loss": 0.6237, + "epoch": 1.3131883648686813, + "grad_norm": 4.0849396138275935, + "learning_rate": 1.247709324441483e-05, + "loss": 0.7035, "step": 9300 }, { - "epoch": 1.9503040469700146, - "grad_norm": 7.2591537590281225, - "learning_rate": 5.763276394853648e-06, - "loss": 0.7722, + "epoch": 1.3133295679186672, + "grad_norm": 3.4379999750701145, + "learning_rate": 1.247561627317461e-05, + "loss": 0.5881, "step": 9301 }, { - "epoch": 1.950513734535542, - "grad_norm": 6.127519413257496, - "learning_rate": 5.7612258416701784e-06, - "loss": 0.6137, + "epoch": 1.313470770968653, + "grad_norm": 3.6419292726317734, + "learning_rate": 1.2474139244402134e-05, + "loss": 0.6041, "step": 9302 }, { - "epoch": 1.9507234221010694, - "grad_norm": 6.328713334956077, - "learning_rate": 5.759175505731236e-06, - "loss": 0.4834, + "epoch": 1.313611974018639, + "grad_norm": 3.16572552319187, + "learning_rate": 1.2472662158131732e-05, + "loss": 0.5277, "step": 9303 }, { - "epoch": 1.9509331096665967, - "grad_norm": 6.165626741839007, - "learning_rate": 5.757125387141906e-06, - "loss": 0.6624, + "epoch": 1.3137531770686246, + "grad_norm": 3.6123126059706836, + "learning_rate": 1.2471185014397728e-05, + "loss": 0.6163, "step": 9304 }, { - "epoch": 1.9511427972321242, - "grad_norm": 6.215042525426812, - "learning_rate": 5.755075486007261e-06, - "loss": 0.7724, + "epoch": 1.3138943801186105, + "grad_norm": 3.757437168569089, + "learning_rate": 1.2469707813234455e-05, + "loss": 0.642, "step": 9305 }, { - "epoch": 1.9513524847976516, - "grad_norm": 7.124243081454236, - "learning_rate": 5.753025802432355e-06, - "loss": 0.7672, + "epoch": 1.3140355831685964, + "grad_norm": 3.196902626158747, + "learning_rate": 1.2468230554676235e-05, + "loss": 0.4455, "step": 9306 }, { - "epoch": 1.9515621723631789, - "grad_norm": 7.188388350388214, - "learning_rate": 5.750976336522247e-06, - "loss": 0.7827, + "epoch": 1.3141767862185822, + "grad_norm": 3.557264732379261, + "learning_rate": 1.2466753238757406e-05, + "loss": 0.5001, "step": 9307 }, { - "epoch": 1.9517718599287064, - "grad_norm": 9.647800390544647, - "learning_rate": 5.748927088381967e-06, - "loss": 0.8735, + "epoch": 1.3143179892685681, + "grad_norm": 4.336686034052295, + "learning_rate": 1.2465275865512291e-05, + "loss": 0.5803, "step": 9308 }, { - "epoch": 1.9519815474942335, - "grad_norm": 7.625115787006105, - "learning_rate": 5.7468780581165515e-06, - "loss": 0.763, + "epoch": 1.314459192318554, + "grad_norm": 3.302825718200357, + "learning_rate": 1.2463798434975239e-05, + "loss": 0.5924, "step": 9309 }, { - "epoch": 1.952191235059761, - "grad_norm": 5.929156517893515, - "learning_rate": 5.744829245831012e-06, - "loss": 0.6244, + "epoch": 1.31460039536854, + "grad_norm": 3.6257427112141762, + "learning_rate": 1.2462320947180565e-05, + "loss": 0.5996, "step": 9310 }, { - "epoch": 1.9524009226252883, - "grad_norm": 6.857416179978395, - "learning_rate": 5.742780651630349e-06, - "loss": 0.6931, + "epoch": 1.3147415984185258, + "grad_norm": 4.410392126008087, + "learning_rate": 1.2460843402162624e-05, + "loss": 0.7127, "step": 9311 }, { - "epoch": 1.9526106101908156, - "grad_norm": 9.552718969179411, - "learning_rate": 5.740732275619562e-06, - "loss": 0.8407, + "epoch": 1.3148828014685117, + "grad_norm": 3.2330243897979747, + "learning_rate": 1.2459365799955741e-05, + "loss": 0.6044, "step": 9312 }, { - "epoch": 1.9528202977563431, - "grad_norm": 7.119021256418084, - "learning_rate": 5.7386841179036415e-06, - "loss": 0.6902, + "epoch": 1.3150240045184975, + "grad_norm": 3.9013937369360043, + "learning_rate": 1.245788814059426e-05, + "loss": 0.6431, "step": 9313 }, { - "epoch": 1.9530299853218704, - "grad_norm": 5.993071399822086, - "learning_rate": 5.736636178587543e-06, - "loss": 0.5794, + "epoch": 1.3151652075684834, + "grad_norm": 3.529792051402255, + "learning_rate": 1.245641042411252e-05, + "loss": 0.5846, "step": 9314 }, { - "epoch": 1.9532396728873977, - "grad_norm": 5.755348810075273, - "learning_rate": 5.734588457776234e-06, - "loss": 0.4426, + "epoch": 1.3153064106184693, + "grad_norm": 3.410021676331754, + "learning_rate": 1.2454932650544862e-05, + "loss": 0.52, "step": 9315 }, { - "epoch": 1.9534493604529253, - "grad_norm": 5.516629429167927, - "learning_rate": 5.732540955574668e-06, - "loss": 0.589, + "epoch": 1.3154476136684552, + "grad_norm": 3.2379182049059816, + "learning_rate": 1.2453454819925627e-05, + "loss": 0.5478, "step": 9316 }, { - "epoch": 1.9536590480184524, - "grad_norm": 5.857164654262589, - "learning_rate": 5.73049367208778e-06, - "loss": 0.5204, + "epoch": 1.315588816718441, + "grad_norm": 3.509887683378275, + "learning_rate": 1.2451976932289168e-05, + "loss": 0.4775, "step": 9317 }, { - "epoch": 1.9538687355839799, - "grad_norm": 6.070086157996333, - "learning_rate": 5.728446607420493e-06, - "loss": 0.6715, + "epoch": 1.315730019768427, + "grad_norm": 3.6648330664363553, + "learning_rate": 1.245049898766982e-05, + "loss": 0.4969, "step": 9318 }, { - "epoch": 1.9540784231495072, - "grad_norm": 6.321055237505776, - "learning_rate": 5.72639976167773e-06, - "loss": 0.5754, + "epoch": 1.3158712228184128, + "grad_norm": 3.612899750807376, + "learning_rate": 1.2449020986101934e-05, + "loss": 0.6447, "step": 9319 }, { - "epoch": 1.9542881107150345, - "grad_norm": 6.832625902490554, - "learning_rate": 5.724353134964386e-06, - "loss": 0.8166, + "epoch": 1.3160124258683987, + "grad_norm": 5.165230371738194, + "learning_rate": 1.2447542927619857e-05, + "loss": 0.8195, "step": 9320 }, { - "epoch": 1.954497798280562, - "grad_norm": 6.9326533414556515, - "learning_rate": 5.722306727385365e-06, - "loss": 0.5492, + "epoch": 1.3161536289183846, + "grad_norm": 4.036267354645072, + "learning_rate": 1.2446064812257941e-05, + "loss": 0.5093, "step": 9321 }, { - "epoch": 1.9547074858460893, - "grad_norm": 5.343943579308795, - "learning_rate": 5.7202605390455435e-06, - "loss": 0.5598, + "epoch": 1.3162948319683705, + "grad_norm": 2.7688796737880823, + "learning_rate": 1.2444586640050536e-05, + "loss": 0.4783, "step": 9322 }, { - "epoch": 1.9549171734116166, - "grad_norm": 6.967273725248564, - "learning_rate": 5.718214570049787e-06, - "loss": 0.6867, + "epoch": 1.3164360350183564, + "grad_norm": 3.896023635680067, + "learning_rate": 1.2443108411031992e-05, + "loss": 0.6337, "step": 9323 }, { - "epoch": 1.9551268609771442, - "grad_norm": 6.960116021922867, - "learning_rate": 5.716168820502961e-06, - "loss": 0.6135, + "epoch": 1.3165772380683423, + "grad_norm": 3.853454582969259, + "learning_rate": 1.2441630125236664e-05, + "loss": 0.6244, "step": 9324 }, { - "epoch": 1.9553365485426715, - "grad_norm": 6.7628145982709, - "learning_rate": 5.714123290509918e-06, - "loss": 0.5242, + "epoch": 1.3167184411183281, + "grad_norm": 3.337199941590131, + "learning_rate": 1.2440151782698904e-05, + "loss": 0.5576, "step": 9325 }, { - "epoch": 1.9555462361081988, - "grad_norm": 7.541825819569992, - "learning_rate": 5.7120779801754875e-06, - "loss": 0.63, + "epoch": 1.316859644168314, + "grad_norm": 3.3421757059944754, + "learning_rate": 1.2438673383453073e-05, + "loss": 0.5519, "step": 9326 }, { - "epoch": 1.9557559236737263, - "grad_norm": 5.936818211712879, - "learning_rate": 5.710032889604496e-06, - "loss": 0.5356, + "epoch": 1.3170008472183, + "grad_norm": 3.2967566547495375, + "learning_rate": 1.2437194927533524e-05, + "loss": 0.619, "step": 9327 }, { - "epoch": 1.9559656112392534, - "grad_norm": 7.847554958832894, - "learning_rate": 5.707988018901758e-06, - "loss": 0.7911, + "epoch": 1.3171420502682858, + "grad_norm": 3.6234533068333548, + "learning_rate": 1.243571641497462e-05, + "loss": 0.7383, "step": 9328 }, { - "epoch": 1.956175298804781, - "grad_norm": 6.765685472702689, - "learning_rate": 5.705943368172082e-06, - "loss": 0.6274, + "epoch": 1.3172832533182717, + "grad_norm": 3.775982829211028, + "learning_rate": 1.2434237845810714e-05, + "loss": 0.5902, "step": 9329 }, { - "epoch": 1.9563849863703082, - "grad_norm": 7.040261921243772, - "learning_rate": 5.703898937520256e-06, - "loss": 0.6799, + "epoch": 1.3174244563682576, + "grad_norm": 3.699851892501107, + "learning_rate": 1.2432759220076177e-05, + "loss": 0.6836, "step": 9330 }, { - "epoch": 1.9565946739358355, - "grad_norm": 6.47968987046206, - "learning_rate": 5.701854727051057e-06, - "loss": 0.5799, + "epoch": 1.3175656594182434, + "grad_norm": 4.226177458401757, + "learning_rate": 1.2431280537805363e-05, + "loss": 0.6984, "step": 9331 }, { - "epoch": 1.956804361501363, - "grad_norm": 6.363966215617241, - "learning_rate": 5.6998107368692625e-06, - "loss": 0.6411, + "epoch": 1.3177068624682293, + "grad_norm": 5.503905857685413, + "learning_rate": 1.242980179903264e-05, + "loss": 0.6524, "step": 9332 }, { - "epoch": 1.9570140490668904, - "grad_norm": 6.877135785062415, - "learning_rate": 5.697766967079621e-06, - "loss": 0.6187, + "epoch": 1.3178480655182152, + "grad_norm": 4.182109172160288, + "learning_rate": 1.242832300379237e-05, + "loss": 0.6601, "step": 9333 }, { - "epoch": 1.9572237366324177, - "grad_norm": 7.13610607904063, - "learning_rate": 5.6957234177868895e-06, - "loss": 0.9326, + "epoch": 1.317989268568201, + "grad_norm": 3.4885976871269135, + "learning_rate": 1.2426844152118926e-05, + "loss": 0.7145, "step": 9334 }, { - "epoch": 1.9574334241979452, - "grad_norm": 7.6776117481462265, - "learning_rate": 5.693680089095798e-06, - "loss": 0.8441, + "epoch": 1.318130471618187, + "grad_norm": 3.4704760759425204, + "learning_rate": 1.2425365244046674e-05, + "loss": 0.6912, "step": 9335 }, { - "epoch": 1.9576431117634723, - "grad_norm": 5.710194240747184, - "learning_rate": 5.691636981111066e-06, - "loss": 0.4383, + "epoch": 1.3182716746681729, + "grad_norm": 3.52727431863363, + "learning_rate": 1.2423886279609975e-05, + "loss": 0.6495, "step": 9336 }, { - "epoch": 1.9578527993289998, - "grad_norm": 7.237677067987588, - "learning_rate": 5.689594093937416e-06, - "loss": 0.6966, + "epoch": 1.3184128777181587, + "grad_norm": 3.8004598991392124, + "learning_rate": 1.242240725884321e-05, + "loss": 0.6967, "step": 9337 }, { - "epoch": 1.958062486894527, - "grad_norm": 5.363887846678818, - "learning_rate": 5.687551427679545e-06, - "loss": 0.4774, + "epoch": 1.3185540807681446, + "grad_norm": 3.4945221242998454, + "learning_rate": 1.2420928181780745e-05, + "loss": 0.6797, "step": 9338 }, { - "epoch": 1.9582721744600544, - "grad_norm": 7.902001910469446, - "learning_rate": 5.685508982442138e-06, - "loss": 0.7774, + "epoch": 1.3186952838181305, + "grad_norm": 4.048181658191856, + "learning_rate": 1.2419449048456955e-05, + "loss": 0.6253, "step": 9339 }, { - "epoch": 1.958481862025582, - "grad_norm": 7.4645462613137195, - "learning_rate": 5.6834667583298785e-06, - "loss": 0.6452, + "epoch": 1.3188364868681164, + "grad_norm": 3.2396788521223834, + "learning_rate": 1.2417969858906214e-05, + "loss": 0.5436, "step": 9340 }, { - "epoch": 1.9586915495911092, - "grad_norm": 6.735411050386404, - "learning_rate": 5.681424755447439e-06, - "loss": 0.5758, + "epoch": 1.3189776899181023, + "grad_norm": 3.2680796320000067, + "learning_rate": 1.2416490613162896e-05, + "loss": 0.5327, "step": 9341 }, { - "epoch": 1.9589012371566366, - "grad_norm": 7.766642635260357, - "learning_rate": 5.679382973899471e-06, - "loss": 0.8949, + "epoch": 1.3191188929680882, + "grad_norm": 3.715863324615412, + "learning_rate": 1.241501131126138e-05, + "loss": 0.5818, "step": 9342 }, { - "epoch": 1.959110924722164, - "grad_norm": 6.037821367055008, - "learning_rate": 5.6773414137906155e-06, - "loss": 0.6325, + "epoch": 1.319260096018074, + "grad_norm": 3.33465078891875, + "learning_rate": 1.241353195323605e-05, + "loss": 0.55, "step": 9343 }, { - "epoch": 1.9593206122876914, - "grad_norm": 7.673878220044886, - "learning_rate": 5.675300075225514e-06, - "loss": 0.8598, + "epoch": 1.31940129906806, + "grad_norm": 3.3057332986866985, + "learning_rate": 1.2412052539121273e-05, + "loss": 0.5576, "step": 9344 }, { - "epoch": 1.9595302998532187, - "grad_norm": 7.122512581610538, - "learning_rate": 5.673258958308778e-06, - "loss": 0.7313, + "epoch": 1.3195425021180458, + "grad_norm": 3.520822933687587, + "learning_rate": 1.2410573068951437e-05, + "loss": 0.5273, "step": 9345 }, { - "epoch": 1.9597399874187462, - "grad_norm": 6.773324047280245, - "learning_rate": 5.671218063145032e-06, - "loss": 0.6981, + "epoch": 1.3196837051680317, + "grad_norm": 3.5341433291135864, + "learning_rate": 1.2409093542760925e-05, + "loss": 0.5676, "step": 9346 }, { - "epoch": 1.9599496749842733, - "grad_norm": 6.289635908748517, - "learning_rate": 5.669177389838867e-06, - "loss": 0.6058, + "epoch": 1.3198249082180176, + "grad_norm": 3.535026490042776, + "learning_rate": 1.2407613960584121e-05, + "loss": 0.593, "step": 9347 }, { - "epoch": 1.9601593625498008, - "grad_norm": 7.868882676605081, - "learning_rate": 5.6671369384948674e-06, - "loss": 0.7898, + "epoch": 1.3199661112680035, + "grad_norm": 5.254598342605096, + "learning_rate": 1.240613432245541e-05, + "loss": 0.4673, "step": 9348 }, { - "epoch": 1.9603690501153281, - "grad_norm": 7.604711655325037, - "learning_rate": 5.665096709217615e-06, - "loss": 0.8801, + "epoch": 1.3201073143179893, + "grad_norm": 3.6432390362174587, + "learning_rate": 1.2404654628409172e-05, + "loss": 0.6045, "step": 9349 }, { - "epoch": 1.9605787376808554, - "grad_norm": 6.917368239753009, - "learning_rate": 5.66305670211168e-06, - "loss": 0.4461, + "epoch": 1.3202485173679752, + "grad_norm": 3.640047014244559, + "learning_rate": 1.2403174878479802e-05, + "loss": 0.6163, "step": 9350 }, { - "epoch": 1.960788425246383, - "grad_norm": 5.825341802707215, - "learning_rate": 5.66101691728161e-06, - "loss": 0.523, + "epoch": 1.320389720417961, + "grad_norm": 3.816127366732024, + "learning_rate": 1.2401695072701683e-05, + "loss": 0.632, "step": 9351 }, { - "epoch": 1.9609981128119103, - "grad_norm": 5.8033050447809496, - "learning_rate": 5.658977354831944e-06, - "loss": 0.4775, + "epoch": 1.320530923467947, + "grad_norm": 3.708569188697407, + "learning_rate": 1.240021521110921e-05, + "loss": 0.7233, "step": 9352 }, { - "epoch": 1.9612078003774376, - "grad_norm": 6.794858279510377, - "learning_rate": 5.656938014867223e-06, - "loss": 0.6737, + "epoch": 1.3206721265179329, + "grad_norm": 4.148344250944895, + "learning_rate": 1.239873529373677e-05, + "loss": 0.7323, "step": 9353 }, { - "epoch": 1.961417487942965, - "grad_norm": 5.606029073277059, - "learning_rate": 5.6548988974919605e-06, - "loss": 0.6143, + "epoch": 1.3208133295679187, + "grad_norm": 3.220730686155303, + "learning_rate": 1.239725532061876e-05, + "loss": 0.5624, "step": 9354 }, { - "epoch": 1.9616271755084922, - "grad_norm": 7.377239987409064, - "learning_rate": 5.6528600028106614e-06, - "loss": 0.7319, + "epoch": 1.3209545326179044, + "grad_norm": 3.5095879124962464, + "learning_rate": 1.239577529178957e-05, + "loss": 0.6158, "step": 9355 }, { - "epoch": 1.9618368630740197, - "grad_norm": 6.598389338215506, - "learning_rate": 5.650821330927832e-06, - "loss": 0.718, + "epoch": 1.3210957356678903, + "grad_norm": 4.038595904247723, + "learning_rate": 1.2394295207283598e-05, + "loss": 0.6899, "step": 9356 }, { - "epoch": 1.962046550639547, - "grad_norm": 7.09458677360249, - "learning_rate": 5.648782881947948e-06, - "loss": 0.4531, + "epoch": 1.3212369387178762, + "grad_norm": 4.568254446886248, + "learning_rate": 1.239281506713524e-05, + "loss": 0.7219, "step": 9357 }, { - "epoch": 1.9622562382050743, - "grad_norm": 6.265265433061276, - "learning_rate": 5.646744655975491e-06, - "loss": 0.5947, + "epoch": 1.321378141767862, + "grad_norm": 3.5765069412575334, + "learning_rate": 1.239133487137889e-05, + "loss": 0.548, "step": 9358 }, { - "epoch": 1.9624659257706019, - "grad_norm": 6.906000673046209, - "learning_rate": 5.644706653114923e-06, - "loss": 0.7007, + "epoch": 1.321519344817848, + "grad_norm": 4.517824425508421, + "learning_rate": 1.2389854620048952e-05, + "loss": 0.7614, "step": 9359 }, { - "epoch": 1.9626756133361292, - "grad_norm": 6.440475060835994, - "learning_rate": 5.642668873470687e-06, - "loss": 0.6435, + "epoch": 1.3216605478678338, + "grad_norm": 4.046679223340215, + "learning_rate": 1.2388374313179828e-05, + "loss": 0.6196, "step": 9360 }, { - "epoch": 1.9628853009016565, - "grad_norm": 6.82922668184217, - "learning_rate": 5.640631317147229e-06, - "loss": 0.5335, + "epoch": 1.3218017509178197, + "grad_norm": 3.7118062157675604, + "learning_rate": 1.2386893950805914e-05, + "loss": 0.6179, "step": 9361 }, { - "epoch": 1.963094988467184, - "grad_norm": 7.171982925019574, - "learning_rate": 5.638593984248983e-06, - "loss": 0.7807, + "epoch": 1.3219429539678056, + "grad_norm": 3.70906559079968, + "learning_rate": 1.2385413532961612e-05, + "loss": 0.6028, "step": 9362 }, { - "epoch": 1.9633046760327113, - "grad_norm": 8.62092906352122, - "learning_rate": 5.636556874880358e-06, - "loss": 0.7998, + "epoch": 1.3220841570177915, + "grad_norm": 3.6649247131006786, + "learning_rate": 1.2383933059681332e-05, + "loss": 0.6613, "step": 9363 }, { - "epoch": 1.9635143635982386, - "grad_norm": 7.999146266688588, - "learning_rate": 5.634519989145757e-06, - "loss": 0.8201, + "epoch": 1.3222253600677774, + "grad_norm": 4.025558316977827, + "learning_rate": 1.2382452530999479e-05, + "loss": 0.5858, "step": 9364 }, { - "epoch": 1.9637240511637661, - "grad_norm": 6.076879299069951, - "learning_rate": 5.632483327149583e-06, - "loss": 0.625, + "epoch": 1.3223665631177632, + "grad_norm": 3.43461804909093, + "learning_rate": 1.2380971946950458e-05, + "loss": 0.5644, "step": 9365 }, { - "epoch": 1.9639337387292932, - "grad_norm": 6.251812390313838, - "learning_rate": 5.630446888996208e-06, - "loss": 0.5772, + "epoch": 1.3225077661677491, + "grad_norm": 3.6261507436426985, + "learning_rate": 1.2379491307568676e-05, + "loss": 0.7124, "step": 9366 }, { - "epoch": 1.9641434262948207, - "grad_norm": 6.138863755946371, - "learning_rate": 5.628410674790015e-06, - "loss": 0.6098, + "epoch": 1.322648969217735, + "grad_norm": 3.6563503868976412, + "learning_rate": 1.2378010612888544e-05, + "loss": 0.6896, "step": 9367 }, { - "epoch": 1.964353113860348, - "grad_norm": 6.603710533719285, - "learning_rate": 5.626374684635354e-06, - "loss": 0.7069, + "epoch": 1.3227901722677209, + "grad_norm": 3.631086581687956, + "learning_rate": 1.2376529862944472e-05, + "loss": 0.5961, "step": 9368 }, { - "epoch": 1.9645628014258754, - "grad_norm": 6.7745432168439725, - "learning_rate": 5.624338918636574e-06, - "loss": 0.6011, + "epoch": 1.3229313753177068, + "grad_norm": 3.418877946589534, + "learning_rate": 1.2375049057770874e-05, + "loss": 0.5615, "step": 9369 }, { - "epoch": 1.9647724889914029, - "grad_norm": 5.227262983509193, - "learning_rate": 5.622303376898013e-06, - "loss": 0.4877, + "epoch": 1.3230725783676927, + "grad_norm": 3.258397362848523, + "learning_rate": 1.2373568197402157e-05, + "loss": 0.557, "step": 9370 }, { - "epoch": 1.9649821765569302, - "grad_norm": 7.412895579602011, - "learning_rate": 5.6202680595240056e-06, - "loss": 0.578, + "epoch": 1.3232137814176785, + "grad_norm": 3.789938580363218, + "learning_rate": 1.2372087281872745e-05, + "loss": 0.6114, "step": 9371 }, { - "epoch": 1.9651918641224575, - "grad_norm": 8.143893219406824, - "learning_rate": 5.618232966618848e-06, - "loss": 0.9202, + "epoch": 1.3233549844676644, + "grad_norm": 3.2990273647341706, + "learning_rate": 1.2370606311217047e-05, + "loss": 0.5651, "step": 9372 }, { - "epoch": 1.965401551687985, - "grad_norm": 6.731643576160104, - "learning_rate": 5.6161980982868505e-06, - "loss": 0.7872, + "epoch": 1.3234961875176503, + "grad_norm": 3.120162170597407, + "learning_rate": 1.2369125285469482e-05, + "loss": 0.5675, "step": 9373 }, { - "epoch": 1.965611239253512, - "grad_norm": 6.837902938038457, - "learning_rate": 5.614163454632308e-06, - "loss": 0.6965, + "epoch": 1.3236373905676362, + "grad_norm": 3.459996318656619, + "learning_rate": 1.2367644204664468e-05, + "loss": 0.6314, "step": 9374 }, { - "epoch": 1.9658209268190396, - "grad_norm": 6.374086728022143, - "learning_rate": 5.612129035759495e-06, - "loss": 0.6395, + "epoch": 1.323778593617622, + "grad_norm": 3.6902647229533283, + "learning_rate": 1.2366163068836427e-05, + "loss": 0.5108, "step": 9375 }, { - "epoch": 1.966030614384567, - "grad_norm": 6.633101033280499, - "learning_rate": 5.610094841772675e-06, - "loss": 0.702, + "epoch": 1.323919796667608, + "grad_norm": 3.3778885651381723, + "learning_rate": 1.2364681878019776e-05, + "loss": 0.5746, "step": 9376 }, { - "epoch": 1.9662403019500942, - "grad_norm": 5.402698443485467, - "learning_rate": 5.608060872776112e-06, - "loss": 0.4333, + "epoch": 1.3240609997175938, + "grad_norm": 3.242667512537071, + "learning_rate": 1.2363200632248942e-05, + "loss": 0.6375, "step": 9377 }, { - "epoch": 1.9664499895156218, - "grad_norm": 7.339949664691134, - "learning_rate": 5.6060271288740435e-06, - "loss": 0.8198, + "epoch": 1.3242022027675797, + "grad_norm": 3.377010724744004, + "learning_rate": 1.2361719331558346e-05, + "loss": 0.594, "step": 9378 }, { - "epoch": 1.966659677081149, - "grad_norm": 7.953023414300289, - "learning_rate": 5.60399361017071e-06, - "loss": 0.8272, + "epoch": 1.3243434058175656, + "grad_norm": 3.574507917565269, + "learning_rate": 1.2360237975982408e-05, + "loss": 0.5372, "step": 9379 }, { - "epoch": 1.9668693646466764, - "grad_norm": 9.21246066271867, - "learning_rate": 5.601960316770328e-06, - "loss": 1.0815, + "epoch": 1.3244846088675515, + "grad_norm": 3.963388790323442, + "learning_rate": 1.2358756565555563e-05, + "loss": 0.7209, "step": 9380 }, { - "epoch": 1.967079052212204, - "grad_norm": 6.2376399289821105, - "learning_rate": 5.599927248777103e-06, - "loss": 0.7165, + "epoch": 1.3246258119175374, + "grad_norm": 3.39928205935657, + "learning_rate": 1.2357275100312234e-05, + "loss": 0.5888, "step": 9381 }, { - "epoch": 1.9672887397777312, - "grad_norm": 8.07526508600555, - "learning_rate": 5.597894406295239e-06, - "loss": 0.9383, + "epoch": 1.3247670149675232, + "grad_norm": 4.166551988019832, + "learning_rate": 1.2355793580286848e-05, + "loss": 0.7082, "step": 9382 }, { - "epoch": 1.9674984273432585, - "grad_norm": 5.783708449822075, - "learning_rate": 5.595861789428925e-06, - "loss": 0.4718, + "epoch": 1.3249082180175091, + "grad_norm": 4.374973444731073, + "learning_rate": 1.2354312005513838e-05, + "loss": 0.6568, "step": 9383 }, { - "epoch": 1.967708114908786, - "grad_norm": 7.007941281927883, - "learning_rate": 5.593829398282334e-06, - "loss": 0.6255, + "epoch": 1.325049421067495, + "grad_norm": 3.218107840652149, + "learning_rate": 1.2352830376027634e-05, + "loss": 0.5173, "step": 9384 }, { - "epoch": 1.9679178024743131, - "grad_norm": 6.962596699983334, - "learning_rate": 5.591797232959623e-06, - "loss": 0.7037, + "epoch": 1.325190624117481, + "grad_norm": 3.27036034240229, + "learning_rate": 1.2351348691862665e-05, + "loss": 0.517, "step": 9385 }, { - "epoch": 1.9681274900398407, - "grad_norm": 7.8042862770405375, - "learning_rate": 5.58976529356495e-06, - "loss": 0.7684, + "epoch": 1.3253318271674668, + "grad_norm": 3.14250587262911, + "learning_rate": 1.2349866953053372e-05, + "loss": 0.4996, "step": 9386 }, { - "epoch": 1.968337177605368, - "grad_norm": 7.786136202570437, - "learning_rate": 5.587733580202459e-06, - "loss": 0.6926, + "epoch": 1.3254730302174527, + "grad_norm": 3.6981472072319552, + "learning_rate": 1.2348385159634181e-05, + "loss": 0.5938, "step": 9387 }, { - "epoch": 1.9685468651708953, - "grad_norm": 9.177226742386363, - "learning_rate": 5.585702092976275e-06, - "loss": 0.9397, + "epoch": 1.3256142332674385, + "grad_norm": 3.1320548093918226, + "learning_rate": 1.2346903311639537e-05, + "loss": 0.4986, "step": 9388 }, { - "epoch": 1.9687565527364228, - "grad_norm": 6.969590138568324, - "learning_rate": 5.583670831990511e-06, - "loss": 0.6873, + "epoch": 1.3257554363174244, + "grad_norm": 3.666485313871525, + "learning_rate": 1.2345421409103872e-05, + "loss": 0.6317, "step": 9389 }, { - "epoch": 1.96896624030195, - "grad_norm": 9.650787107579669, - "learning_rate": 5.5816397973492794e-06, - "loss": 1.1807, + "epoch": 1.3258966393674103, + "grad_norm": 3.2467727923996463, + "learning_rate": 1.2343939452061628e-05, + "loss": 0.4593, "step": 9390 }, { - "epoch": 1.9691759278674774, - "grad_norm": 6.902674016731259, - "learning_rate": 5.579608989156668e-06, - "loss": 0.6546, + "epoch": 1.3260378424173962, + "grad_norm": 3.4692872007049953, + "learning_rate": 1.2342457440547241e-05, + "loss": 0.5348, "step": 9391 }, { - "epoch": 1.969385615433005, - "grad_norm": 6.2798509356303915, - "learning_rate": 5.577578407516768e-06, - "loss": 0.5278, + "epoch": 1.326179045467382, + "grad_norm": 3.7259483955097905, + "learning_rate": 1.2340975374595157e-05, + "loss": 0.6006, "step": 9392 }, { - "epoch": 1.969595302998532, - "grad_norm": 8.447894334273606, - "learning_rate": 5.575548052533644e-06, - "loss": 0.9003, + "epoch": 1.326320248517368, + "grad_norm": 3.5740612258052273, + "learning_rate": 1.2339493254239814e-05, + "loss": 0.574, "step": 9393 }, { - "epoch": 1.9698049905640596, - "grad_norm": 6.726723638569409, - "learning_rate": 5.573517924311351e-06, - "loss": 0.8201, + "epoch": 1.3264614515673538, + "grad_norm": 2.9524646374962082, + "learning_rate": 1.2338011079515661e-05, + "loss": 0.4963, "step": 9394 }, { - "epoch": 1.9700146781295869, - "grad_norm": 8.783838241709548, - "learning_rate": 5.571488022953944e-06, - "loss": 0.8921, + "epoch": 1.3266026546173397, + "grad_norm": 3.76750563848066, + "learning_rate": 1.2336528850457138e-05, + "loss": 0.6411, "step": 9395 }, { - "epoch": 1.9702243656951142, - "grad_norm": 6.45283198817106, - "learning_rate": 5.569458348565463e-06, - "loss": 0.6829, + "epoch": 1.3267438576673256, + "grad_norm": 2.9736231327102804, + "learning_rate": 1.2335046567098694e-05, + "loss": 0.5136, "step": 9396 }, { - "epoch": 1.9704340532606417, - "grad_norm": 6.0174351594428455, - "learning_rate": 5.567428901249919e-06, - "loss": 0.5207, + "epoch": 1.3268850607173115, + "grad_norm": 3.668291127339204, + "learning_rate": 1.2333564229474778e-05, + "loss": 0.6207, "step": 9397 }, { - "epoch": 1.970643740826169, - "grad_norm": 7.699177638108156, - "learning_rate": 5.565399681111331e-06, - "loss": 0.6518, + "epoch": 1.3270262637672974, + "grad_norm": 3.75462918508023, + "learning_rate": 1.2332081837619836e-05, + "loss": 0.5616, "step": 9398 }, { - "epoch": 1.9708534283916963, - "grad_norm": 6.335525619839974, - "learning_rate": 5.5633706882537045e-06, - "loss": 0.4691, + "epoch": 1.3271674668172833, + "grad_norm": 3.393014992745285, + "learning_rate": 1.233059939156832e-05, + "loss": 0.5711, "step": 9399 }, { - "epoch": 1.9710631159572238, - "grad_norm": 5.174179431174651, - "learning_rate": 5.561341922781026e-06, - "loss": 0.5739, + "epoch": 1.3273086698672691, + "grad_norm": 3.6450057366952993, + "learning_rate": 1.2329116891354677e-05, + "loss": 0.5315, "step": 9400 }, { - "epoch": 1.9712728035227511, - "grad_norm": 6.324741584455501, - "learning_rate": 5.559313384797268e-06, - "loss": 0.5169, + "epoch": 1.327449872917255, + "grad_norm": 3.2572139309360395, + "learning_rate": 1.2327634337013366e-05, + "loss": 0.5645, "step": 9401 }, { - "epoch": 1.9714824910882784, - "grad_norm": 6.788546184522243, - "learning_rate": 5.557285074406405e-06, - "loss": 0.5962, + "epoch": 1.327591075967241, + "grad_norm": 3.1348991158688206, + "learning_rate": 1.2326151728578839e-05, + "loss": 0.5049, "step": 9402 }, { - "epoch": 1.971692178653806, - "grad_norm": 7.4878767110235716, - "learning_rate": 5.555256991712383e-06, - "loss": 0.7694, + "epoch": 1.3277322790172268, + "grad_norm": 3.418606562274722, + "learning_rate": 1.2324669066085549e-05, + "loss": 0.5024, "step": 9403 }, { - "epoch": 1.971901866219333, - "grad_norm": 6.381684228076002, - "learning_rate": 5.553229136819154e-06, - "loss": 0.5776, + "epoch": 1.3278734820672127, + "grad_norm": 3.256608069213126, + "learning_rate": 1.2323186349567955e-05, + "loss": 0.6353, "step": 9404 }, { - "epoch": 1.9721115537848606, - "grad_norm": 7.285354227541252, - "learning_rate": 5.5512015098306435e-06, - "loss": 0.7065, + "epoch": 1.3280146851171986, + "grad_norm": 3.913407544923666, + "learning_rate": 1.232170357906051e-05, + "loss": 0.6601, "step": 9405 }, { - "epoch": 1.9723212413503879, - "grad_norm": 7.472143171805601, - "learning_rate": 5.549174110850769e-06, - "loss": 1.0457, + "epoch": 1.3281558881671844, + "grad_norm": 3.024532204786768, + "learning_rate": 1.232022075459768e-05, + "loss": 0.5178, "step": 9406 }, { - "epoch": 1.9725309289159152, - "grad_norm": 9.481669256434031, - "learning_rate": 5.54714693998344e-06, - "loss": 1.0122, + "epoch": 1.3282970912171703, + "grad_norm": 3.6906275823672074, + "learning_rate": 1.2318737876213922e-05, + "loss": 0.6135, "step": 9407 }, { - "epoch": 1.9727406164814427, - "grad_norm": 5.4602505292051875, - "learning_rate": 5.545119997332558e-06, - "loss": 0.4985, + "epoch": 1.3284382942671562, + "grad_norm": 3.8646666897048476, + "learning_rate": 1.2317254943943695e-05, + "loss": 0.7091, "step": 9408 }, { - "epoch": 1.97295030404697, - "grad_norm": 8.207387419883343, - "learning_rate": 5.543093283002001e-06, - "loss": 0.9234, + "epoch": 1.328579497317142, + "grad_norm": 6.992988266133434, + "learning_rate": 1.2315771957821466e-05, + "loss": 0.7157, "step": 9409 }, { - "epoch": 1.9731599916124973, - "grad_norm": 6.2840237412150675, - "learning_rate": 5.541066797095641e-06, - "loss": 0.7049, + "epoch": 1.328720700367128, + "grad_norm": 3.4661409033845025, + "learning_rate": 1.2314288917881696e-05, + "loss": 0.5798, "step": 9410 }, { - "epoch": 1.9733696791780249, - "grad_norm": 8.583895324791946, - "learning_rate": 5.539040539717345e-06, - "loss": 0.7651, + "epoch": 1.3288619034171139, + "grad_norm": 4.215371370410614, + "learning_rate": 1.2312805824158852e-05, + "loss": 0.6174, "step": 9411 }, { - "epoch": 1.973579366743552, - "grad_norm": 4.752872794226008, - "learning_rate": 5.537014510970955e-06, - "loss": 0.4306, + "epoch": 1.3290031064670997, + "grad_norm": 3.1690829513304655, + "learning_rate": 1.2311322676687398e-05, + "loss": 0.6081, "step": 9412 }, { - "epoch": 1.9737890543090795, - "grad_norm": 6.442977046781971, - "learning_rate": 5.534988710960314e-06, - "loss": 0.7468, + "epoch": 1.3291443095170856, + "grad_norm": 3.4157954008135674, + "learning_rate": 1.2309839475501801e-05, + "loss": 0.6279, "step": 9413 }, { - "epoch": 1.973998741874607, - "grad_norm": 7.563168597369974, - "learning_rate": 5.532963139789247e-06, - "loss": 0.671, + "epoch": 1.3292855125670715, + "grad_norm": 3.1755921780632526, + "learning_rate": 1.2308356220636535e-05, + "loss": 0.5089, "step": 9414 }, { - "epoch": 1.974208429440134, - "grad_norm": 6.237503327962937, - "learning_rate": 5.530937797561564e-06, - "loss": 0.6813, + "epoch": 1.3294267156170574, + "grad_norm": 3.4491796843346245, + "learning_rate": 1.2306872912126068e-05, + "loss": 0.5568, "step": 9415 }, { - "epoch": 1.9744181170056616, - "grad_norm": 4.912833147401991, - "learning_rate": 5.528912684381071e-06, - "loss": 0.3851, + "epoch": 1.3295679186670433, + "grad_norm": 3.612064152234278, + "learning_rate": 1.230538955000487e-05, + "loss": 0.5153, "step": 9416 }, { - "epoch": 1.974627804571189, - "grad_norm": 7.374596284417961, - "learning_rate": 5.526887800351561e-06, - "loss": 0.7471, + "epoch": 1.3297091217170292, + "grad_norm": 3.6379477506253455, + "learning_rate": 1.2303906134307413e-05, + "loss": 0.5774, "step": 9417 }, { - "epoch": 1.9748374921367162, - "grad_norm": 5.144064355994955, - "learning_rate": 5.524863145576804e-06, - "loss": 0.4548, + "epoch": 1.329850324767015, + "grad_norm": 4.182307417877903, + "learning_rate": 1.2302422665068174e-05, + "loss": 0.6527, "step": 9418 }, { - "epoch": 1.9750471797022437, - "grad_norm": 7.442101373770801, - "learning_rate": 5.522838720160573e-06, - "loss": 0.7283, + "epoch": 1.329991527817001, + "grad_norm": 3.468247971401187, + "learning_rate": 1.2300939142321626e-05, + "loss": 0.6052, "step": 9419 }, { - "epoch": 1.975256867267771, - "grad_norm": 6.016025671682098, - "learning_rate": 5.5208145242066255e-06, - "loss": 0.6271, + "epoch": 1.3301327308669868, + "grad_norm": 3.3609426284591595, + "learning_rate": 1.2299455566102248e-05, + "loss": 0.5844, "step": 9420 }, { - "epoch": 1.9754665548332984, - "grad_norm": 7.407011519084812, - "learning_rate": 5.518790557818703e-06, - "loss": 0.7151, + "epoch": 1.3302739339169727, + "grad_norm": 5.227694973434564, + "learning_rate": 1.2297971936444512e-05, + "loss": 0.7641, "step": 9421 }, { - "epoch": 1.9756762423988259, - "grad_norm": 7.230074007706148, - "learning_rate": 5.516766821100532e-06, - "loss": 0.7484, + "epoch": 1.3304151369669586, + "grad_norm": 4.062006907220968, + "learning_rate": 1.2296488253382902e-05, + "loss": 0.6743, "step": 9422 }, { - "epoch": 1.975885929964353, - "grad_norm": 5.956696107692165, - "learning_rate": 5.514743314155842e-06, - "loss": 0.7203, + "epoch": 1.3305563400169444, + "grad_norm": 4.446921186139767, + "learning_rate": 1.2295004516951898e-05, + "loss": 0.6239, "step": 9423 }, { - "epoch": 1.9760956175298805, - "grad_norm": 5.081451357091475, - "learning_rate": 5.512720037088332e-06, - "loss": 0.4171, + "epoch": 1.3306975430669303, + "grad_norm": 3.2336327037120913, + "learning_rate": 1.229352072718598e-05, + "loss": 0.6276, "step": 9424 }, { - "epoch": 1.9763053050954078, - "grad_norm": 6.999221019875437, - "learning_rate": 5.510696990001709e-06, - "loss": 0.6734, + "epoch": 1.3308387461169162, + "grad_norm": 3.9540902965003504, + "learning_rate": 1.229203688411963e-05, + "loss": 0.6171, "step": 9425 }, { - "epoch": 1.976514992660935, - "grad_norm": 9.391370219701086, - "learning_rate": 5.50867417299965e-06, - "loss": 0.9458, + "epoch": 1.330979949166902, + "grad_norm": 3.4775552978711812, + "learning_rate": 1.2290552987787332e-05, + "loss": 0.4476, "step": 9426 }, { - "epoch": 1.9767246802264626, - "grad_norm": 8.891903065739188, - "learning_rate": 5.506651586185825e-06, - "loss": 1.0335, + "epoch": 1.331121152216888, + "grad_norm": 3.619380493059914, + "learning_rate": 1.2289069038223574e-05, + "loss": 0.5797, "step": 9427 }, { - "epoch": 1.97693436779199, - "grad_norm": 7.184276652326688, - "learning_rate": 5.5046292296639e-06, - "loss": 0.7992, + "epoch": 1.3312623552668739, + "grad_norm": 3.8797930015628768, + "learning_rate": 1.2287585035462838e-05, + "loss": 0.5436, "step": 9428 }, { - "epoch": 1.9771440553575172, - "grad_norm": 6.483110092348531, - "learning_rate": 5.502607103537528e-06, - "loss": 0.6756, + "epoch": 1.3314035583168597, + "grad_norm": 4.134773340550053, + "learning_rate": 1.2286100979539616e-05, + "loss": 0.7309, "step": 9429 }, { - "epoch": 1.9773537429230448, - "grad_norm": 6.608294088329845, - "learning_rate": 5.500585207910342e-06, - "loss": 0.6605, + "epoch": 1.3315447613668456, + "grad_norm": 3.5952435249030015, + "learning_rate": 1.228461687048839e-05, + "loss": 0.5614, "step": 9430 }, { - "epoch": 1.977563430488572, - "grad_norm": 7.008185365043187, - "learning_rate": 5.498563542885964e-06, - "loss": 0.7652, + "epoch": 1.3316859644168315, + "grad_norm": 3.6342837997832835, + "learning_rate": 1.2283132708343659e-05, + "loss": 0.5929, "step": 9431 }, { - "epoch": 1.9777731180540994, - "grad_norm": 7.706006103979323, - "learning_rate": 5.496542108568015e-06, - "loss": 0.759, + "epoch": 1.3318271674668174, + "grad_norm": 3.08637650514622, + "learning_rate": 1.2281648493139911e-05, + "loss": 0.4914, "step": 9432 }, { - "epoch": 1.977982805619627, - "grad_norm": 6.63354643857184, - "learning_rate": 5.494520905060095e-06, - "loss": 0.6688, + "epoch": 1.3319683705168033, + "grad_norm": 4.616875226200589, + "learning_rate": 1.2280164224911633e-05, + "loss": 0.775, "step": 9433 }, { - "epoch": 1.978192493185154, - "grad_norm": 7.450388954247213, - "learning_rate": 5.492499932465788e-06, - "loss": 0.7653, + "epoch": 1.3321095735667892, + "grad_norm": 2.577744620894552, + "learning_rate": 1.2278679903693325e-05, + "loss": 0.4532, "step": 9434 }, { - "epoch": 1.9784021807506815, - "grad_norm": 7.324945295029092, - "learning_rate": 5.490479190888683e-06, - "loss": 0.6122, + "epoch": 1.332250776616775, + "grad_norm": 3.3350150444291673, + "learning_rate": 1.227719552951948e-05, + "loss": 0.5754, "step": 9435 }, { - "epoch": 1.9786118683162088, - "grad_norm": 6.963614042749927, - "learning_rate": 5.488458680432333e-06, - "loss": 0.7877, + "epoch": 1.332391979666761, + "grad_norm": 3.636931871227736, + "learning_rate": 1.2275711102424595e-05, + "loss": 0.6325, "step": 9436 }, { - "epoch": 1.9788215558817361, - "grad_norm": 7.272216273213749, - "learning_rate": 5.486438401200309e-06, - "loss": 0.6951, + "epoch": 1.3325331827167468, + "grad_norm": 3.419171994897924, + "learning_rate": 1.2274226622443165e-05, + "loss": 0.5612, "step": 9437 }, { - "epoch": 1.9790312434472637, - "grad_norm": 6.290803817800356, - "learning_rate": 5.4844183532961426e-06, - "loss": 0.6761, + "epoch": 1.3326743857667327, + "grad_norm": 3.6348746938901484, + "learning_rate": 1.2272742089609694e-05, + "loss": 0.5717, "step": 9438 }, { - "epoch": 1.979240931012791, - "grad_norm": 7.270464472644718, - "learning_rate": 5.482398536823364e-06, - "loss": 0.7697, + "epoch": 1.3328155888167186, + "grad_norm": 3.8057851360772412, + "learning_rate": 1.2271257503958674e-05, + "loss": 0.6302, "step": 9439 }, { - "epoch": 1.9794506185783183, - "grad_norm": 6.65241455448122, - "learning_rate": 5.480378951885496e-06, - "loss": 0.6053, + "epoch": 1.3329567918667042, + "grad_norm": 4.31800193565284, + "learning_rate": 1.2269772865524612e-05, + "loss": 0.7276, "step": 9440 }, { - "epoch": 1.9796603061438458, - "grad_norm": 6.47877162068758, - "learning_rate": 5.4783595985860495e-06, - "loss": 0.7568, + "epoch": 1.3330979949166901, + "grad_norm": 4.21400117312378, + "learning_rate": 1.2268288174342013e-05, + "loss": 0.7091, "step": 9441 }, { - "epoch": 1.9798699937093729, - "grad_norm": 5.33570126000777, - "learning_rate": 5.476340477028517e-06, - "loss": 0.5061, + "epoch": 1.333239197966676, + "grad_norm": 3.059093786819134, + "learning_rate": 1.2266803430445372e-05, + "loss": 0.4818, "step": 9442 }, { - "epoch": 1.9800796812749004, - "grad_norm": 8.035842186557062, - "learning_rate": 5.474321587316376e-06, - "loss": 0.8157, + "epoch": 1.3333804010166619, + "grad_norm": 3.312637304449722, + "learning_rate": 1.2265318633869198e-05, + "loss": 0.6169, "step": 9443 }, { - "epoch": 1.9802893688404277, - "grad_norm": 8.60835021249437, - "learning_rate": 5.4723029295531075e-06, - "loss": 0.7627, + "epoch": 1.3335216040666478, + "grad_norm": 3.523818624103979, + "learning_rate": 1.2263833784647998e-05, + "loss": 0.578, "step": 9444 }, { - "epoch": 1.980499056405955, - "grad_norm": 6.672032054994563, - "learning_rate": 5.470284503842163e-06, - "loss": 0.6409, + "epoch": 1.3336628071166337, + "grad_norm": 4.14118523181956, + "learning_rate": 1.226234888281628e-05, + "loss": 0.7882, "step": 9445 }, { - "epoch": 1.9807087439714826, - "grad_norm": 8.440472147140861, - "learning_rate": 5.468266310286996e-06, - "loss": 0.7713, + "epoch": 1.3338040101666195, + "grad_norm": 3.8338976859005416, + "learning_rate": 1.2260863928408551e-05, + "loss": 0.6222, "step": 9446 }, { - "epoch": 1.9809184315370099, - "grad_norm": 7.330758440570189, - "learning_rate": 5.466248348991039e-06, - "loss": 0.8678, + "epoch": 1.3339452132166054, + "grad_norm": 3.105659687071912, + "learning_rate": 1.225937892145932e-05, + "loss": 0.5277, "step": 9447 }, { - "epoch": 1.9811281191025372, - "grad_norm": 7.548305564203889, - "learning_rate": 5.46423062005772e-06, - "loss": 0.8558, + "epoch": 1.3340864162665913, + "grad_norm": 3.8826484778187114, + "learning_rate": 1.2257893862003093e-05, + "loss": 0.6512, "step": 9448 }, { - "epoch": 1.9813378066680647, - "grad_norm": 6.738726490624901, - "learning_rate": 5.462213123590445e-06, - "loss": 0.7448, + "epoch": 1.3342276193165772, + "grad_norm": 3.216689869156623, + "learning_rate": 1.2256408750074397e-05, + "loss": 0.5126, "step": 9449 }, { - "epoch": 1.981547494233592, - "grad_norm": 5.880309786655555, - "learning_rate": 5.460195859692621e-06, - "loss": 0.6144, + "epoch": 1.334368822366563, + "grad_norm": 4.388395175411938, + "learning_rate": 1.225492358570773e-05, + "loss": 0.7349, "step": 9450 }, { - "epoch": 1.9817571817991193, - "grad_norm": 7.170694123341599, - "learning_rate": 5.458178828467635e-06, - "loss": 0.7332, + "epoch": 1.334510025416549, + "grad_norm": 5.098037335371972, + "learning_rate": 1.2253438368937615e-05, + "loss": 0.9015, "step": 9451 }, { - "epoch": 1.9819668693646468, - "grad_norm": 6.788659596038343, - "learning_rate": 5.456162030018856e-06, - "loss": 0.7349, + "epoch": 1.3346512284665348, + "grad_norm": 3.081887700340631, + "learning_rate": 1.2251953099798566e-05, + "loss": 0.5251, "step": 9452 }, { - "epoch": 1.982176556930174, - "grad_norm": 7.652891320562545, - "learning_rate": 5.454145464449654e-06, - "loss": 0.8881, + "epoch": 1.3347924315165207, + "grad_norm": 3.6573336918144856, + "learning_rate": 1.22504677783251e-05, + "loss": 0.6051, "step": 9453 }, { - "epoch": 1.9823862444957014, - "grad_norm": 8.598729244950007, - "learning_rate": 5.45212913186339e-06, - "loss": 0.8452, + "epoch": 1.3349336345665066, + "grad_norm": 4.0183552445341055, + "learning_rate": 1.2248982404551733e-05, + "loss": 0.6718, "step": 9454 }, { - "epoch": 1.9825959320612287, - "grad_norm": 5.928767362202632, - "learning_rate": 5.450113032363387e-06, - "loss": 0.5574, + "epoch": 1.3350748376164925, + "grad_norm": 4.247716634027352, + "learning_rate": 1.2247496978512988e-05, + "loss": 0.6922, "step": 9455 }, { - "epoch": 1.982805619626756, - "grad_norm": 6.9519064631136365, - "learning_rate": 5.448097166052983e-06, - "loss": 0.7053, + "epoch": 1.3352160406664784, + "grad_norm": 3.0253917359778604, + "learning_rate": 1.224601150024338e-05, + "loss": 0.487, "step": 9456 }, { - "epoch": 1.9830153071922836, - "grad_norm": 8.443731131257264, - "learning_rate": 5.4460815330354975e-06, - "loss": 0.8789, + "epoch": 1.3353572437164642, + "grad_norm": 4.202526400211722, + "learning_rate": 1.2244525969777438e-05, + "loss": 0.6371, "step": 9457 }, { - "epoch": 1.9832249947578109, - "grad_norm": 6.1762552647965325, - "learning_rate": 5.444066133414232e-06, - "loss": 0.4977, + "epoch": 1.3354984467664501, + "grad_norm": 3.849070660148693, + "learning_rate": 1.2243040387149682e-05, + "loss": 0.6035, "step": 9458 }, { - "epoch": 1.9834346823233382, - "grad_norm": 8.120962267109922, - "learning_rate": 5.442050967292476e-06, - "loss": 0.8862, + "epoch": 1.335639649816436, + "grad_norm": 3.790411585502975, + "learning_rate": 1.2241554752394633e-05, + "loss": 0.6354, "step": 9459 }, { - "epoch": 1.9836443698888657, - "grad_norm": 7.02294902222002, - "learning_rate": 5.440036034773517e-06, - "loss": 0.7794, + "epoch": 1.335780852866422, + "grad_norm": 3.3816453410918865, + "learning_rate": 1.2240069065546823e-05, + "loss": 0.5956, "step": 9460 }, { - "epoch": 1.9838540574543928, - "grad_norm": 6.166623535071192, - "learning_rate": 5.438021335960615e-06, - "loss": 0.6707, + "epoch": 1.3359220559164078, + "grad_norm": 3.8186172088018333, + "learning_rate": 1.2238583326640774e-05, + "loss": 0.6188, "step": 9461 }, { - "epoch": 1.9840637450199203, - "grad_norm": 6.545968616484437, - "learning_rate": 5.436006870957037e-06, - "loss": 0.5821, + "epoch": 1.3360632589663937, + "grad_norm": 3.889644140583435, + "learning_rate": 1.2237097535711016e-05, + "loss": 0.6197, "step": 9462 }, { - "epoch": 1.9842734325854476, - "grad_norm": 8.752112825788625, - "learning_rate": 5.433992639866022e-06, - "loss": 0.914, + "epoch": 1.3362044620163795, + "grad_norm": 4.091606484889772, + "learning_rate": 1.2235611692792078e-05, + "loss": 0.6102, "step": 9463 }, { - "epoch": 1.984483120150975, - "grad_norm": 5.213425043088967, - "learning_rate": 5.4319786427908e-06, - "loss": 0.4919, + "epoch": 1.3363456650663654, + "grad_norm": 4.009843468951958, + "learning_rate": 1.2234125797918488e-05, + "loss": 0.6242, "step": 9464 }, { - "epoch": 1.9846928077165025, - "grad_norm": 6.504175622476662, - "learning_rate": 5.429964879834595e-06, - "loss": 0.6517, + "epoch": 1.3364868681163513, + "grad_norm": 3.981661103480363, + "learning_rate": 1.2232639851124778e-05, + "loss": 0.6287, "step": 9465 }, { - "epoch": 1.9849024952820298, - "grad_norm": 6.6495877872249505, - "learning_rate": 5.42795135110062e-06, - "loss": 0.7559, + "epoch": 1.3366280711663372, + "grad_norm": 4.113754729262994, + "learning_rate": 1.2231153852445485e-05, + "loss": 0.7285, "step": 9466 }, { - "epoch": 1.985112182847557, - "grad_norm": 6.598786953938955, - "learning_rate": 5.425938056692068e-06, - "loss": 0.6777, + "epoch": 1.336769274216323, + "grad_norm": 3.030985693472055, + "learning_rate": 1.2229667801915136e-05, + "loss": 0.535, "step": 9467 }, { - "epoch": 1.9853218704130846, - "grad_norm": 7.346209749862573, - "learning_rate": 5.423924996712122e-06, - "loss": 0.6072, + "epoch": 1.336910477266309, + "grad_norm": 3.433861568259088, + "learning_rate": 1.2228181699568276e-05, + "loss": 0.5797, "step": 9468 }, { - "epoch": 1.985531557978612, - "grad_norm": 6.339433350768146, - "learning_rate": 5.421912171263959e-06, - "loss": 0.6662, + "epoch": 1.3370516803162948, + "grad_norm": 3.9918220370960906, + "learning_rate": 1.222669554543943e-05, + "loss": 0.6775, "step": 9469 }, { - "epoch": 1.9857412455441392, - "grad_norm": 8.24782553851633, - "learning_rate": 5.4198995804507335e-06, - "loss": 0.8687, + "epoch": 1.3371928833662807, + "grad_norm": 2.88206278251222, + "learning_rate": 1.2225209339563144e-05, + "loss": 0.4869, "step": 9470 }, { - "epoch": 1.9859509331096667, - "grad_norm": 6.104598991681448, - "learning_rate": 5.417887224375602e-06, - "loss": 0.5329, + "epoch": 1.3373340864162666, + "grad_norm": 3.212716043331327, + "learning_rate": 1.2223723081973955e-05, + "loss": 0.527, "step": 9471 }, { - "epoch": 1.9861606206751938, - "grad_norm": 7.071163539617937, - "learning_rate": 5.415875103141698e-06, - "loss": 0.7198, + "epoch": 1.3374752894662525, + "grad_norm": 3.429279549762187, + "learning_rate": 1.2222236772706402e-05, + "loss": 0.5842, "step": 9472 }, { - "epoch": 1.9863703082407214, - "grad_norm": 6.686773708184286, - "learning_rate": 5.413863216852142e-06, - "loss": 0.4949, + "epoch": 1.3376164925162384, + "grad_norm": 4.166665004383837, + "learning_rate": 1.2220750411795021e-05, + "loss": 0.7869, "step": 9473 }, { - "epoch": 1.9865799958062487, - "grad_norm": 6.42064073661928, - "learning_rate": 5.41185156561005e-06, - "loss": 0.548, + "epoch": 1.3377576955662243, + "grad_norm": 3.538006512037677, + "learning_rate": 1.2219263999274367e-05, + "loss": 0.5724, "step": 9474 }, { - "epoch": 1.986789683371776, - "grad_norm": 6.0919823130629425, - "learning_rate": 5.409840149518531e-06, - "loss": 0.72, + "epoch": 1.3378988986162101, + "grad_norm": 3.643270041208831, + "learning_rate": 1.2217777535178973e-05, + "loss": 0.6163, "step": 9475 }, { - "epoch": 1.9869993709373035, - "grad_norm": 6.618788605416218, - "learning_rate": 5.407828968680657e-06, - "loss": 0.5999, + "epoch": 1.338040101666196, + "grad_norm": 3.793927551252516, + "learning_rate": 1.2216291019543385e-05, + "loss": 0.6049, "step": 9476 }, { - "epoch": 1.9872090585028308, - "grad_norm": 7.0881444991206575, - "learning_rate": 5.405818023199514e-06, - "loss": 0.773, + "epoch": 1.338181304716182, + "grad_norm": 3.2237758161888936, + "learning_rate": 1.2214804452402152e-05, + "loss": 0.5407, "step": 9477 }, { - "epoch": 1.987418746068358, - "grad_norm": 6.443849337988713, - "learning_rate": 5.403807313178169e-06, - "loss": 0.6718, + "epoch": 1.3383225077661678, + "grad_norm": 3.735651225161646, + "learning_rate": 1.221331783378982e-05, + "loss": 0.6448, "step": 9478 }, { - "epoch": 1.9876284336338856, - "grad_norm": 6.907495589688916, - "learning_rate": 5.401796838719672e-06, - "loss": 0.7668, + "epoch": 1.3384637108161537, + "grad_norm": 3.7437083403399987, + "learning_rate": 1.2211831163740937e-05, + "loss": 0.565, "step": 9479 }, { - "epoch": 1.9878381211994127, - "grad_norm": 8.319902880082024, - "learning_rate": 5.399786599927057e-06, - "loss": 0.748, + "epoch": 1.3386049138661396, + "grad_norm": 3.312341068009971, + "learning_rate": 1.2210344442290054e-05, + "loss": 0.5292, "step": 9480 }, { - "epoch": 1.9880478087649402, - "grad_norm": 7.085893351681256, - "learning_rate": 5.397776596903361e-06, - "loss": 0.7148, + "epoch": 1.3387461169161254, + "grad_norm": 3.382365957889865, + "learning_rate": 1.2208857669471721e-05, + "loss": 0.6308, "step": 9481 }, { - "epoch": 1.9882574963304676, - "grad_norm": 6.175848088575357, - "learning_rate": 5.395766829751594e-06, - "loss": 0.5696, + "epoch": 1.3388873199661113, + "grad_norm": 3.4506389702983573, + "learning_rate": 1.2207370845320488e-05, + "loss": 0.5973, "step": 9482 }, { - "epoch": 1.9884671838959949, - "grad_norm": 7.25650782017251, - "learning_rate": 5.393757298574767e-06, - "loss": 0.7646, + "epoch": 1.3390285230160972, + "grad_norm": 3.5821221560061205, + "learning_rate": 1.220588396987091e-05, + "loss": 0.5703, "step": 9483 }, { - "epoch": 1.9886768714615224, - "grad_norm": 7.794097950516992, - "learning_rate": 5.391748003475865e-06, - "loss": 0.8639, + "epoch": 1.339169726066083, + "grad_norm": 3.1485399693975804, + "learning_rate": 1.2204397043157541e-05, + "loss": 0.4886, "step": 9484 }, { - "epoch": 1.9888865590270497, - "grad_norm": 6.552283306202356, - "learning_rate": 5.389738944557867e-06, - "loss": 0.6833, + "epoch": 1.339310929116069, + "grad_norm": 3.4094009214543055, + "learning_rate": 1.2202910065214939e-05, + "loss": 0.6504, "step": 9485 }, { - "epoch": 1.989096246592577, - "grad_norm": 6.8115769814195, - "learning_rate": 5.387730121923747e-06, - "loss": 0.6566, + "epoch": 1.3394521321660549, + "grad_norm": 3.6375790610975356, + "learning_rate": 1.2201423036077657e-05, + "loss": 0.6149, "step": 9486 }, { - "epoch": 1.9893059341581045, - "grad_norm": 7.080468123382386, - "learning_rate": 5.385721535676459e-06, - "loss": 0.8411, + "epoch": 1.3395933352160407, + "grad_norm": 3.8065820374636385, + "learning_rate": 1.2199935955780255e-05, + "loss": 0.6032, "step": 9487 }, { - "epoch": 1.9895156217236318, - "grad_norm": 5.651009838868676, - "learning_rate": 5.3837131859189466e-06, - "loss": 0.4571, + "epoch": 1.3397345382660266, + "grad_norm": 3.7384230161145853, + "learning_rate": 1.2198448824357292e-05, + "loss": 0.5512, "step": 9488 }, { - "epoch": 1.9897253092891591, - "grad_norm": 7.8272547683109, - "learning_rate": 5.381705072754136e-06, - "loss": 0.5843, + "epoch": 1.3398757413160125, + "grad_norm": 3.556937550206897, + "learning_rate": 1.2196961641843326e-05, + "loss": 0.658, "step": 9489 }, { - "epoch": 1.9899349968546867, - "grad_norm": 6.49177208006313, - "learning_rate": 5.3796971962849545e-06, - "loss": 0.6053, + "epoch": 1.3400169443659984, + "grad_norm": 3.5449440963912413, + "learning_rate": 1.2195474408272919e-05, + "loss": 0.6149, "step": 9490 }, { - "epoch": 1.9901446844202137, - "grad_norm": 6.859427276740151, - "learning_rate": 5.3776895566143005e-06, - "loss": 0.7397, + "epoch": 1.340158147415984, + "grad_norm": 3.6591956402124493, + "learning_rate": 1.2193987123680639e-05, + "loss": 0.5405, "step": 9491 }, { - "epoch": 1.9903543719857413, - "grad_norm": 8.95844756938858, - "learning_rate": 5.375682153845078e-06, - "loss": 0.9381, + "epoch": 1.34029935046597, + "grad_norm": 3.856805363911538, + "learning_rate": 1.2192499788101044e-05, + "loss": 0.6688, "step": 9492 }, { - "epoch": 1.9905640595512686, - "grad_norm": 5.995766473747759, - "learning_rate": 5.373674988080164e-06, - "loss": 0.5716, + "epoch": 1.3404405535159558, + "grad_norm": 3.6363421723079288, + "learning_rate": 1.2191012401568698e-05, + "loss": 0.6355, "step": 9493 }, { - "epoch": 1.9907737471167959, - "grad_norm": 6.592947171193219, - "learning_rate": 5.371668059422429e-06, - "loss": 0.658, + "epoch": 1.3405817565659417, + "grad_norm": 3.4936021069998713, + "learning_rate": 1.2189524964118174e-05, + "loss": 0.6023, "step": 9494 }, { - "epoch": 1.9909834346823234, - "grad_norm": 7.130471565893441, - "learning_rate": 5.3696613679747365e-06, - "loss": 0.7266, + "epoch": 1.3407229596159276, + "grad_norm": 3.4240243114236635, + "learning_rate": 1.2188037475784033e-05, + "loss": 0.5855, "step": 9495 }, { - "epoch": 1.9911931222478507, - "grad_norm": 9.731677914761875, - "learning_rate": 5.36765491383993e-06, - "loss": 1.1766, + "epoch": 1.3408641626659135, + "grad_norm": 3.688040769666093, + "learning_rate": 1.2186549936600847e-05, + "loss": 0.663, "step": 9496 }, { - "epoch": 1.991402809813378, - "grad_norm": 5.580330408713471, - "learning_rate": 5.36564869712084e-06, - "loss": 0.522, + "epoch": 1.3410053657158993, + "grad_norm": 3.642948190663818, + "learning_rate": 1.2185062346603184e-05, + "loss": 0.6207, "step": 9497 }, { - "epoch": 1.9916124973789056, - "grad_norm": 6.558839137341501, - "learning_rate": 5.3636427179202924e-06, - "loss": 0.6995, + "epoch": 1.3411465687658852, + "grad_norm": 2.824834396200575, + "learning_rate": 1.2183574705825618e-05, + "loss": 0.4484, "step": 9498 }, { - "epoch": 1.9918221849444326, - "grad_norm": 7.914830619142934, - "learning_rate": 5.3616369763411e-06, - "loss": 0.657, + "epoch": 1.341287771815871, + "grad_norm": 3.7750083967331434, + "learning_rate": 1.2182087014302715e-05, + "loss": 0.7176, "step": 9499 }, { - "epoch": 1.9920318725099602, - "grad_norm": 5.802051472066316, - "learning_rate": 5.359631472486057e-06, - "loss": 0.529, + "epoch": 1.341428974865857, + "grad_norm": 2.736824344403802, + "learning_rate": 1.2180599272069058e-05, + "loss": 0.5183, "step": 9500 }, { - "epoch": 1.9922415600754875, - "grad_norm": 6.144464228550104, - "learning_rate": 5.357626206457946e-06, - "loss": 0.6986, + "epoch": 1.3415701779158429, + "grad_norm": 3.847069793910702, + "learning_rate": 1.2179111479159212e-05, + "loss": 0.662, "step": 9501 }, { - "epoch": 1.9924512476410148, - "grad_norm": 6.582266032364133, - "learning_rate": 5.355621178359549e-06, - "loss": 0.6467, + "epoch": 1.3417113809658288, + "grad_norm": 3.1602452728343446, + "learning_rate": 1.2177623635607753e-05, + "loss": 0.587, "step": 9502 }, { - "epoch": 1.9926609352065423, - "grad_norm": 6.687776499674018, - "learning_rate": 5.353616388293617e-06, - "loss": 0.485, + "epoch": 1.3418525840158146, + "grad_norm": 3.0734893957851837, + "learning_rate": 1.2176135741449265e-05, + "loss": 0.513, "step": 9503 }, { - "epoch": 1.9928706227720696, - "grad_norm": 6.362933304209302, - "learning_rate": 5.351611836362907e-06, - "loss": 0.6516, + "epoch": 1.3419937870658005, + "grad_norm": 4.043516469574682, + "learning_rate": 1.2174647796718322e-05, + "loss": 0.6133, "step": 9504 }, { - "epoch": 1.993080310337597, - "grad_norm": 7.344398220094289, - "learning_rate": 5.3496075226701525e-06, - "loss": 0.6535, + "epoch": 1.3421349901157864, + "grad_norm": 3.5405752870275475, + "learning_rate": 1.2173159801449503e-05, + "loss": 0.5529, "step": 9505 }, { - "epoch": 1.9932899979031244, - "grad_norm": 5.859689075476522, - "learning_rate": 5.347603447318075e-06, - "loss": 0.5416, + "epoch": 1.3422761931657723, + "grad_norm": 4.056864171822321, + "learning_rate": 1.217167175567739e-05, + "loss": 0.7302, "step": 9506 }, { - "epoch": 1.9934996854686517, - "grad_norm": 5.985380369338966, - "learning_rate": 5.345599610409389e-06, - "loss": 0.6754, + "epoch": 1.3424173962157582, + "grad_norm": 3.591377802408437, + "learning_rate": 1.217018365943656e-05, + "loss": 0.5727, "step": 9507 }, { - "epoch": 1.993709373034179, - "grad_norm": 7.494279636468731, - "learning_rate": 5.3435960120468014e-06, - "loss": 0.827, + "epoch": 1.342558599265744, + "grad_norm": 3.2951522500026615, + "learning_rate": 1.2168695512761604e-05, + "loss": 0.6631, "step": 9508 }, { - "epoch": 1.9939190605997066, - "grad_norm": 7.187364253286043, - "learning_rate": 5.341592652332992e-06, - "loss": 0.5401, + "epoch": 1.34269980231573, + "grad_norm": 3.21489168530271, + "learning_rate": 1.2167207315687098e-05, + "loss": 0.581, "step": 9509 }, { - "epoch": 1.9941287481652337, - "grad_norm": 7.121691402763271, - "learning_rate": 5.339589531370637e-06, - "loss": 0.9629, + "epoch": 1.3428410053657158, + "grad_norm": 3.6168848446528057, + "learning_rate": 1.2165719068247626e-05, + "loss": 0.5349, "step": 9510 }, { - "epoch": 1.9943384357307612, - "grad_norm": 7.3734510550967505, - "learning_rate": 5.3375866492624e-06, - "loss": 0.6937, + "epoch": 1.3429822084157017, + "grad_norm": 2.8616998666174656, + "learning_rate": 1.2164230770477782e-05, + "loss": 0.5222, "step": 9511 }, { - "epoch": 1.9945481232962885, - "grad_norm": 8.1002367787133, - "learning_rate": 5.3355840061109435e-06, - "loss": 0.7852, + "epoch": 1.3431234114656876, + "grad_norm": 3.824965610396753, + "learning_rate": 1.216274242241215e-05, + "loss": 0.755, "step": 9512 }, { - "epoch": 1.9947578108618158, - "grad_norm": 8.492345264078343, - "learning_rate": 5.333581602018887e-06, - "loss": 0.7809, + "epoch": 1.3432646145156735, + "grad_norm": 4.215403999937383, + "learning_rate": 1.2161254024085318e-05, + "loss": 0.7723, "step": 9513 }, { - "epoch": 1.9949674984273433, - "grad_norm": 6.378506011343575, - "learning_rate": 5.331579437088867e-06, - "loss": 0.5481, + "epoch": 1.3434058175656594, + "grad_norm": 3.621464128164179, + "learning_rate": 1.2159765575531877e-05, + "loss": 0.6364, "step": 9514 }, { - "epoch": 1.9951771859928706, - "grad_norm": 8.355202408906475, - "learning_rate": 5.329577511423503e-06, - "loss": 0.9749, + "epoch": 1.3435470206156452, + "grad_norm": 3.5188310359724935, + "learning_rate": 1.2158277076786415e-05, + "loss": 0.5834, "step": 9515 }, { - "epoch": 1.995386873558398, - "grad_norm": 6.531685533871646, - "learning_rate": 5.327575825125391e-06, - "loss": 0.5367, + "epoch": 1.3436882236656311, + "grad_norm": 3.793689224665667, + "learning_rate": 1.2156788527883524e-05, + "loss": 0.6081, "step": 9516 }, { - "epoch": 1.9955965611239255, - "grad_norm": 6.219300373022308, - "learning_rate": 5.325574378297118e-06, - "loss": 0.5536, + "epoch": 1.343829426715617, + "grad_norm": 3.1310544953142605, + "learning_rate": 1.2155299928857803e-05, + "loss": 0.4742, "step": 9517 }, { - "epoch": 1.9958062486894526, - "grad_norm": 7.491805274937188, - "learning_rate": 5.3235731710412695e-06, - "loss": 0.795, + "epoch": 1.3439706297656029, + "grad_norm": 3.2075901555208857, + "learning_rate": 1.2153811279743841e-05, + "loss": 0.4664, "step": 9518 }, { - "epoch": 1.99601593625498, - "grad_norm": 5.109495819920621, - "learning_rate": 5.321572203460403e-06, - "loss": 0.4679, + "epoch": 1.3441118328155888, + "grad_norm": 3.70370555178042, + "learning_rate": 1.2152322580576232e-05, + "loss": 0.5163, "step": 9519 }, { - "epoch": 1.9962256238205074, - "grad_norm": 8.200383703092333, - "learning_rate": 5.31957147565708e-06, - "loss": 0.6264, + "epoch": 1.3442530358655747, + "grad_norm": 3.250533588515463, + "learning_rate": 1.215083383138958e-05, + "loss": 0.5344, "step": 9520 }, { - "epoch": 1.9964353113860347, - "grad_norm": 6.5908987510039125, - "learning_rate": 5.317570987733836e-06, - "loss": 0.6694, + "epoch": 1.3443942389155605, + "grad_norm": 3.3894286347363045, + "learning_rate": 1.2149345032218476e-05, + "loss": 0.5855, "step": 9521 }, { - "epoch": 1.9966449989515622, - "grad_norm": 8.0699981547914, - "learning_rate": 5.315570739793194e-06, - "loss": 0.7175, + "epoch": 1.3445354419655464, + "grad_norm": 3.6396430476646104, + "learning_rate": 1.2147856183097524e-05, + "loss": 0.5681, "step": 9522 }, { - "epoch": 1.9968546865170895, - "grad_norm": 6.843659335835738, - "learning_rate": 5.313570731937679e-06, - "loss": 0.5744, + "epoch": 1.3446766450155323, + "grad_norm": 3.3830036673809336, + "learning_rate": 1.214636728406132e-05, + "loss": 0.4724, "step": 9523 }, { - "epoch": 1.9970643740826168, - "grad_norm": 6.540647539926049, - "learning_rate": 5.311570964269796e-06, - "loss": 0.7388, + "epoch": 1.3448178480655182, + "grad_norm": 3.5911779114940634, + "learning_rate": 1.2144878335144469e-05, + "loss": 0.6269, "step": 9524 }, { - "epoch": 1.9972740616481444, - "grad_norm": 7.399494533194387, - "learning_rate": 5.3095714368920325e-06, - "loss": 0.7449, + "epoch": 1.344959051115504, + "grad_norm": 3.773120524563539, + "learning_rate": 1.214338933638157e-05, + "loss": 0.6615, "step": 9525 }, { - "epoch": 1.9974837492136717, - "grad_norm": 7.474271122216735, - "learning_rate": 5.3075721499068634e-06, - "loss": 0.679, + "epoch": 1.34510025416549, + "grad_norm": 3.9252476929937816, + "learning_rate": 1.214190028780723e-05, + "loss": 0.5989, "step": 9526 }, { - "epoch": 1.997693436779199, - "grad_norm": 7.012617566297008, - "learning_rate": 5.305573103416766e-06, - "loss": 0.8424, + "epoch": 1.3452414572154758, + "grad_norm": 4.526512144068702, + "learning_rate": 1.2140411189456049e-05, + "loss": 0.7814, "step": 9527 }, { - "epoch": 1.9979031243447265, - "grad_norm": 6.1609432156821935, - "learning_rate": 5.3035742975241836e-06, - "loss": 0.4282, + "epoch": 1.3453826602654617, + "grad_norm": 3.5181255584299254, + "learning_rate": 1.213892204136264e-05, + "loss": 0.5874, "step": 9528 }, { - "epoch": 1.9981128119102536, - "grad_norm": 5.870847821874881, - "learning_rate": 5.3015757323315675e-06, - "loss": 0.4788, + "epoch": 1.3455238633154476, + "grad_norm": 3.5823587304544557, + "learning_rate": 1.2137432843561602e-05, + "loss": 0.7258, "step": 9529 }, { - "epoch": 1.9983224994757811, - "grad_norm": 7.395338042292884, - "learning_rate": 5.299577407941345e-06, - "loss": 0.6664, + "epoch": 1.3456650663654335, + "grad_norm": 3.173298931162847, + "learning_rate": 1.2135943596087554e-05, + "loss": 0.4945, "step": 9530 }, { - "epoch": 1.9985321870413084, - "grad_norm": 9.68710053816261, - "learning_rate": 5.297579324455928e-06, - "loss": 1.1013, + "epoch": 1.3458062694154194, + "grad_norm": 3.0909190647424603, + "learning_rate": 1.2134454298975096e-05, + "loss": 0.4773, "step": 9531 }, { - "epoch": 1.9987418746068357, - "grad_norm": 8.797282530913737, - "learning_rate": 5.295581481977727e-06, - "loss": 0.6876, + "epoch": 1.3459474724654052, + "grad_norm": 4.557325221355176, + "learning_rate": 1.2132964952258841e-05, + "loss": 0.7261, "step": 9532 }, { - "epoch": 1.9989515621723632, - "grad_norm": 8.530586612719365, - "learning_rate": 5.293583880609143e-06, - "loss": 0.7476, + "epoch": 1.3460886755153911, + "grad_norm": 2.9188963538333317, + "learning_rate": 1.2131475555973403e-05, + "loss": 0.5147, "step": 9533 }, { - "epoch": 1.9991612497378906, - "grad_norm": 6.792998468077389, - "learning_rate": 5.291586520452541e-06, - "loss": 0.5898, + "epoch": 1.346229878565377, + "grad_norm": 3.4002038474717007, + "learning_rate": 1.2129986110153395e-05, + "loss": 0.6207, "step": 9534 }, { - "epoch": 1.9993709373034179, - "grad_norm": 7.300385259550495, - "learning_rate": 5.289589401610293e-06, - "loss": 0.6956, + "epoch": 1.346371081615363, + "grad_norm": 3.9747152147812397, + "learning_rate": 1.2128496614833427e-05, + "loss": 0.6869, "step": 9535 }, { - "epoch": 1.9995806248689454, - "grad_norm": 7.400259138093475, - "learning_rate": 5.2875925241847645e-06, - "loss": 0.6737, + "epoch": 1.3465122846653488, + "grad_norm": 4.008302576254742, + "learning_rate": 1.2127007070048117e-05, + "loss": 0.6335, "step": 9536 }, { - "epoch": 1.9997903124344725, - "grad_norm": 6.629400462192665, - "learning_rate": 5.285595888278292e-06, - "loss": 0.6111, + "epoch": 1.3466534877153347, + "grad_norm": 3.8891035007349144, + "learning_rate": 1.2125517475832082e-05, + "loss": 0.6671, "step": 9537 }, { - "epoch": 2.0, - "grad_norm": 5.767978948009052, - "learning_rate": 5.283599493993202e-06, - "loss": 0.5602, + "epoch": 1.3467946907653205, + "grad_norm": 3.6055300176029235, + "learning_rate": 1.2124027832219942e-05, + "loss": 0.648, "step": 9538 }, { - "epoch": 2.0002096875655275, - "grad_norm": 3.598477005270664, - "learning_rate": 5.281603341431822e-06, - "loss": 0.2145, + "epoch": 1.3469358938153064, + "grad_norm": 4.043433125870707, + "learning_rate": 1.2122538139246308e-05, + "loss": 0.6794, "step": 9539 }, { - "epoch": 2.0004193751310546, - "grad_norm": 3.533707912011714, - "learning_rate": 5.279607430696452e-06, - "loss": 0.2322, + "epoch": 1.3470770968652923, + "grad_norm": 3.9877474671700477, + "learning_rate": 1.2121048396945807e-05, + "loss": 0.738, "step": 9540 }, { - "epoch": 2.000629062696582, - "grad_norm": 3.9390980060928014, - "learning_rate": 5.277611761889392e-06, - "loss": 0.1959, + "epoch": 1.3472182999152782, + "grad_norm": 3.8026822932372704, + "learning_rate": 1.2119558605353055e-05, + "loss": 0.6718, "step": 9541 }, { - "epoch": 2.0008387502621097, - "grad_norm": 3.4720062287390703, - "learning_rate": 5.275616335112918e-06, - "loss": 0.1642, + "epoch": 1.347359502965264, + "grad_norm": 3.217331994605831, + "learning_rate": 1.2118068764502677e-05, + "loss": 0.4566, "step": 9542 }, { - "epoch": 2.0010484378276368, - "grad_norm": 3.2003169542283163, - "learning_rate": 5.273621150469299e-06, - "loss": 0.1834, + "epoch": 1.34750070601525, + "grad_norm": 3.5242429495015286, + "learning_rate": 1.2116578874429296e-05, + "loss": 0.5035, "step": 9543 }, { - "epoch": 2.0012581253931643, - "grad_norm": 4.379293570544043, - "learning_rate": 5.271626208060791e-06, - "loss": 0.2045, + "epoch": 1.3476419090652358, + "grad_norm": 3.9053960290713903, + "learning_rate": 1.2115088935167538e-05, + "loss": 0.6263, "step": 9544 }, { - "epoch": 2.0014678129586914, - "grad_norm": 3.9151749958400313, - "learning_rate": 5.269631507989647e-06, - "loss": 0.1967, + "epoch": 1.3477831121152217, + "grad_norm": 4.42878320311865, + "learning_rate": 1.2113598946752024e-05, + "loss": 0.7508, "step": 9545 }, { - "epoch": 2.001677500524219, - "grad_norm": 3.7454938122245536, - "learning_rate": 5.267637050358092e-06, - "loss": 0.2195, + "epoch": 1.3479243151652076, + "grad_norm": 3.7035363290646934, + "learning_rate": 1.2112108909217386e-05, + "loss": 0.5638, "step": 9546 }, { - "epoch": 2.0018871880897464, - "grad_norm": 4.391417624413541, - "learning_rate": 5.265642835268343e-06, - "loss": 0.254, + "epoch": 1.3480655182151935, + "grad_norm": 3.3943791876491387, + "learning_rate": 1.2110618822598244e-05, + "loss": 0.533, "step": 9547 }, { - "epoch": 2.0020968756552735, - "grad_norm": 3.7194747169797924, - "learning_rate": 5.2636488628226125e-06, - "loss": 0.1743, + "epoch": 1.3482067212651794, + "grad_norm": 3.1301372543178565, + "learning_rate": 1.2109128686929235e-05, + "loss": 0.5697, "step": 9548 }, { - "epoch": 2.002306563220801, - "grad_norm": 3.2097374997450947, - "learning_rate": 5.261655133123089e-06, - "loss": 0.1752, + "epoch": 1.3483479243151653, + "grad_norm": 3.8403506783955375, + "learning_rate": 1.2107638502244987e-05, + "loss": 0.5298, "step": 9549 }, { - "epoch": 2.0025162507863286, - "grad_norm": 2.915400488410725, - "learning_rate": 5.259661646271962e-06, - "loss": 0.152, + "epoch": 1.3484891273651511, + "grad_norm": 3.119745327035719, + "learning_rate": 1.210614826858013e-05, + "loss": 0.5531, "step": 9550 }, { - "epoch": 2.0027259383518556, - "grad_norm": 3.4782649504915537, - "learning_rate": 5.257668402371398e-06, - "loss": 0.2002, + "epoch": 1.348630330415137, + "grad_norm": 3.648846785896331, + "learning_rate": 1.2104657985969297e-05, + "loss": 0.5876, "step": 9551 }, { - "epoch": 2.002935625917383, - "grad_norm": 3.226478019123892, - "learning_rate": 5.255675401523549e-06, - "loss": 0.1956, + "epoch": 1.348771533465123, + "grad_norm": 2.8929726323726164, + "learning_rate": 1.2103167654447121e-05, + "loss": 0.4608, "step": 9552 }, { - "epoch": 2.0031453134829103, - "grad_norm": 2.49423828490175, - "learning_rate": 5.253682643830564e-06, - "loss": 0.1435, + "epoch": 1.3489127365151088, + "grad_norm": 3.544931281620047, + "learning_rate": 1.2101677274048235e-05, + "loss": 0.5945, "step": 9553 }, { - "epoch": 2.0033550010484378, - "grad_norm": 3.1133456986312553, - "learning_rate": 5.2516901293945845e-06, - "loss": 0.1664, + "epoch": 1.3490539395650947, + "grad_norm": 3.456959518184993, + "learning_rate": 1.210018684480728e-05, + "loss": 0.5418, "step": 9554 }, { - "epoch": 2.0035646886139653, - "grad_norm": 2.8309237238346334, - "learning_rate": 5.249697858317713e-06, - "loss": 0.1373, + "epoch": 1.3491951426150806, + "grad_norm": 3.886402380303061, + "learning_rate": 1.209869636675889e-05, + "loss": 0.6027, "step": 9555 }, { - "epoch": 2.0037743761794924, - "grad_norm": 3.3596507953111403, - "learning_rate": 5.247705830702065e-06, - "loss": 0.1886, + "epoch": 1.3493363456650664, + "grad_norm": 3.3387055498435543, + "learning_rate": 1.20972058399377e-05, + "loss": 0.5495, "step": 9556 }, { - "epoch": 2.00398406374502, - "grad_norm": 2.9423103088570683, - "learning_rate": 5.2457140466497394e-06, - "loss": 0.159, + "epoch": 1.3494775487150523, + "grad_norm": 3.958090957148684, + "learning_rate": 1.2095715264378354e-05, + "loss": 0.5441, "step": 9557 }, { - "epoch": 2.0041937513105474, - "grad_norm": 3.5576378153921007, - "learning_rate": 5.243722506262815e-06, - "loss": 0.1744, + "epoch": 1.3496187517650382, + "grad_norm": 3.612642290091949, + "learning_rate": 1.2094224640115488e-05, + "loss": 0.504, "step": 9558 }, { - "epoch": 2.0044034388760745, - "grad_norm": 3.5724011372558224, - "learning_rate": 5.241731209643357e-06, - "loss": 0.1909, + "epoch": 1.349759954815024, + "grad_norm": 3.540389752631854, + "learning_rate": 1.2092733967183748e-05, + "loss": 0.6273, "step": 9559 }, { - "epoch": 2.004613126441602, - "grad_norm": 2.922771159607419, - "learning_rate": 5.239740156893432e-06, - "loss": 0.1276, + "epoch": 1.34990115786501, + "grad_norm": 3.5153059835275795, + "learning_rate": 1.2091243245617774e-05, + "loss": 0.5221, "step": 9560 }, { - "epoch": 2.0048228140071296, - "grad_norm": 4.05732032089272, - "learning_rate": 5.237749348115077e-06, - "loss": 0.2023, + "epoch": 1.3500423609149959, + "grad_norm": 4.8231698925566935, + "learning_rate": 1.208975247545221e-05, + "loss": 0.5928, "step": 9561 }, { - "epoch": 2.0050325015726567, - "grad_norm": 4.255949172512091, - "learning_rate": 5.23575878341033e-06, - "loss": 0.2401, + "epoch": 1.3501835639649817, + "grad_norm": 3.62756304152625, + "learning_rate": 1.20882616567217e-05, + "loss": 0.6213, "step": 9562 }, { - "epoch": 2.005242189138184, - "grad_norm": 4.236095703928406, - "learning_rate": 5.233768462881209e-06, - "loss": 0.1943, + "epoch": 1.3503247670149676, + "grad_norm": 3.3866522704679576, + "learning_rate": 1.2086770789460893e-05, + "loss": 0.6137, "step": 9563 }, { - "epoch": 2.0054518767037113, - "grad_norm": 3.7567133654819527, - "learning_rate": 5.231778386629718e-06, - "loss": 0.1779, + "epoch": 1.3504659700649535, + "grad_norm": 3.2778297241943672, + "learning_rate": 1.2085279873704433e-05, + "loss": 0.6035, "step": 9564 }, { - "epoch": 2.005661564269239, - "grad_norm": 4.030923733611952, - "learning_rate": 5.229788554757853e-06, - "loss": 0.1835, + "epoch": 1.3506071731149394, + "grad_norm": 4.092318074083094, + "learning_rate": 1.2083788909486968e-05, + "loss": 0.7384, "step": 9565 }, { - "epoch": 2.0058712518347663, - "grad_norm": 4.397573243341452, - "learning_rate": 5.227798967367603e-06, - "loss": 0.2215, + "epoch": 1.3507483761649253, + "grad_norm": 4.1116713090401875, + "learning_rate": 1.208229789684315e-05, + "loss": 0.7017, "step": 9566 }, { - "epoch": 2.0060809394002934, - "grad_norm": 3.5191453731177518, - "learning_rate": 5.225809624560933e-06, - "loss": 0.1435, + "epoch": 1.3508895792149112, + "grad_norm": 3.7660344721301158, + "learning_rate": 1.2080806835807627e-05, + "loss": 0.557, "step": 9567 }, { - "epoch": 2.006290626965821, - "grad_norm": 3.239360549296363, - "learning_rate": 5.223820526439796e-06, - "loss": 0.1657, + "epoch": 1.351030782264897, + "grad_norm": 3.8051255262347916, + "learning_rate": 1.2079315726415053e-05, + "loss": 0.6113, "step": 9568 }, { - "epoch": 2.0065003145313485, - "grad_norm": 3.9533902462593975, - "learning_rate": 5.221831673106146e-06, - "loss": 0.1784, + "epoch": 1.351171985314883, + "grad_norm": 4.092283924453941, + "learning_rate": 1.2077824568700081e-05, + "loss": 0.579, "step": 9569 }, { - "epoch": 2.0067100020968756, - "grad_norm": 4.150861028379902, - "learning_rate": 5.2198430646619045e-06, - "loss": 0.2011, + "epoch": 1.3513131883648688, + "grad_norm": 3.8555923042832156, + "learning_rate": 1.2076333362697358e-05, + "loss": 0.5777, "step": 9570 }, { - "epoch": 2.006919689662403, - "grad_norm": 2.589652690827136, - "learning_rate": 5.217854701209001e-06, - "loss": 0.1212, + "epoch": 1.3514543914148547, + "grad_norm": 3.83965775290258, + "learning_rate": 1.2074842108441549e-05, + "loss": 0.7044, "step": 9571 }, { - "epoch": 2.00712937722793, - "grad_norm": 2.9636329584249, - "learning_rate": 5.215866582849334e-06, - "loss": 0.1359, + "epoch": 1.3515955944648406, + "grad_norm": 3.2322568850720432, + "learning_rate": 1.2073350805967308e-05, + "loss": 0.5628, "step": 9572 }, { - "epoch": 2.0073390647934577, - "grad_norm": 3.119259996002108, - "learning_rate": 5.2138787096848066e-06, - "loss": 0.1286, + "epoch": 1.3517367975148264, + "grad_norm": 3.6840572335469775, + "learning_rate": 1.2071859455309283e-05, + "loss": 0.6861, "step": 9573 }, { - "epoch": 2.0075487523589852, - "grad_norm": 3.1734878688501724, - "learning_rate": 5.211891081817297e-06, - "loss": 0.1376, + "epoch": 1.3518780005648123, + "grad_norm": 2.7052780743796947, + "learning_rate": 1.2070368056502142e-05, + "loss": 0.4357, "step": 9574 }, { - "epoch": 2.0077584399245123, - "grad_norm": 4.126856300646213, - "learning_rate": 5.209903699348668e-06, - "loss": 0.1677, + "epoch": 1.3520192036147982, + "grad_norm": 3.3132526904086532, + "learning_rate": 1.2068876609580542e-05, + "loss": 0.519, "step": 9575 }, { - "epoch": 2.00796812749004, - "grad_norm": 3.3645318805528084, - "learning_rate": 5.207916562380789e-06, - "loss": 0.1249, + "epoch": 1.352160406664784, + "grad_norm": 2.649418849941158, + "learning_rate": 1.2067385114579144e-05, + "loss": 0.4213, "step": 9576 }, { - "epoch": 2.0081778150555674, - "grad_norm": 3.515844320237842, - "learning_rate": 5.205929671015492e-06, - "loss": 0.174, + "epoch": 1.3523016097147698, + "grad_norm": 3.423730960299369, + "learning_rate": 1.2065893571532608e-05, + "loss": 0.6404, "step": 9577 }, { - "epoch": 2.0083875026210944, - "grad_norm": 3.297452062501551, - "learning_rate": 5.20394302535462e-06, - "loss": 0.1168, + "epoch": 1.3524428127647556, + "grad_norm": 3.0746884448413363, + "learning_rate": 1.2064401980475595e-05, + "loss": 0.5439, "step": 9578 }, { - "epoch": 2.008597190186622, - "grad_norm": 3.5917045677727297, - "learning_rate": 5.201956625499984e-06, - "loss": 0.1512, + "epoch": 1.3525840158147415, + "grad_norm": 3.965427070319809, + "learning_rate": 1.2062910341442772e-05, + "loss": 0.4749, "step": 9579 }, { - "epoch": 2.0088068777521495, - "grad_norm": 4.16183215891395, - "learning_rate": 5.199970471553391e-06, - "loss": 0.1607, + "epoch": 1.3527252188647274, + "grad_norm": 4.650046649905797, + "learning_rate": 1.2061418654468808e-05, + "loss": 0.8024, "step": 9580 }, { - "epoch": 2.0090165653176766, - "grad_norm": 3.5389679135331074, - "learning_rate": 5.197984563616635e-06, - "loss": 0.1506, + "epoch": 1.3528664219147133, + "grad_norm": 3.691678769830578, + "learning_rate": 1.2059926919588362e-05, + "loss": 0.517, "step": 9581 }, { - "epoch": 2.009226252883204, - "grad_norm": 3.638484210935406, - "learning_rate": 5.195998901791503e-06, - "loss": 0.1664, + "epoch": 1.3530076249646992, + "grad_norm": 4.12070737124535, + "learning_rate": 1.2058435136836101e-05, + "loss": 0.7345, "step": 9582 }, { - "epoch": 2.009435940448731, - "grad_norm": 3.903128448172513, - "learning_rate": 5.19401348617976e-06, - "loss": 0.1511, + "epoch": 1.353148828014685, + "grad_norm": 2.6607546974361256, + "learning_rate": 1.20569433062467e-05, + "loss": 0.4608, "step": 9583 }, { - "epoch": 2.0096456280142587, - "grad_norm": 4.753730943679167, - "learning_rate": 5.1920283168831574e-06, - "loss": 0.1572, + "epoch": 1.353290031064671, + "grad_norm": 3.206979913276047, + "learning_rate": 1.2055451427854825e-05, + "loss": 0.5592, "step": 9584 }, { - "epoch": 2.0098553155797863, - "grad_norm": 6.516551821184562, - "learning_rate": 5.190043394003446e-06, - "loss": 0.2198, + "epoch": 1.3534312341146568, + "grad_norm": 3.4708931668316048, + "learning_rate": 1.2053959501695144e-05, + "loss": 0.5813, "step": 9585 }, { - "epoch": 2.0100650031453133, - "grad_norm": 3.79207372958416, - "learning_rate": 5.188058717642348e-06, - "loss": 0.1632, + "epoch": 1.3535724371646427, + "grad_norm": 4.150168831451216, + "learning_rate": 1.2052467527802337e-05, + "loss": 0.6581, "step": 9586 }, { - "epoch": 2.010274690710841, - "grad_norm": 4.551769207676404, - "learning_rate": 5.186074287901593e-06, - "loss": 0.1667, + "epoch": 1.3537136402146286, + "grad_norm": 4.071220790020379, + "learning_rate": 1.2050975506211064e-05, + "loss": 0.6702, "step": 9587 }, { - "epoch": 2.0104843782763684, - "grad_norm": 3.657065323431733, - "learning_rate": 5.184090104882878e-06, - "loss": 0.1747, + "epoch": 1.3538548432646145, + "grad_norm": 3.185381947965574, + "learning_rate": 1.2049483436956009e-05, + "loss": 0.4974, "step": 9588 }, { - "epoch": 2.0106940658418955, - "grad_norm": 3.4201333464222854, - "learning_rate": 5.182106168687895e-06, - "loss": 0.1429, + "epoch": 1.3539960463146004, + "grad_norm": 3.3874929161237968, + "learning_rate": 1.2047991320071846e-05, + "loss": 0.6607, "step": 9589 }, { - "epoch": 2.010903753407423, - "grad_norm": 4.005225802675114, - "learning_rate": 5.180122479418328e-06, - "loss": 0.1998, + "epoch": 1.3541372493645862, + "grad_norm": 3.8498809519898094, + "learning_rate": 1.2046499155593245e-05, + "loss": 0.5391, "step": 9590 }, { - "epoch": 2.01111344097295, - "grad_norm": 3.2020629341771842, - "learning_rate": 5.17813903717585e-06, - "loss": 0.1196, + "epoch": 1.3542784524145721, + "grad_norm": 3.6170898665804576, + "learning_rate": 1.2045006943554888e-05, + "loss": 0.5916, "step": 9591 }, { - "epoch": 2.0113231285384776, - "grad_norm": 4.431591184129306, - "learning_rate": 5.176155842062101e-06, - "loss": 0.1832, + "epoch": 1.354419655464558, + "grad_norm": 3.4111803866828514, + "learning_rate": 1.2043514683991455e-05, + "loss": 0.6185, "step": 9592 }, { - "epoch": 2.011532816104005, - "grad_norm": 3.2884369822231387, - "learning_rate": 5.174172894178733e-06, - "loss": 0.1653, + "epoch": 1.3545608585145439, + "grad_norm": 3.8404482081581612, + "learning_rate": 1.2042022376937623e-05, + "loss": 0.6198, "step": 9593 }, { - "epoch": 2.0117425036695322, - "grad_norm": 4.546683063257566, - "learning_rate": 5.1721901936273775e-06, - "loss": 0.1459, + "epoch": 1.3547020615645298, + "grad_norm": 3.3711475659094567, + "learning_rate": 1.2040530022428074e-05, + "loss": 0.6063, "step": 9594 }, { - "epoch": 2.0119521912350598, - "grad_norm": 3.9704349508650614, - "learning_rate": 5.1702077405096485e-06, - "loss": 0.1487, + "epoch": 1.3548432646145157, + "grad_norm": 3.018159802003804, + "learning_rate": 1.2039037620497486e-05, + "loss": 0.5202, "step": 9595 }, { - "epoch": 2.0121618788005873, - "grad_norm": 3.705160865880354, - "learning_rate": 5.168225534927145e-06, - "loss": 0.1258, + "epoch": 1.3549844676645015, + "grad_norm": 3.875906756022677, + "learning_rate": 1.2037545171180545e-05, + "loss": 0.5542, "step": 9596 }, { - "epoch": 2.0123715663661144, - "grad_norm": 4.843432841377177, - "learning_rate": 5.1662435769814675e-06, - "loss": 0.1806, + "epoch": 1.3551256707144874, + "grad_norm": 3.265397958477719, + "learning_rate": 1.2036052674511935e-05, + "loss": 0.5538, "step": 9597 }, { - "epoch": 2.012581253931642, - "grad_norm": 4.586769806280941, - "learning_rate": 5.1642618667741875e-06, - "loss": 0.1645, + "epoch": 1.3552668737644733, + "grad_norm": 3.0044130476694453, + "learning_rate": 1.2034560130526341e-05, + "loss": 0.6045, "step": 9598 }, { - "epoch": 2.0127909414971694, - "grad_norm": 3.0805542999739552, - "learning_rate": 5.162280404406877e-06, - "loss": 0.1414, + "epoch": 1.3554080768144592, + "grad_norm": 3.854163388616594, + "learning_rate": 1.2033067539258445e-05, + "loss": 0.5719, "step": 9599 }, { - "epoch": 2.0130006290626965, - "grad_norm": 3.656694495761225, - "learning_rate": 5.160299189981086e-06, - "loss": 0.135, + "epoch": 1.355549279864445, + "grad_norm": 3.180140753909663, + "learning_rate": 1.203157490074294e-05, + "loss": 0.5658, "step": 9600 }, { - "epoch": 2.013210316628224, - "grad_norm": 3.827620723898737, - "learning_rate": 5.158318223598351e-06, - "loss": 0.1267, + "epoch": 1.355690482914431, + "grad_norm": 3.952477447384529, + "learning_rate": 1.2030082215014512e-05, + "loss": 0.6382, "step": 9601 }, { - "epoch": 2.013420004193751, - "grad_norm": 4.397724670627419, - "learning_rate": 5.156337505360205e-06, - "loss": 0.1492, + "epoch": 1.3558316859644168, + "grad_norm": 4.983977996624493, + "learning_rate": 1.2028589482107847e-05, + "loss": 0.9133, "step": 9602 }, { - "epoch": 2.0136296917592786, - "grad_norm": 5.218183819209082, - "learning_rate": 5.154357035368167e-06, - "loss": 0.193, + "epoch": 1.3559728890144027, + "grad_norm": 4.5326877660431215, + "learning_rate": 1.2027096702057642e-05, + "loss": 0.6255, "step": 9603 }, { - "epoch": 2.013839379324806, - "grad_norm": 4.0734098527295295, - "learning_rate": 5.152376813723736e-06, - "loss": 0.1749, + "epoch": 1.3561140920643886, + "grad_norm": 3.271977635469051, + "learning_rate": 1.2025603874898582e-05, + "loss": 0.5985, "step": 9604 }, { - "epoch": 2.0140490668903333, - "grad_norm": 3.7934669970046246, - "learning_rate": 5.150396840528396e-06, - "loss": 0.1183, + "epoch": 1.3562552951143745, + "grad_norm": 3.3828582566653664, + "learning_rate": 1.2024111000665364e-05, + "loss": 0.6167, "step": 9605 }, { - "epoch": 2.014258754455861, - "grad_norm": 4.311910532620614, - "learning_rate": 5.148417115883634e-06, - "loss": 0.1438, + "epoch": 1.3563964981643604, + "grad_norm": 3.320390225738393, + "learning_rate": 1.2022618079392683e-05, + "loss": 0.5289, "step": 9606 }, { - "epoch": 2.0144684420213883, - "grad_norm": 3.6661508302291677, - "learning_rate": 5.146437639890904e-06, - "loss": 0.1415, + "epoch": 1.3565377012143462, + "grad_norm": 3.602871716011303, + "learning_rate": 1.2021125111115226e-05, + "loss": 0.6812, "step": 9607 }, { - "epoch": 2.0146781295869154, - "grad_norm": 4.233076823204496, - "learning_rate": 5.14445841265167e-06, - "loss": 0.1784, + "epoch": 1.3566789042643321, + "grad_norm": 3.671038619851102, + "learning_rate": 1.2019632095867697e-05, + "loss": 0.6355, "step": 9608 }, { - "epoch": 2.014887817152443, - "grad_norm": 4.024010762906287, - "learning_rate": 5.14247943426736e-06, - "loss": 0.1484, + "epoch": 1.356820107314318, + "grad_norm": 3.5070214707993985, + "learning_rate": 1.2018139033684792e-05, + "loss": 0.6224, "step": 9609 }, { - "epoch": 2.0150975047179704, - "grad_norm": 3.3112016937504576, - "learning_rate": 5.140500704839401e-06, - "loss": 0.1086, + "epoch": 1.356961310364304, + "grad_norm": 3.159328657828265, + "learning_rate": 1.2016645924601207e-05, + "loss": 0.5248, "step": 9610 }, { - "epoch": 2.0153071922834975, - "grad_norm": 7.712004509953738, - "learning_rate": 5.1385222244692115e-06, - "loss": 0.1419, + "epoch": 1.3571025134142898, + "grad_norm": 3.57104889441666, + "learning_rate": 1.2015152768651639e-05, + "loss": 0.6037, "step": 9611 }, { - "epoch": 2.015516879849025, - "grad_norm": 4.648598272183988, - "learning_rate": 5.1365439932581965e-06, - "loss": 0.1805, + "epoch": 1.3572437164642757, + "grad_norm": 3.862294965345124, + "learning_rate": 1.2013659565870795e-05, + "loss": 0.5974, "step": 9612 }, { - "epoch": 2.015726567414552, - "grad_norm": 2.771395711399631, - "learning_rate": 5.134566011307729e-06, - "loss": 0.1147, + "epoch": 1.3573849195142615, + "grad_norm": 2.951118723195497, + "learning_rate": 1.2012166316293368e-05, + "loss": 0.4768, "step": 9613 }, { - "epoch": 2.0159362549800797, - "grad_norm": 3.6040552159791424, - "learning_rate": 5.132588278719193e-06, - "loss": 0.0988, + "epoch": 1.3575261225642474, + "grad_norm": 3.2488337533779403, + "learning_rate": 1.201067301995407e-05, + "loss": 0.5017, "step": 9614 }, { - "epoch": 2.016145942545607, - "grad_norm": 4.810532742168623, - "learning_rate": 5.130610795593952e-06, - "loss": 0.1422, + "epoch": 1.3576673256142333, + "grad_norm": 3.5839883674653055, + "learning_rate": 1.2009179676887595e-05, + "loss": 0.6074, "step": 9615 }, { - "epoch": 2.0163556301111343, - "grad_norm": 4.10575577449865, - "learning_rate": 5.1286335620333535e-06, - "loss": 0.1617, + "epoch": 1.3578085286642192, + "grad_norm": 3.872031407575974, + "learning_rate": 1.200768628712865e-05, + "loss": 0.6949, "step": 9616 }, { - "epoch": 2.016565317676662, - "grad_norm": 4.153823863048835, - "learning_rate": 5.1266565781387315e-06, - "loss": 0.152, + "epoch": 1.357949731714205, + "grad_norm": 3.7792197243867713, + "learning_rate": 1.2006192850711947e-05, + "loss": 0.5557, "step": 9617 }, { - "epoch": 2.0167750052421893, - "grad_norm": 2.791959776811991, - "learning_rate": 5.124679844011415e-06, - "loss": 0.0966, + "epoch": 1.358090934764191, + "grad_norm": 3.140092643543506, + "learning_rate": 1.2004699367672187e-05, + "loss": 0.5139, "step": 9618 }, { - "epoch": 2.0169846928077164, - "grad_norm": 3.4449761606579012, - "learning_rate": 5.1227033597527075e-06, - "loss": 0.1271, + "epoch": 1.3582321378141768, + "grad_norm": 3.4205135141860383, + "learning_rate": 1.200320583804408e-05, + "loss": 0.6581, "step": 9619 }, { - "epoch": 2.017194380373244, - "grad_norm": 5.433131439449977, - "learning_rate": 5.120727125463918e-06, - "loss": 0.1284, + "epoch": 1.3583733408641627, + "grad_norm": 3.325379728959165, + "learning_rate": 1.2001712261862335e-05, + "loss": 0.5607, "step": 9620 }, { - "epoch": 2.017404067938771, - "grad_norm": 4.168742365151846, - "learning_rate": 5.118751141246325e-06, - "loss": 0.1673, + "epoch": 1.3585145439141486, + "grad_norm": 3.8533916500499568, + "learning_rate": 1.200021863916166e-05, + "loss": 0.6201, "step": 9621 }, { - "epoch": 2.0176137555042986, - "grad_norm": 3.901538213960578, - "learning_rate": 5.1167754072011976e-06, - "loss": 0.1569, + "epoch": 1.3586557469641345, + "grad_norm": 2.8772616894553695, + "learning_rate": 1.1998724969976767e-05, + "loss": 0.4814, "step": 9622 }, { - "epoch": 2.017823443069826, - "grad_norm": 3.69980330390423, - "learning_rate": 5.114799923429802e-06, - "loss": 0.1466, + "epoch": 1.3587969500141204, + "grad_norm": 3.3077896887816465, + "learning_rate": 1.1997231254342373e-05, + "loss": 0.588, "step": 9623 }, { - "epoch": 2.018033130635353, - "grad_norm": 3.67441934422368, - "learning_rate": 5.112824690033386e-06, - "loss": 0.1707, + "epoch": 1.3589381530641063, + "grad_norm": 3.480748247466281, + "learning_rate": 1.1995737492293183e-05, + "loss": 0.5331, "step": 9624 }, { - "epoch": 2.0182428182008807, - "grad_norm": 4.336406741655112, - "learning_rate": 5.110849707113182e-06, - "loss": 0.167, + "epoch": 1.3590793561140921, + "grad_norm": 3.2350781462675577, + "learning_rate": 1.1994243683863917e-05, + "loss": 0.5811, "step": 9625 }, { - "epoch": 2.0184525057664082, - "grad_norm": 4.513238140846731, - "learning_rate": 5.108874974770404e-06, - "loss": 0.1787, + "epoch": 1.359220559164078, + "grad_norm": 3.5556758783072007, + "learning_rate": 1.199274982908929e-05, + "loss": 0.5822, "step": 9626 }, { - "epoch": 2.0186621933319353, - "grad_norm": 3.571342308267117, - "learning_rate": 5.106900493106272e-06, - "loss": 0.1363, + "epoch": 1.359361762214064, + "grad_norm": 3.6454018586939148, + "learning_rate": 1.1991255928004017e-05, + "loss": 0.6955, "step": 9627 }, { - "epoch": 2.018871880897463, - "grad_norm": 3.086493993330371, - "learning_rate": 5.1049262622219716e-06, - "loss": 0.1429, + "epoch": 1.3595029652640496, + "grad_norm": 3.138915238883516, + "learning_rate": 1.1989761980642816e-05, + "loss": 0.5355, "step": 9628 }, { - "epoch": 2.0190815684629904, - "grad_norm": 4.210058631020082, - "learning_rate": 5.102952282218696e-06, - "loss": 0.2052, + "epoch": 1.3596441683140355, + "grad_norm": 3.807185563231134, + "learning_rate": 1.1988267987040407e-05, + "loss": 0.6903, "step": 9629 }, { - "epoch": 2.0192912560285174, - "grad_norm": 3.602835000121995, - "learning_rate": 5.100978553197609e-06, - "loss": 0.133, + "epoch": 1.3597853713640213, + "grad_norm": 3.711439996349804, + "learning_rate": 1.1986773947231505e-05, + "loss": 0.5559, "step": 9630 }, { - "epoch": 2.019500943594045, - "grad_norm": 4.354146716021864, - "learning_rate": 5.099005075259864e-06, - "loss": 0.1449, + "epoch": 1.3599265744140072, + "grad_norm": 4.121347095392864, + "learning_rate": 1.1985279861250839e-05, + "loss": 0.8647, "step": 9631 }, { - "epoch": 2.019710631159572, - "grad_norm": 3.876350521511767, - "learning_rate": 5.097031848506609e-06, - "loss": 0.15, + "epoch": 1.360067777463993, + "grad_norm": 3.9800070935294927, + "learning_rate": 1.1983785729133125e-05, + "loss": 0.6464, "step": 9632 }, { - "epoch": 2.0199203187250996, - "grad_norm": 3.8037142031292004, - "learning_rate": 5.095058873038979e-06, - "loss": 0.1284, + "epoch": 1.360208980513979, + "grad_norm": 3.3340724805056734, + "learning_rate": 1.1982291550913086e-05, + "loss": 0.4897, "step": 9633 }, { - "epoch": 2.020130006290627, - "grad_norm": 3.859768656536526, - "learning_rate": 5.093086148958089e-06, - "loss": 0.1606, + "epoch": 1.3603501835639649, + "grad_norm": 3.773649616607825, + "learning_rate": 1.1980797326625446e-05, + "loss": 0.6684, "step": 9634 }, { - "epoch": 2.020339693856154, - "grad_norm": 5.196297389355179, - "learning_rate": 5.091113676365042e-06, - "loss": 0.2104, + "epoch": 1.3604913866139507, + "grad_norm": 4.168909257878763, + "learning_rate": 1.1979303056304939e-05, + "loss": 0.6817, "step": 9635 }, { - "epoch": 2.0205493814216817, - "grad_norm": 3.8644041731888263, - "learning_rate": 5.089141455360935e-06, - "loss": 0.1751, + "epoch": 1.3606325896639366, + "grad_norm": 4.360133535000221, + "learning_rate": 1.1977808739986275e-05, + "loss": 0.7174, "step": 9636 }, { - "epoch": 2.0207590689872093, - "grad_norm": 3.2820119277574427, - "learning_rate": 5.087169486046847e-06, - "loss": 0.152, + "epoch": 1.3607737927139225, + "grad_norm": 3.434483859719177, + "learning_rate": 1.1976314377704194e-05, + "loss": 0.5409, "step": 9637 }, { - "epoch": 2.0209687565527363, - "grad_norm": 4.014960921051778, - "learning_rate": 5.0851977685238395e-06, - "loss": 0.1781, + "epoch": 1.3609149957639084, + "grad_norm": 3.29630776412953, + "learning_rate": 1.1974819969493421e-05, + "loss": 0.4974, "step": 9638 }, { - "epoch": 2.021178444118264, - "grad_norm": 3.9988857477306294, - "learning_rate": 5.0832263028929716e-06, - "loss": 0.1352, + "epoch": 1.3610561988138943, + "grad_norm": 3.2994146302921874, + "learning_rate": 1.197332551538868e-05, + "loss": 0.5569, "step": 9639 }, { - "epoch": 2.021388131683791, - "grad_norm": 4.310035989869975, - "learning_rate": 5.0812550892552856e-06, - "loss": 0.1798, + "epoch": 1.3611974018638802, + "grad_norm": 3.4391787595369165, + "learning_rate": 1.1971831015424713e-05, + "loss": 0.5012, "step": 9640 }, { - "epoch": 2.0215978192493185, - "grad_norm": 4.541028360738635, - "learning_rate": 5.079284127711809e-06, - "loss": 0.168, + "epoch": 1.361338604913866, + "grad_norm": 3.60454613573677, + "learning_rate": 1.1970336469636242e-05, + "loss": 0.5419, "step": 9641 }, { - "epoch": 2.021807506814846, - "grad_norm": 3.925784069031799, - "learning_rate": 5.07731341836355e-06, - "loss": 0.1562, + "epoch": 1.361479807963852, + "grad_norm": 3.4139943660590655, + "learning_rate": 1.1968841878057999e-05, + "loss": 0.5419, "step": 9642 }, { - "epoch": 2.022017194380373, - "grad_norm": 3.8410072306637577, - "learning_rate": 5.075342961311522e-06, - "loss": 0.1566, + "epoch": 1.3616210110138378, + "grad_norm": 3.4780406608918573, + "learning_rate": 1.1967347240724726e-05, + "loss": 0.5563, "step": 9643 }, { - "epoch": 2.0222268819459006, - "grad_norm": 3.7500665263891064, - "learning_rate": 5.073372756656702e-06, - "loss": 0.1776, + "epoch": 1.3617622140638237, + "grad_norm": 3.2040876231654667, + "learning_rate": 1.196585255767115e-05, + "loss": 0.5687, "step": 9644 }, { - "epoch": 2.022436569511428, - "grad_norm": 3.8304556143148574, - "learning_rate": 5.07140280450008e-06, - "loss": 0.1292, + "epoch": 1.3619034171138096, + "grad_norm": 3.639307647944429, + "learning_rate": 1.1964357828932012e-05, + "loss": 0.5126, "step": 9645 }, { - "epoch": 2.0226462570769552, - "grad_norm": 4.252951200630093, - "learning_rate": 5.069433104942611e-06, - "loss": 0.1411, + "epoch": 1.3620446201637955, + "grad_norm": 3.280448594947904, + "learning_rate": 1.1962863054542045e-05, + "loss": 0.5603, "step": 9646 }, { - "epoch": 2.0228559446424828, - "grad_norm": 5.062434457922006, - "learning_rate": 5.067463658085243e-06, - "loss": 0.1811, + "epoch": 1.3621858232137813, + "grad_norm": 3.401640038162629, + "learning_rate": 1.1961368234535989e-05, + "loss": 0.4777, "step": 9647 }, { - "epoch": 2.0230656322080103, - "grad_norm": 3.8351394665434957, - "learning_rate": 5.065494464028917e-06, - "loss": 0.135, + "epoch": 1.3623270262637672, + "grad_norm": 3.360545749756557, + "learning_rate": 1.195987336894858e-05, + "loss": 0.5937, "step": 9648 }, { - "epoch": 2.0232753197735374, - "grad_norm": 3.965814489117527, - "learning_rate": 5.063525522874564e-06, - "loss": 0.1653, + "epoch": 1.362468229313753, + "grad_norm": 3.887874244975685, + "learning_rate": 1.1958378457814561e-05, + "loss": 0.6562, "step": 9649 }, { - "epoch": 2.023485007339065, - "grad_norm": 3.928235644928943, - "learning_rate": 5.061556834723089e-06, - "loss": 0.175, + "epoch": 1.362609432363739, + "grad_norm": 3.3896912526995138, + "learning_rate": 1.1956883501168672e-05, + "loss": 0.5771, "step": 9650 }, { - "epoch": 2.023694694904592, - "grad_norm": 3.749121003522189, - "learning_rate": 5.0595883996753866e-06, - "loss": 0.148, + "epoch": 1.3627506354137249, + "grad_norm": 3.9893455374066225, + "learning_rate": 1.1955388499045657e-05, + "loss": 0.6632, "step": 9651 }, { - "epoch": 2.0239043824701195, - "grad_norm": 3.441236476290837, - "learning_rate": 5.057620217832353e-06, - "loss": 0.1677, + "epoch": 1.3628918384637108, + "grad_norm": 3.7872396418076475, + "learning_rate": 1.195389345148026e-05, + "loss": 0.6515, "step": 9652 }, { - "epoch": 2.024114070035647, - "grad_norm": 3.6638049897352682, - "learning_rate": 5.0556522892948545e-06, - "loss": 0.1304, + "epoch": 1.3630330415136966, + "grad_norm": 3.4555450865747335, + "learning_rate": 1.1952398358507217e-05, + "loss": 0.602, "step": 9653 }, { - "epoch": 2.024323757601174, - "grad_norm": 3.124557263408565, - "learning_rate": 5.0536846141637474e-06, - "loss": 0.1193, + "epoch": 1.3631742445636825, + "grad_norm": 3.7943293962979587, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.5877, "step": 9654 }, { - "epoch": 2.0245334451667016, - "grad_norm": 3.654230789002932, - "learning_rate": 5.051717192539887e-06, - "loss": 0.1876, + "epoch": 1.3633154476136684, + "grad_norm": 4.207783619361302, + "learning_rate": 1.19494080364772e-05, + "loss": 0.6858, "step": 9655 }, { - "epoch": 2.024743132732229, - "grad_norm": 4.848428850887715, - "learning_rate": 5.0497500245240986e-06, - "loss": 0.2077, + "epoch": 1.3634566506636543, + "grad_norm": 3.559069176284669, + "learning_rate": 1.1947912807489716e-05, + "loss": 0.5028, "step": 9656 }, { - "epoch": 2.0249528202977563, - "grad_norm": 3.0322540494652768, - "learning_rate": 5.0477831102172116e-06, - "loss": 0.1141, + "epoch": 1.3635978537136402, + "grad_norm": 3.5770529567636893, + "learning_rate": 1.1946417533233583e-05, + "loss": 0.538, "step": 9657 }, { - "epoch": 2.025162507863284, - "grad_norm": 3.7332693679355295, - "learning_rate": 5.045816449720029e-06, - "loss": 0.1523, + "epoch": 1.363739056763626, + "grad_norm": 4.048065132112137, + "learning_rate": 1.1944922213743543e-05, + "loss": 0.6946, "step": 9658 }, { - "epoch": 2.025372195428811, - "grad_norm": 4.0242393627446065, - "learning_rate": 5.043850043133343e-06, - "loss": 0.1354, + "epoch": 1.363880259813612, + "grad_norm": 3.656929180681471, + "learning_rate": 1.1943426849054352e-05, + "loss": 0.5812, "step": 9659 }, { - "epoch": 2.0255818829943384, - "grad_norm": 4.281777340609352, - "learning_rate": 5.041883890557937e-06, - "loss": 0.1314, + "epoch": 1.3640214628635978, + "grad_norm": 3.4456719507511218, + "learning_rate": 1.194193143920076e-05, + "loss": 0.5694, "step": 9660 }, { - "epoch": 2.025791570559866, - "grad_norm": 4.322734741048664, - "learning_rate": 5.0399179920945855e-06, - "loss": 0.1852, + "epoch": 1.3641626659135837, + "grad_norm": 3.5589391590368598, + "learning_rate": 1.194043598421752e-05, + "loss": 0.5158, "step": 9661 }, { - "epoch": 2.026001258125393, - "grad_norm": 4.100995785980219, - "learning_rate": 5.0379523478440394e-06, - "loss": 0.1615, + "epoch": 1.3643038689635696, + "grad_norm": 3.6221146462769855, + "learning_rate": 1.1938940484139387e-05, + "loss": 0.5612, "step": 9662 }, { - "epoch": 2.0262109456909205, - "grad_norm": 3.5853989549038685, - "learning_rate": 5.035986957907039e-06, - "loss": 0.154, + "epoch": 1.3644450720135555, + "grad_norm": 4.006134342080583, + "learning_rate": 1.1937444939001112e-05, + "loss": 0.6565, "step": 9663 }, { - "epoch": 2.026420633256448, - "grad_norm": 4.473961295130404, - "learning_rate": 5.0340218223843185e-06, - "loss": 0.1587, + "epoch": 1.3645862750635414, + "grad_norm": 3.549335710020018, + "learning_rate": 1.1935949348837457e-05, + "loss": 0.538, "step": 9664 }, { - "epoch": 2.026630320821975, - "grad_norm": 3.417181978681296, - "learning_rate": 5.032056941376591e-06, - "loss": 0.1554, + "epoch": 1.3647274781135272, + "grad_norm": 3.3004266406333724, + "learning_rate": 1.1934453713683172e-05, + "loss": 0.5646, "step": 9665 }, { - "epoch": 2.0268400083875027, - "grad_norm": 3.616791410469849, - "learning_rate": 5.030092314984565e-06, - "loss": 0.1261, + "epoch": 1.3648686811635131, + "grad_norm": 3.560759942181429, + "learning_rate": 1.193295803357302e-05, + "loss": 0.5983, "step": 9666 }, { - "epoch": 2.02704969595303, - "grad_norm": 3.65784884265805, - "learning_rate": 5.028127943308927e-06, - "loss": 0.1548, + "epoch": 1.365009884213499, + "grad_norm": 3.182642121220377, + "learning_rate": 1.1931462308541755e-05, + "loss": 0.4601, "step": 9667 }, { - "epoch": 2.0272593835185573, - "grad_norm": 4.784307433963424, - "learning_rate": 5.026163826450351e-06, - "loss": 0.1979, + "epoch": 1.3651510872634849, + "grad_norm": 3.767783514403507, + "learning_rate": 1.1929966538624143e-05, + "loss": 0.6496, "step": 9668 }, { - "epoch": 2.027469071084085, - "grad_norm": 4.591937761993029, - "learning_rate": 5.024199964509506e-06, - "loss": 0.1376, + "epoch": 1.3652922903134708, + "grad_norm": 3.4343599900659596, + "learning_rate": 1.1928470723854943e-05, + "loss": 0.5697, "step": 9669 }, { - "epoch": 2.027678758649612, - "grad_norm": 3.882193314038145, - "learning_rate": 5.022236357587051e-06, - "loss": 0.14, + "epoch": 1.3654334933634567, + "grad_norm": 3.1547662380213133, + "learning_rate": 1.1926974864268914e-05, + "loss": 0.5111, "step": 9670 }, { - "epoch": 2.0278884462151394, - "grad_norm": 4.840160751948868, - "learning_rate": 5.020273005783609e-06, - "loss": 0.1118, + "epoch": 1.3655746964134425, + "grad_norm": 3.1486913888952786, + "learning_rate": 1.1925478959900822e-05, + "loss": 0.5117, "step": 9671 }, { - "epoch": 2.028098133780667, - "grad_norm": 4.450026740831961, - "learning_rate": 5.018309909199811e-06, - "loss": 0.1506, + "epoch": 1.3657158994634284, + "grad_norm": 4.000747073947447, + "learning_rate": 1.1923983010785431e-05, + "loss": 0.712, "step": 9672 }, { - "epoch": 2.028307821346194, - "grad_norm": 3.6572217134003013, - "learning_rate": 5.016347067936275e-06, - "loss": 0.1395, + "epoch": 1.3658571025134143, + "grad_norm": 3.6659613725404356, + "learning_rate": 1.1922487016957502e-05, + "loss": 0.6617, "step": 9673 }, { - "epoch": 2.0285175089117216, - "grad_norm": 3.752521424318331, - "learning_rate": 5.014384482093595e-06, - "loss": 0.1166, + "epoch": 1.3659983055634002, + "grad_norm": 3.1451920485647924, + "learning_rate": 1.1920990978451812e-05, + "loss": 0.5392, "step": 9674 }, { - "epoch": 2.028727196477249, - "grad_norm": 5.949694644663068, - "learning_rate": 5.0124221517723535e-06, - "loss": 0.2236, + "epoch": 1.366139508613386, + "grad_norm": 3.5731119234595057, + "learning_rate": 1.1919494895303119e-05, + "loss": 0.6448, "step": 9675 }, { - "epoch": 2.028936884042776, - "grad_norm": 4.317470239454295, - "learning_rate": 5.010460077073132e-06, - "loss": 0.1875, + "epoch": 1.366280711663372, + "grad_norm": 2.9264539038494077, + "learning_rate": 1.1917998767546188e-05, + "loss": 0.4946, "step": 9676 }, { - "epoch": 2.0291465716083037, - "grad_norm": 3.51429979514136, - "learning_rate": 5.008498258096481e-06, - "loss": 0.1248, + "epoch": 1.3664219147133578, + "grad_norm": 2.989890993693794, + "learning_rate": 1.1916502595215799e-05, + "loss": 0.4369, "step": 9677 }, { - "epoch": 2.029356259173831, - "grad_norm": 4.995472775571437, - "learning_rate": 5.006536694942956e-06, - "loss": 0.1787, + "epoch": 1.3665631177633437, + "grad_norm": 3.382161647620616, + "learning_rate": 1.1915006378346719e-05, + "loss": 0.6747, "step": 9678 }, { - "epoch": 2.0295659467393583, - "grad_norm": 3.3343069323315184, - "learning_rate": 5.004575387713087e-06, - "loss": 0.1603, + "epoch": 1.3667043208133296, + "grad_norm": 4.741227238624383, + "learning_rate": 1.1913510116973715e-05, + "loss": 0.7579, "step": 9679 }, { - "epoch": 2.029775634304886, - "grad_norm": 4.934297369703559, - "learning_rate": 5.002614336507388e-06, - "loss": 0.1622, + "epoch": 1.3668455238633155, + "grad_norm": 4.353092237430788, + "learning_rate": 1.1912013811131562e-05, + "loss": 0.7957, "step": 9680 }, { - "epoch": 2.029985321870413, - "grad_norm": 4.564986126409634, - "learning_rate": 5.000653541426371e-06, - "loss": 0.1701, + "epoch": 1.3669867269133014, + "grad_norm": 3.453339276092592, + "learning_rate": 1.1910517460855033e-05, + "loss": 0.5697, "step": 9681 }, { - "epoch": 2.0301950094359404, - "grad_norm": 4.4370152060040695, - "learning_rate": 4.998693002570536e-06, - "loss": 0.118, + "epoch": 1.3671279299632872, + "grad_norm": 3.8990299737361074, + "learning_rate": 1.1909021066178906e-05, + "loss": 0.6719, "step": 9682 }, { - "epoch": 2.030404697001468, - "grad_norm": 3.8685425209905264, - "learning_rate": 4.9967327200403595e-06, - "loss": 0.1618, + "epoch": 1.3672691330132731, + "grad_norm": 4.980671183569356, + "learning_rate": 1.1907524627137952e-05, + "loss": 0.8008, "step": 9683 }, { - "epoch": 2.030614384566995, - "grad_norm": 5.61159471794053, - "learning_rate": 4.994772693936304e-06, - "loss": 0.1696, + "epoch": 1.367410336063259, + "grad_norm": 4.3038522735200155, + "learning_rate": 1.1906028143766952e-05, + "loss": 0.8133, "step": 9684 }, { - "epoch": 2.0308240721325226, - "grad_norm": 3.67717618411063, - "learning_rate": 4.992812924358833e-06, - "loss": 0.1199, + "epoch": 1.367551539113245, + "grad_norm": 3.898066080258327, + "learning_rate": 1.1904531616100677e-05, + "loss": 0.6813, "step": 9685 }, { - "epoch": 2.03103375969805, - "grad_norm": 3.057613163850684, - "learning_rate": 4.9908534114083804e-06, - "loss": 0.1133, + "epoch": 1.3676927421632308, + "grad_norm": 3.2000271490358125, + "learning_rate": 1.1903035044173914e-05, + "loss": 0.6034, "step": 9686 }, { - "epoch": 2.031243447263577, - "grad_norm": 5.112729928585523, - "learning_rate": 4.988894155185383e-06, - "loss": 0.1617, + "epoch": 1.3678339452132167, + "grad_norm": 3.247603800982416, + "learning_rate": 1.1901538428021434e-05, + "loss": 0.5338, "step": 9687 }, { - "epoch": 2.0314531348291047, - "grad_norm": 4.454841502439337, - "learning_rate": 4.98693515579025e-06, - "loss": 0.1599, + "epoch": 1.3679751482632025, + "grad_norm": 3.638379363697139, + "learning_rate": 1.1900041767678024e-05, + "loss": 0.611, "step": 9688 }, { - "epoch": 2.031662822394632, - "grad_norm": 4.702200159440042, - "learning_rate": 4.984976413323382e-06, - "loss": 0.1726, + "epoch": 1.3681163513131884, + "grad_norm": 3.2273460435656203, + "learning_rate": 1.1898545063178464e-05, + "loss": 0.5944, "step": 9689 }, { - "epoch": 2.0318725099601593, - "grad_norm": 4.117285605362018, - "learning_rate": 4.983017927885172e-06, - "loss": 0.1379, + "epoch": 1.3682575543631743, + "grad_norm": 3.748882091294822, + "learning_rate": 1.1897048314557538e-05, + "loss": 0.6217, "step": 9690 }, { - "epoch": 2.032082197525687, - "grad_norm": 4.460421659721673, - "learning_rate": 4.981059699576001e-06, - "loss": 0.1443, + "epoch": 1.3683987574131602, + "grad_norm": 3.629050422243412, + "learning_rate": 1.1895551521850029e-05, + "loss": 0.59, "step": 9691 }, { - "epoch": 2.032291885091214, - "grad_norm": 3.7528911494793444, - "learning_rate": 4.979101728496217e-06, - "loss": 0.1447, + "epoch": 1.368539960463146, + "grad_norm": 3.828777141849607, + "learning_rate": 1.189405468509072e-05, + "loss": 0.6178, "step": 9692 }, { - "epoch": 2.0325015726567415, - "grad_norm": 4.655677877870464, - "learning_rate": 4.977144014746179e-06, - "loss": 0.1544, + "epoch": 1.368681163513132, + "grad_norm": 3.415580766378138, + "learning_rate": 1.1892557804314393e-05, + "loss": 0.628, "step": 9693 }, { - "epoch": 2.032711260222269, - "grad_norm": 4.931239573723924, - "learning_rate": 4.975186558426222e-06, - "loss": 0.1482, + "epoch": 1.3688223665631178, + "grad_norm": 3.1561384863108444, + "learning_rate": 1.1891060879555847e-05, + "loss": 0.5067, "step": 9694 }, { - "epoch": 2.032920947787796, - "grad_norm": 4.157377861062008, - "learning_rate": 4.973229359636677e-06, - "loss": 0.1268, + "epoch": 1.3689635696131037, + "grad_norm": 3.7993824449502975, + "learning_rate": 1.1889563910849862e-05, + "loss": 0.6941, "step": 9695 }, { - "epoch": 2.0331306353533236, - "grad_norm": 4.061055616565977, - "learning_rate": 4.971272418477837e-06, - "loss": 0.1322, + "epoch": 1.3691047726630896, + "grad_norm": 4.015409225989134, + "learning_rate": 1.1888066898231223e-05, + "loss": 0.6221, "step": 9696 }, { - "epoch": 2.0333403229188507, - "grad_norm": 4.7175996389496255, - "learning_rate": 4.969315735050008e-06, - "loss": 0.2018, + "epoch": 1.3692459757130755, + "grad_norm": 3.3418894142521545, + "learning_rate": 1.188656984173473e-05, + "loss": 0.5816, "step": 9697 }, { - "epoch": 2.0335500104843782, - "grad_norm": 4.254970893928762, - "learning_rate": 4.967359309453477e-06, - "loss": 0.1315, + "epoch": 1.3693871787630614, + "grad_norm": 4.359613640592059, + "learning_rate": 1.1885072741395165e-05, + "loss": 0.7643, "step": 9698 }, { - "epoch": 2.0337596980499058, - "grad_norm": 3.999630750226817, - "learning_rate": 4.965403141788508e-06, - "loss": 0.1164, + "epoch": 1.3695283818130473, + "grad_norm": 3.156193514316977, + "learning_rate": 1.1883575597247328e-05, + "loss": 0.6123, "step": 9699 }, { - "epoch": 2.033969385615433, - "grad_norm": 6.287055805916647, - "learning_rate": 4.9634472321553586e-06, - "loss": 0.2057, + "epoch": 1.3696695848630331, + "grad_norm": 3.347179755112301, + "learning_rate": 1.1882078409326003e-05, + "loss": 0.5814, "step": 9700 }, { - "epoch": 2.0341790731809604, - "grad_norm": 4.379526313468788, - "learning_rate": 4.961491580654277e-06, - "loss": 0.1865, + "epoch": 1.369810787913019, + "grad_norm": 3.449162422752734, + "learning_rate": 1.1880581177665992e-05, + "loss": 0.5976, "step": 9701 }, { - "epoch": 2.034388760746488, - "grad_norm": 4.804854996308676, - "learning_rate": 4.959536187385488e-06, - "loss": 0.1581, + "epoch": 1.369951990963005, + "grad_norm": 3.952167109872566, + "learning_rate": 1.1879083902302082e-05, + "loss": 0.8094, "step": 9702 }, { - "epoch": 2.034598448312015, - "grad_norm": 3.7674044495623313, - "learning_rate": 4.957581052449213e-06, - "loss": 0.1376, + "epoch": 1.3700931940129908, + "grad_norm": 4.094360212721599, + "learning_rate": 1.187758658326908e-05, + "loss": 0.7686, "step": 9703 }, { - "epoch": 2.0348081358775425, - "grad_norm": 4.544457370068342, - "learning_rate": 4.955626175945657e-06, - "loss": 0.134, + "epoch": 1.3702343970629767, + "grad_norm": 2.8081190051965197, + "learning_rate": 1.187608922060177e-05, + "loss": 0.4638, "step": 9704 }, { - "epoch": 2.03501782344307, - "grad_norm": 3.871084734591505, - "learning_rate": 4.953671557975004e-06, - "loss": 0.1259, + "epoch": 1.3703756001129626, + "grad_norm": 3.6687702897539953, + "learning_rate": 1.187459181433496e-05, + "loss": 0.6114, "step": 9705 }, { - "epoch": 2.035227511008597, - "grad_norm": 3.6286614216617328, - "learning_rate": 4.951717198637436e-06, - "loss": 0.1278, + "epoch": 1.3705168031629484, + "grad_norm": 2.862833450539544, + "learning_rate": 1.1873094364503446e-05, + "loss": 0.4513, "step": 9706 }, { - "epoch": 2.0354371985741246, - "grad_norm": 3.7150033181097735, - "learning_rate": 4.94976309803312e-06, - "loss": 0.1277, + "epoch": 1.3706580062129343, + "grad_norm": 3.983960515132614, + "learning_rate": 1.1871596871142028e-05, + "loss": 0.5604, "step": 9707 }, { - "epoch": 2.0356468861396517, - "grad_norm": 5.25235936780869, - "learning_rate": 4.947809256262206e-06, - "loss": 0.1781, + "epoch": 1.3707992092629202, + "grad_norm": 4.138931200767664, + "learning_rate": 1.1870099334285507e-05, + "loss": 0.6688, "step": 9708 }, { - "epoch": 2.0358565737051793, - "grad_norm": 3.8211949856235448, - "learning_rate": 4.945855673424825e-06, - "loss": 0.1342, + "epoch": 1.370940412312906, + "grad_norm": 4.129426963557256, + "learning_rate": 1.1868601753968685e-05, + "loss": 0.6732, "step": 9709 }, { - "epoch": 2.036066261270707, - "grad_norm": 6.102145700753543, - "learning_rate": 4.9439023496211125e-06, - "loss": 0.195, + "epoch": 1.371081615362892, + "grad_norm": 4.183808897738178, + "learning_rate": 1.1867104130226363e-05, + "loss": 0.7716, "step": 9710 }, { - "epoch": 2.036275948836234, - "grad_norm": 4.857021782272797, - "learning_rate": 4.9419492849511686e-06, - "loss": 0.2308, + "epoch": 1.3712228184128779, + "grad_norm": 2.9515273160131184, + "learning_rate": 1.1865606463093348e-05, + "loss": 0.5989, "step": 9711 }, { - "epoch": 2.0364856364017614, - "grad_norm": 4.775266027482804, - "learning_rate": 4.939996479515101e-06, - "loss": 0.1639, + "epoch": 1.3713640214628637, + "grad_norm": 3.301489264859595, + "learning_rate": 1.1864108752604451e-05, + "loss": 0.581, "step": 9712 }, { - "epoch": 2.036695323967289, - "grad_norm": 4.080888562450899, - "learning_rate": 4.9380439334129896e-06, - "loss": 0.1645, + "epoch": 1.3715052245128494, + "grad_norm": 3.5452167062730036, + "learning_rate": 1.1862610998794464e-05, + "loss": 0.5289, "step": 9713 }, { - "epoch": 2.036905011532816, - "grad_norm": 4.343808832816453, - "learning_rate": 4.936091646744902e-06, - "loss": 0.1431, + "epoch": 1.3716464275628353, + "grad_norm": 4.150861803632419, + "learning_rate": 1.1861113201698204e-05, + "loss": 0.5866, "step": 9714 }, { - "epoch": 2.0371146990983435, - "grad_norm": 3.536102113061114, - "learning_rate": 4.9341396196109045e-06, - "loss": 0.1301, + "epoch": 1.3717876306128212, + "grad_norm": 3.3607604028115095, + "learning_rate": 1.1859615361350476e-05, + "loss": 0.5766, "step": 9715 }, { - "epoch": 2.0373243866638706, - "grad_norm": 5.193094418937569, - "learning_rate": 4.932187852111039e-06, - "loss": 0.1531, + "epoch": 1.371928833662807, + "grad_norm": 3.7059513660729557, + "learning_rate": 1.1858117477786094e-05, + "loss": 0.5998, "step": 9716 }, { - "epoch": 2.037534074229398, - "grad_norm": 4.895702018534499, - "learning_rate": 4.930236344345331e-06, - "loss": 0.1369, + "epoch": 1.372070036712793, + "grad_norm": 3.5978810782207775, + "learning_rate": 1.185661955103986e-05, + "loss": 0.5843, "step": 9717 }, { - "epoch": 2.0377437617949257, - "grad_norm": 3.423019109997735, - "learning_rate": 4.928285096413803e-06, - "loss": 0.1192, + "epoch": 1.3722112397627788, + "grad_norm": 2.8662963344178283, + "learning_rate": 1.1855121581146591e-05, + "loss": 0.4701, "step": 9718 }, { - "epoch": 2.0379534493604528, - "grad_norm": 4.230739263443211, - "learning_rate": 4.926334108416464e-06, - "loss": 0.1392, + "epoch": 1.3723524428127647, + "grad_norm": 4.38119502110531, + "learning_rate": 1.1853623568141096e-05, + "loss": 0.8014, "step": 9719 }, { - "epoch": 2.0381631369259803, - "grad_norm": 4.696825001644658, - "learning_rate": 4.9243833804533025e-06, - "loss": 0.1739, + "epoch": 1.3724936458627506, + "grad_norm": 3.3884837231900407, + "learning_rate": 1.1852125512058194e-05, + "loss": 0.5865, "step": 9720 }, { - "epoch": 2.038372824491508, - "grad_norm": 4.588169001557765, - "learning_rate": 4.922432912624292e-06, - "loss": 0.1538, + "epoch": 1.3726348489127365, + "grad_norm": 3.3285667973469315, + "learning_rate": 1.1850627412932689e-05, + "loss": 0.5534, "step": 9721 }, { - "epoch": 2.038582512057035, - "grad_norm": 3.8433952645173965, - "learning_rate": 4.920482705029405e-06, - "loss": 0.1445, + "epoch": 1.3727760519627223, + "grad_norm": 3.3871523942701978, + "learning_rate": 1.1849129270799407e-05, + "loss": 0.6523, "step": 9722 }, { - "epoch": 2.0387921996225624, - "grad_norm": 5.986940461029213, - "learning_rate": 4.9185327577685856e-06, - "loss": 0.1726, + "epoch": 1.3729172550127082, + "grad_norm": 3.3233362460581253, + "learning_rate": 1.1847631085693159e-05, + "loss": 0.5428, "step": 9723 }, { - "epoch": 2.03900188718809, - "grad_norm": 4.715264503870053, - "learning_rate": 4.91658307094178e-06, - "loss": 0.1465, + "epoch": 1.373058458062694, + "grad_norm": 4.003976371980904, + "learning_rate": 1.184613285764876e-05, + "loss": 0.5814, "step": 9724 }, { - "epoch": 2.039211574753617, - "grad_norm": 4.957169834451293, - "learning_rate": 4.914633644648908e-06, - "loss": 0.1826, + "epoch": 1.37319966111268, + "grad_norm": 3.1192788606947466, + "learning_rate": 1.1844634586701033e-05, + "loss": 0.4958, "step": 9725 }, { - "epoch": 2.0394212623191446, - "grad_norm": 3.5697092278553204, - "learning_rate": 4.912684478989878e-06, - "loss": 0.1258, + "epoch": 1.3733408641626659, + "grad_norm": 4.9136562495903, + "learning_rate": 1.1843136272884795e-05, + "loss": 0.785, "step": 9726 }, { - "epoch": 2.0396309498846716, - "grad_norm": 4.2516744340078185, - "learning_rate": 4.910735574064592e-06, - "loss": 0.1435, + "epoch": 1.3734820672126518, + "grad_norm": 3.263384357558984, + "learning_rate": 1.1841637916234863e-05, + "loss": 0.5401, "step": 9727 }, { - "epoch": 2.039840637450199, - "grad_norm": 3.5410851949368687, - "learning_rate": 4.908786929972938e-06, - "loss": 0.1322, + "epoch": 1.3736232702626376, + "grad_norm": 3.447933221610095, + "learning_rate": 1.1840139516786062e-05, + "loss": 0.5425, "step": 9728 }, { - "epoch": 2.0400503250157267, - "grad_norm": 4.552925139619608, - "learning_rate": 4.906838546814785e-06, - "loss": 0.2012, + "epoch": 1.3737644733126235, + "grad_norm": 4.661399010102938, + "learning_rate": 1.1838641074573215e-05, + "loss": 0.69, "step": 9729 }, { - "epoch": 2.040260012581254, - "grad_norm": 4.005678051532541, - "learning_rate": 4.9048904246899865e-06, - "loss": 0.1157, + "epoch": 1.3739056763626094, + "grad_norm": 3.929607806567896, + "learning_rate": 1.1837142589631143e-05, + "loss": 0.6756, "step": 9730 }, { - "epoch": 2.0404697001467813, - "grad_norm": 4.651973172814157, - "learning_rate": 4.902942563698394e-06, - "loss": 0.1711, + "epoch": 1.3740468794125953, + "grad_norm": 3.6594125342675428, + "learning_rate": 1.1835644061994671e-05, + "loss": 0.6849, "step": 9731 }, { - "epoch": 2.040679387712309, - "grad_norm": 4.7361127375757315, - "learning_rate": 4.900994963939835e-06, - "loss": 0.1604, + "epoch": 1.3741880824625812, + "grad_norm": 4.4694313735728235, + "learning_rate": 1.1834145491698623e-05, + "loss": 0.8197, "step": 9732 }, { - "epoch": 2.040889075277836, - "grad_norm": 4.101375486560881, - "learning_rate": 4.899047625514125e-06, - "loss": 0.1507, + "epoch": 1.374329285512567, + "grad_norm": 3.229564401317567, + "learning_rate": 1.1832646878777828e-05, + "loss": 0.5154, "step": 9733 }, { - "epoch": 2.0410987628433634, - "grad_norm": 3.4864299214351524, - "learning_rate": 4.897100548521075e-06, - "loss": 0.1285, + "epoch": 1.374470488562553, + "grad_norm": 3.515410559625843, + "learning_rate": 1.1831148223267108e-05, + "loss": 0.5867, "step": 9734 }, { - "epoch": 2.0413084504088905, - "grad_norm": 4.828958347326046, - "learning_rate": 4.89515373306047e-06, - "loss": 0.2053, + "epoch": 1.3746116916125388, + "grad_norm": 4.31672025687122, + "learning_rate": 1.1829649525201295e-05, + "loss": 0.6331, "step": 9735 }, { - "epoch": 2.041518137974418, - "grad_norm": 5.321312523641549, - "learning_rate": 4.893207179232091e-06, - "loss": 0.1776, + "epoch": 1.3747528946625247, + "grad_norm": 3.552669283661908, + "learning_rate": 1.1828150784615216e-05, + "loss": 0.5944, "step": 9736 }, { - "epoch": 2.0417278255399456, - "grad_norm": 5.064349778548283, - "learning_rate": 4.8912608871357025e-06, - "loss": 0.198, + "epoch": 1.3748940977125106, + "grad_norm": 3.4551438493463884, + "learning_rate": 1.1826652001543706e-05, + "loss": 0.5886, "step": 9737 }, { - "epoch": 2.0419375131054727, - "grad_norm": 3.549737635953889, - "learning_rate": 4.889314856871051e-06, - "loss": 0.1566, + "epoch": 1.3750353007624965, + "grad_norm": 3.0695532393122935, + "learning_rate": 1.1825153176021591e-05, + "loss": 0.4931, "step": 9738 }, { - "epoch": 2.042147200671, - "grad_norm": 3.391221718259378, - "learning_rate": 4.887369088537877e-06, - "loss": 0.1202, + "epoch": 1.3751765038124824, + "grad_norm": 3.159324824804856, + "learning_rate": 1.18236543080837e-05, + "loss": 0.546, "step": 9739 }, { - "epoch": 2.0423568882365277, - "grad_norm": 4.126576377111557, - "learning_rate": 4.885423582235909e-06, - "loss": 0.1524, + "epoch": 1.3753177068624682, + "grad_norm": 3.6465346062754023, + "learning_rate": 1.1822155397764873e-05, + "loss": 0.5368, "step": 9740 }, { - "epoch": 2.042566575802055, - "grad_norm": 3.7244952351384835, - "learning_rate": 4.883478338064851e-06, - "loss": 0.1657, + "epoch": 1.3754589099124541, + "grad_norm": 3.7869249013729274, + "learning_rate": 1.1820656445099945e-05, + "loss": 0.6378, "step": 9741 }, { - "epoch": 2.0427762633675823, - "grad_norm": 3.8013591666363133, - "learning_rate": 4.8815333561244e-06, - "loss": 0.114, + "epoch": 1.37560011296244, + "grad_norm": 4.463094303519522, + "learning_rate": 1.1819157450123745e-05, + "loss": 0.7917, "step": 9742 }, { - "epoch": 2.04298595093311, - "grad_norm": 4.469455178598115, - "learning_rate": 4.879588636514246e-06, - "loss": 0.1448, + "epoch": 1.3757413160124259, + "grad_norm": 3.340895739921409, + "learning_rate": 1.1817658412871111e-05, + "loss": 0.621, "step": 9743 }, { - "epoch": 2.043195638498637, - "grad_norm": 4.279595719914409, - "learning_rate": 4.877644179334051e-06, - "loss": 0.1781, + "epoch": 1.3758825190624118, + "grad_norm": 3.709870672943968, + "learning_rate": 1.1816159333376882e-05, + "loss": 0.5998, "step": 9744 }, { - "epoch": 2.0434053260641645, - "grad_norm": 4.290524092660325, - "learning_rate": 4.875699984683481e-06, - "loss": 0.1569, + "epoch": 1.3760237221123977, + "grad_norm": 3.3313619283033153, + "learning_rate": 1.181466021167589e-05, + "loss": 0.5437, "step": 9745 }, { - "epoch": 2.0436150136296916, - "grad_norm": 4.721326246909336, - "learning_rate": 4.873756052662173e-06, - "loss": 0.154, + "epoch": 1.3761649251623835, + "grad_norm": 3.275514741295965, + "learning_rate": 1.1813161047802986e-05, + "loss": 0.5392, "step": 9746 }, { - "epoch": 2.043824701195219, - "grad_norm": 4.140175251669526, - "learning_rate": 4.871812383369755e-06, - "loss": 0.1556, + "epoch": 1.3763061282123694, + "grad_norm": 3.53508871286227, + "learning_rate": 1.1811661841792994e-05, + "loss": 0.5829, "step": 9747 }, { - "epoch": 2.0440343887607466, - "grad_norm": 5.5721019646631005, - "learning_rate": 4.869868976905847e-06, - "loss": 0.187, + "epoch": 1.3764473312623553, + "grad_norm": 3.1587306534107396, + "learning_rate": 1.1810162593680768e-05, + "loss": 0.5146, "step": 9748 }, { - "epoch": 2.0442440763262737, - "grad_norm": 3.110542674309111, - "learning_rate": 4.867925833370058e-06, - "loss": 0.1296, + "epoch": 1.3765885343123412, + "grad_norm": 3.454807859008688, + "learning_rate": 1.1808663303501144e-05, + "loss": 0.5529, "step": 9749 }, { - "epoch": 2.0444537638918012, - "grad_norm": 3.6780221504132427, - "learning_rate": 4.865982952861965e-06, - "loss": 0.1284, + "epoch": 1.376729737362327, + "grad_norm": 3.7564512226826734, + "learning_rate": 1.1807163971288967e-05, + "loss": 0.5536, "step": 9750 }, { - "epoch": 2.0446634514573288, - "grad_norm": 3.241275803154536, - "learning_rate": 4.86404033548115e-06, - "loss": 0.118, + "epoch": 1.376870940412313, + "grad_norm": 3.533939776177978, + "learning_rate": 1.1805664597079079e-05, + "loss": 0.6011, "step": 9751 }, { - "epoch": 2.044873139022856, - "grad_norm": 3.5889762481026013, - "learning_rate": 4.86209798132718e-06, - "loss": 0.1745, + "epoch": 1.3770121434622988, + "grad_norm": 3.6917663269597414, + "learning_rate": 1.1804165180906326e-05, + "loss": 0.5653, "step": 9752 }, { - "epoch": 2.0450828265883834, - "grad_norm": 5.162952073405409, - "learning_rate": 4.860155890499598e-06, - "loss": 0.148, + "epoch": 1.3771533465122847, + "grad_norm": 2.769049607367366, + "learning_rate": 1.180266572280555e-05, + "loss": 0.5117, "step": 9753 }, { - "epoch": 2.045292514153911, - "grad_norm": 5.159580770033513, - "learning_rate": 4.858214063097939e-06, - "loss": 0.1767, + "epoch": 1.3772945495622706, + "grad_norm": 2.8121869843323335, + "learning_rate": 1.1801166222811607e-05, + "loss": 0.4824, "step": 9754 }, { - "epoch": 2.045502201719438, - "grad_norm": 5.295814069975944, - "learning_rate": 4.856272499221727e-06, - "loss": 0.2044, + "epoch": 1.3774357526122565, + "grad_norm": 3.146871945506094, + "learning_rate": 1.1799666680959336e-05, + "loss": 0.5277, "step": 9755 }, { - "epoch": 2.0457118892849655, - "grad_norm": 3.7763549369453497, - "learning_rate": 4.854331198970474e-06, - "loss": 0.1236, + "epoch": 1.3775769556622424, + "grad_norm": 2.7525305789580683, + "learning_rate": 1.1798167097283585e-05, + "loss": 0.428, "step": 9756 }, { - "epoch": 2.0459215768504926, - "grad_norm": 4.764330352553426, - "learning_rate": 4.852390162443672e-06, - "loss": 0.1438, + "epoch": 1.3777181587122282, + "grad_norm": 3.892573235945514, + "learning_rate": 1.1796667471819212e-05, + "loss": 0.6413, "step": 9757 }, { - "epoch": 2.04613126441602, - "grad_norm": 4.303369723345257, - "learning_rate": 4.850449389740798e-06, - "loss": 0.1714, + "epoch": 1.3778593617622141, + "grad_norm": 3.016804903544927, + "learning_rate": 1.1795167804601062e-05, + "loss": 0.5101, "step": 9758 }, { - "epoch": 2.0463409519815476, - "grad_norm": 3.6617425814999245, - "learning_rate": 4.848508880961329e-06, - "loss": 0.1238, + "epoch": 1.3780005648122, + "grad_norm": 3.4675363112830433, + "learning_rate": 1.1793668095663985e-05, + "loss": 0.6008, "step": 9759 }, { - "epoch": 2.0465506395470747, - "grad_norm": 3.7887024712096538, - "learning_rate": 4.8465686362047105e-06, - "loss": 0.1401, + "epoch": 1.378141767862186, + "grad_norm": 4.417380556410781, + "learning_rate": 1.179216834504284e-05, + "loss": 0.6685, "step": 9760 }, { - "epoch": 2.0467603271126023, - "grad_norm": 5.122948427632381, - "learning_rate": 4.84462865557039e-06, - "loss": 0.1731, + "epoch": 1.3782829709121718, + "grad_norm": 3.3837343840831213, + "learning_rate": 1.1790668552772474e-05, + "loss": 0.5289, "step": 9761 }, { - "epoch": 2.04697001467813, - "grad_norm": 5.1304353174633786, - "learning_rate": 4.842688939157795e-06, - "loss": 0.1949, + "epoch": 1.3784241739621577, + "grad_norm": 3.5698947040078193, + "learning_rate": 1.1789168718887745e-05, + "loss": 0.5771, "step": 9762 }, { - "epoch": 2.047179702243657, - "grad_norm": 4.8860890133668775, - "learning_rate": 4.840749487066331e-06, - "loss": 0.156, + "epoch": 1.3785653770121435, + "grad_norm": 3.330740856368138, + "learning_rate": 1.178766884342351e-05, + "loss": 0.5973, "step": 9763 }, { - "epoch": 2.0473893898091844, - "grad_norm": 3.5049700590622486, - "learning_rate": 4.838810299395405e-06, - "loss": 0.1161, + "epoch": 1.3787065800621292, + "grad_norm": 3.490031525427344, + "learning_rate": 1.1786168926414617e-05, + "loss": 0.6195, "step": 9764 }, { - "epoch": 2.0475990773747115, - "grad_norm": 5.596071627268399, - "learning_rate": 4.836871376244405e-06, - "loss": 0.2077, + "epoch": 1.378847783112115, + "grad_norm": 3.348713811897324, + "learning_rate": 1.1784668967895933e-05, + "loss": 0.5544, "step": 9765 }, { - "epoch": 2.047808764940239, - "grad_norm": 4.456633883171991, - "learning_rate": 4.834932717712704e-06, - "loss": 0.1598, + "epoch": 1.378988986162101, + "grad_norm": 3.690194137201315, + "learning_rate": 1.1783168967902314e-05, + "loss": 0.5572, "step": 9766 }, { - "epoch": 2.0480184525057665, - "grad_norm": 4.957496731684181, - "learning_rate": 4.832994323899655e-06, - "loss": 0.1734, + "epoch": 1.3791301892120869, + "grad_norm": 3.7642724196409034, + "learning_rate": 1.1781668926468617e-05, + "loss": 0.7328, "step": 9767 }, { - "epoch": 2.0482281400712936, - "grad_norm": 4.111405757886916, - "learning_rate": 4.831056194904612e-06, - "loss": 0.1697, + "epoch": 1.3792713922620727, + "grad_norm": 4.017831017060751, + "learning_rate": 1.1780168843629705e-05, + "loss": 0.5905, "step": 9768 }, { - "epoch": 2.048437827636821, - "grad_norm": 4.466436643882388, - "learning_rate": 4.829118330826901e-06, - "loss": 0.123, + "epoch": 1.3794125953120586, + "grad_norm": 3.4603542941422205, + "learning_rate": 1.1778668719420436e-05, + "loss": 0.56, "step": 9769 }, { - "epoch": 2.0486475152023487, - "grad_norm": 3.5683220093800783, - "learning_rate": 4.827180731765847e-06, - "loss": 0.1398, + "epoch": 1.3795537983620445, + "grad_norm": 3.337968851817994, + "learning_rate": 1.1777168553875673e-05, + "loss": 0.5759, "step": 9770 }, { - "epoch": 2.0488572027678758, - "grad_norm": 4.294467718239291, - "learning_rate": 4.8252433978207535e-06, - "loss": 0.1477, + "epoch": 1.3796950014120304, + "grad_norm": 2.9142122751242363, + "learning_rate": 1.1775668347030285e-05, + "loss": 0.4959, "step": 9771 }, { - "epoch": 2.0490668903334033, - "grad_norm": 4.339898188495926, - "learning_rate": 4.823306329090906e-06, - "loss": 0.1257, + "epoch": 1.3798362044620163, + "grad_norm": 3.180372606449813, + "learning_rate": 1.177416809891913e-05, + "loss": 0.55, "step": 9772 }, { - "epoch": 2.049276577898931, - "grad_norm": 4.910078758039469, - "learning_rate": 4.821369525675588e-06, - "loss": 0.1577, + "epoch": 1.3799774075120022, + "grad_norm": 5.035317492115083, + "learning_rate": 1.177266780957707e-05, + "loss": 0.8248, "step": 9773 }, { - "epoch": 2.049486265464458, - "grad_norm": 4.550252070991739, - "learning_rate": 4.819432987674072e-06, - "loss": 0.165, + "epoch": 1.380118610561988, + "grad_norm": 4.663232741963081, + "learning_rate": 1.1771167479038978e-05, + "loss": 0.9226, "step": 9774 }, { - "epoch": 2.0496959530299854, - "grad_norm": 4.217995165035445, - "learning_rate": 4.817496715185593e-06, - "loss": 0.1548, + "epoch": 1.380259813611974, + "grad_norm": 3.132884227813378, + "learning_rate": 1.1769667107339723e-05, + "loss": 0.5052, "step": 9775 }, { - "epoch": 2.0499056405955125, - "grad_norm": 3.617185027114443, - "learning_rate": 4.815560708309394e-06, - "loss": 0.1528, + "epoch": 1.3804010166619598, + "grad_norm": 4.581676630594322, + "learning_rate": 1.1768166694514164e-05, + "loss": 0.8078, "step": 9776 }, { - "epoch": 2.05011532816104, - "grad_norm": 4.196934124657236, - "learning_rate": 4.813624967144707e-06, - "loss": 0.1628, + "epoch": 1.3805422197119457, + "grad_norm": 4.193798055572653, + "learning_rate": 1.1766666240597175e-05, + "loss": 0.6975, "step": 9777 }, { - "epoch": 2.0503250157265676, - "grad_norm": 3.9782240726376563, - "learning_rate": 4.811689491790734e-06, - "loss": 0.1388, + "epoch": 1.3806834227619316, + "grad_norm": 5.124414889814807, + "learning_rate": 1.1765165745623628e-05, + "loss": 0.619, "step": 9778 }, { - "epoch": 2.0505347032920946, - "grad_norm": 3.9864284535697143, - "learning_rate": 4.8097542823466695e-06, - "loss": 0.1419, + "epoch": 1.3808246258119174, + "grad_norm": 4.252056176484407, + "learning_rate": 1.176366520962839e-05, + "loss": 0.7259, "step": 9779 }, { - "epoch": 2.050744390857622, - "grad_norm": 3.90096987191959, - "learning_rate": 4.807819338911704e-06, - "loss": 0.1499, + "epoch": 1.3809658288619033, + "grad_norm": 2.9195014983301752, + "learning_rate": 1.1762164632646334e-05, + "loss": 0.5025, "step": 9780 }, { - "epoch": 2.0509540784231497, - "grad_norm": 4.889395632797826, - "learning_rate": 4.805884661585e-06, - "loss": 0.1528, + "epoch": 1.3811070319118892, + "grad_norm": 3.9734387021587545, + "learning_rate": 1.1760664014712333e-05, + "loss": 0.5762, "step": 9781 }, { - "epoch": 2.051163765988677, - "grad_norm": 3.903589573525906, - "learning_rate": 4.803950250465719e-06, - "loss": 0.1374, + "epoch": 1.381248234961875, + "grad_norm": 3.7314506552838127, + "learning_rate": 1.1759163355861258e-05, + "loss": 0.6414, "step": 9782 }, { - "epoch": 2.0513734535542043, - "grad_norm": 5.266024356669548, - "learning_rate": 4.802016105652999e-06, - "loss": 0.1686, + "epoch": 1.381389438011861, + "grad_norm": 3.710591580155354, + "learning_rate": 1.1757662656127991e-05, + "loss": 0.6516, "step": 9783 }, { - "epoch": 2.0515831411197314, - "grad_norm": 4.2575823995114925, - "learning_rate": 4.8000822272459665e-06, - "loss": 0.1347, + "epoch": 1.3815306410618469, + "grad_norm": 3.6393822229586408, + "learning_rate": 1.1756161915547399e-05, + "loss": 0.5177, "step": 9784 }, { - "epoch": 2.051792828685259, - "grad_norm": 4.789495569809839, - "learning_rate": 4.7981486153437375e-06, - "loss": 0.1468, + "epoch": 1.3816718441118327, + "grad_norm": 5.264317155556696, + "learning_rate": 1.1754661134154365e-05, + "loss": 0.9137, "step": 9785 }, { - "epoch": 2.0520025162507864, - "grad_norm": 3.881873722552294, - "learning_rate": 4.796215270045419e-06, - "loss": 0.1699, + "epoch": 1.3818130471618186, + "grad_norm": 3.026734606674291, + "learning_rate": 1.1753160311983764e-05, + "loss": 0.539, "step": 9786 }, { - "epoch": 2.0522122038163135, - "grad_norm": 3.4816791161654623, - "learning_rate": 4.7942821914500924e-06, - "loss": 0.122, + "epoch": 1.3819542502118045, + "grad_norm": 3.5862027531122544, + "learning_rate": 1.1751659449070476e-05, + "loss": 0.5774, "step": 9787 }, { - "epoch": 2.052421891381841, - "grad_norm": 5.230403549857729, - "learning_rate": 4.7923493796568295e-06, - "loss": 0.1586, + "epoch": 1.3820954532617904, + "grad_norm": 2.89619428501797, + "learning_rate": 1.1750158545449377e-05, + "loss": 0.5595, "step": 9788 }, { - "epoch": 2.0526315789473686, - "grad_norm": 3.509921936939052, - "learning_rate": 4.790416834764696e-06, - "loss": 0.125, + "epoch": 1.3822366563117763, + "grad_norm": 3.971553670414237, + "learning_rate": 1.1748657601155348e-05, + "loss": 0.7073, "step": 9789 }, { - "epoch": 2.0528412665128957, - "grad_norm": 4.613713760192309, - "learning_rate": 4.788484556872732e-06, - "loss": 0.1282, + "epoch": 1.3823778593617622, + "grad_norm": 3.912382116884756, + "learning_rate": 1.1747156616223272e-05, + "loss": 0.7153, "step": 9790 }, { - "epoch": 2.053050954078423, - "grad_norm": 4.474865594477442, - "learning_rate": 4.786552546079978e-06, - "loss": 0.1604, + "epoch": 1.382519062411748, + "grad_norm": 3.2735081099008143, + "learning_rate": 1.1745655590688031e-05, + "loss": 0.5579, "step": 9791 }, { - "epoch": 2.0532606416439507, - "grad_norm": 4.297074097442312, - "learning_rate": 4.784620802485447e-06, - "loss": 0.1671, + "epoch": 1.382660265461734, + "grad_norm": 3.739299942420825, + "learning_rate": 1.174415452458451e-05, + "loss": 0.6645, "step": 9792 }, { - "epoch": 2.053470329209478, - "grad_norm": 3.974078148528853, - "learning_rate": 4.7826893261881425e-06, - "loss": 0.1696, + "epoch": 1.3828014685117198, + "grad_norm": 3.916063995026166, + "learning_rate": 1.174265341794759e-05, + "loss": 0.6811, "step": 9793 }, { - "epoch": 2.0536800167750053, - "grad_norm": 4.259664670862086, - "learning_rate": 4.7807581172870625e-06, - "loss": 0.1467, + "epoch": 1.3829426715617057, + "grad_norm": 2.831358415426989, + "learning_rate": 1.1741152270812155e-05, + "loss": 0.5025, "step": 9794 }, { - "epoch": 2.0538897043405324, - "grad_norm": 4.8450665149936825, - "learning_rate": 4.77882717588118e-06, - "loss": 0.1589, + "epoch": 1.3830838746116916, + "grad_norm": 3.3238169771380335, + "learning_rate": 1.1739651083213096e-05, + "loss": 0.509, "step": 9795 }, { - "epoch": 2.05409939190606, - "grad_norm": 4.729706117036031, - "learning_rate": 4.776896502069458e-06, - "loss": 0.1701, + "epoch": 1.3832250776616775, + "grad_norm": 3.1495024528451334, + "learning_rate": 1.1738149855185295e-05, + "loss": 0.5744, "step": 9796 }, { - "epoch": 2.0543090794715875, - "grad_norm": 4.402136589844036, - "learning_rate": 4.774966095950849e-06, - "loss": 0.1163, + "epoch": 1.3833662807116633, + "grad_norm": 4.342164407483664, + "learning_rate": 1.1736648586763645e-05, + "loss": 0.6318, "step": 9797 }, { - "epoch": 2.0545187670371146, - "grad_norm": 4.311885643127705, - "learning_rate": 4.773035957624294e-06, - "loss": 0.1338, + "epoch": 1.3835074837616492, + "grad_norm": 4.198229574454235, + "learning_rate": 1.1735147277983027e-05, + "loss": 0.7214, "step": 9798 }, { - "epoch": 2.054728454602642, - "grad_norm": 4.553215427546349, - "learning_rate": 4.771106087188712e-06, - "loss": 0.1582, + "epoch": 1.383648686811635, + "grad_norm": 3.435757810516378, + "learning_rate": 1.173364592887834e-05, + "loss": 0.5782, "step": 9799 }, { - "epoch": 2.0549381421681696, - "grad_norm": 3.9721270021601525, - "learning_rate": 4.769176484743008e-06, - "loss": 0.1405, + "epoch": 1.383789889861621, + "grad_norm": 3.5343009118416986, + "learning_rate": 1.1732144539484467e-05, + "loss": 0.5617, "step": 9800 }, { - "epoch": 2.0551478297336967, - "grad_norm": 3.4334814984468074, - "learning_rate": 4.767247150386085e-06, - "loss": 0.1412, + "epoch": 1.3839310929116069, + "grad_norm": 4.512213061190638, + "learning_rate": 1.1730643109836306e-05, + "loss": 0.8468, "step": 9801 }, { - "epoch": 2.0553575172992242, - "grad_norm": 5.467359886772754, - "learning_rate": 4.7653180842168174e-06, - "loss": 0.1861, + "epoch": 1.3840722959615928, + "grad_norm": 3.1442163701738886, + "learning_rate": 1.1729141639968745e-05, + "loss": 0.5359, "step": 9802 }, { - "epoch": 2.0555672048647513, - "grad_norm": 3.3055167609799705, - "learning_rate": 4.763389286334081e-06, - "loss": 0.1199, + "epoch": 1.3842134990115786, + "grad_norm": 3.9419128741573264, + "learning_rate": 1.172764012991668e-05, + "loss": 0.6845, "step": 9803 }, { - "epoch": 2.055776892430279, - "grad_norm": 5.645059321460999, - "learning_rate": 4.7614607568367275e-06, - "loss": 0.1718, + "epoch": 1.3843547020615645, + "grad_norm": 3.6723802782369126, + "learning_rate": 1.1726138579715002e-05, + "loss": 0.6403, "step": 9804 }, { - "epoch": 2.0559865799958064, - "grad_norm": 3.8038382911663797, - "learning_rate": 4.759532495823591e-06, - "loss": 0.1377, + "epoch": 1.3844959051115504, + "grad_norm": 3.1925545760678347, + "learning_rate": 1.1724636989398613e-05, + "loss": 0.5057, "step": 9805 }, { - "epoch": 2.0561962675613334, - "grad_norm": 3.794227323765912, - "learning_rate": 4.7576045033935035e-06, - "loss": 0.1236, + "epoch": 1.3846371081615363, + "grad_norm": 2.9651585994013585, + "learning_rate": 1.1723135359002403e-05, + "loss": 0.5296, "step": 9806 }, { - "epoch": 2.056405955126861, - "grad_norm": 4.274609286417137, - "learning_rate": 4.7556767796452805e-06, - "loss": 0.1293, + "epoch": 1.3847783112115222, + "grad_norm": 3.6699050345342727, + "learning_rate": 1.1721633688561269e-05, + "loss": 0.6312, "step": 9807 }, { - "epoch": 2.0566156426923885, - "grad_norm": 3.891406885181315, - "learning_rate": 4.75374932467772e-06, - "loss": 0.1597, + "epoch": 1.384919514261508, + "grad_norm": 3.691039350007807, + "learning_rate": 1.1720131978110115e-05, + "loss": 0.7026, "step": 9808 }, { - "epoch": 2.0568253302579156, - "grad_norm": 4.425901164713951, - "learning_rate": 4.7518221385896014e-06, - "loss": 0.1799, + "epoch": 1.385060717311494, + "grad_norm": 3.146417988611298, + "learning_rate": 1.1718630227683836e-05, + "loss": 0.501, "step": 9809 }, { - "epoch": 2.057035017823443, - "grad_norm": 4.3101988235130415, - "learning_rate": 4.749895221479703e-06, - "loss": 0.1557, + "epoch": 1.3852019203614798, + "grad_norm": 3.2611257374837237, + "learning_rate": 1.171712843731733e-05, + "loss": 0.5615, "step": 9810 }, { - "epoch": 2.0572447053889706, - "grad_norm": 4.509880468806988, - "learning_rate": 4.747968573446783e-06, - "loss": 0.116, + "epoch": 1.3853431234114657, + "grad_norm": 3.575599148623598, + "learning_rate": 1.1715626607045502e-05, + "loss": 0.7724, "step": 9811 }, { - "epoch": 2.0574543929544977, - "grad_norm": 3.862237420804464, - "learning_rate": 4.746042194589578e-06, - "loss": 0.16, + "epoch": 1.3854843264614516, + "grad_norm": 3.2158472768598383, + "learning_rate": 1.1714124736903254e-05, + "loss": 0.4755, "step": 9812 }, { - "epoch": 2.0576640805200253, - "grad_norm": 3.3057082533171704, - "learning_rate": 4.744116085006826e-06, - "loss": 0.1321, + "epoch": 1.3856255295114375, + "grad_norm": 3.196403257017226, + "learning_rate": 1.1712622826925488e-05, + "loss": 0.5397, "step": 9813 }, { - "epoch": 2.0578737680855523, - "grad_norm": 4.291659391518171, - "learning_rate": 4.742190244797237e-06, - "loss": 0.1892, + "epoch": 1.3857667325614234, + "grad_norm": 3.1979300951708893, + "learning_rate": 1.1711120877147107e-05, + "loss": 0.5166, "step": 9814 }, { - "epoch": 2.05808345565108, - "grad_norm": 4.935304095135732, - "learning_rate": 4.740264674059521e-06, - "loss": 0.1756, + "epoch": 1.3859079356114092, + "grad_norm": 2.927604481874085, + "learning_rate": 1.1709618887603013e-05, + "loss": 0.549, "step": 9815 }, { - "epoch": 2.0582931432166074, - "grad_norm": 3.7112552541916908, - "learning_rate": 4.738339372892359e-06, - "loss": 0.1678, + "epoch": 1.3860491386613951, + "grad_norm": 3.8130612155834367, + "learning_rate": 1.1708116858328116e-05, + "loss": 0.5751, "step": 9816 }, { - "epoch": 2.0585028307821345, - "grad_norm": 3.398814387291517, - "learning_rate": 4.736414341394434e-06, - "loss": 0.1435, + "epoch": 1.386190341711381, + "grad_norm": 3.3210961544953657, + "learning_rate": 1.1706614789357321e-05, + "loss": 0.5132, "step": 9817 }, { - "epoch": 2.058712518347662, - "grad_norm": 5.072759137732459, - "learning_rate": 4.7344895796644e-06, - "loss": 0.1513, + "epoch": 1.3863315447613669, + "grad_norm": 3.926222479621664, + "learning_rate": 1.1705112680725538e-05, + "loss": 0.6276, "step": 9818 }, { - "epoch": 2.0589222059131895, - "grad_norm": 3.9355020514602628, - "learning_rate": 4.7325650878009115e-06, - "loss": 0.1688, + "epoch": 1.3864727478113528, + "grad_norm": 4.021742170805987, + "learning_rate": 1.1703610532467669e-05, + "loss": 0.6613, "step": 9819 }, { - "epoch": 2.0591318934787166, - "grad_norm": 4.388975638193464, - "learning_rate": 4.730640865902597e-06, - "loss": 0.1661, + "epoch": 1.3866139508613387, + "grad_norm": 3.4556335468295765, + "learning_rate": 1.1702108344618627e-05, + "loss": 0.5525, "step": 9820 }, { - "epoch": 2.059341581044244, - "grad_norm": 5.006149333268862, - "learning_rate": 4.728716914068075e-06, - "loss": 0.1719, + "epoch": 1.3867551539113245, + "grad_norm": 4.499309035479272, + "learning_rate": 1.1700606117213325e-05, + "loss": 0.6804, "step": 9821 }, { - "epoch": 2.0595512686097712, - "grad_norm": 5.162142712719607, - "learning_rate": 4.726793232395952e-06, - "loss": 0.1624, + "epoch": 1.3868963569613104, + "grad_norm": 3.366822180333817, + "learning_rate": 1.1699103850286668e-05, + "loss": 0.6411, "step": 9822 }, { - "epoch": 2.0597609561752988, - "grad_norm": 4.5253183458291275, - "learning_rate": 4.724869820984826e-06, - "loss": 0.1908, + "epoch": 1.3870375600112963, + "grad_norm": 3.6335560180900734, + "learning_rate": 1.1697601543873573e-05, + "loss": 0.6906, "step": 9823 }, { - "epoch": 2.0599706437408263, - "grad_norm": 3.5961990092210754, - "learning_rate": 4.7229466799332725e-06, - "loss": 0.1499, + "epoch": 1.3871787630612822, + "grad_norm": 3.2170350418810316, + "learning_rate": 1.1696099198008953e-05, + "loss": 0.471, "step": 9824 }, { - "epoch": 2.0601803313063534, - "grad_norm": 3.161093635157218, - "learning_rate": 4.721023809339849e-06, - "loss": 0.1113, + "epoch": 1.387319966111268, + "grad_norm": 3.810120499373073, + "learning_rate": 1.1694596812727714e-05, + "loss": 0.5571, "step": 9825 }, { - "epoch": 2.060390018871881, - "grad_norm": 4.33355494499541, - "learning_rate": 4.719101209303115e-06, - "loss": 0.1466, + "epoch": 1.387461169161254, + "grad_norm": 4.397999742688604, + "learning_rate": 1.1693094388064786e-05, + "loss": 0.7314, "step": 9826 }, { - "epoch": 2.0605997064374084, - "grad_norm": 4.535528047199118, - "learning_rate": 4.7171788799216e-06, - "loss": 0.1553, + "epoch": 1.3876023722112398, + "grad_norm": 3.648693252172972, + "learning_rate": 1.1691591924055068e-05, + "loss": 0.5917, "step": 9827 }, { - "epoch": 2.0608093940029355, - "grad_norm": 4.317305053367913, - "learning_rate": 4.7152568212938336e-06, - "loss": 0.1803, + "epoch": 1.3877435752612257, + "grad_norm": 4.439013085587571, + "learning_rate": 1.1690089420733486e-05, + "loss": 0.6649, "step": 9828 }, { - "epoch": 2.061019081568463, - "grad_norm": 3.5967097815860796, - "learning_rate": 4.713335033518321e-06, - "loss": 0.1493, + "epoch": 1.3878847783112116, + "grad_norm": 3.305157732351099, + "learning_rate": 1.1688586878134957e-05, + "loss": 0.5703, "step": 9829 }, { - "epoch": 2.0612287691339906, - "grad_norm": 5.5806160733606776, - "learning_rate": 4.711413516693554e-06, - "loss": 0.2129, + "epoch": 1.3880259813611975, + "grad_norm": 3.4168869854880115, + "learning_rate": 1.1687084296294398e-05, + "loss": 0.5209, "step": 9830 }, { - "epoch": 2.0614384566995176, - "grad_norm": 3.1217119338915467, - "learning_rate": 4.709492270918016e-06, - "loss": 0.12, + "epoch": 1.3881671844111834, + "grad_norm": 3.333916935510874, + "learning_rate": 1.1685581675246729e-05, + "loss": 0.6147, "step": 9831 }, { - "epoch": 2.061648144265045, - "grad_norm": 4.184075015744736, - "learning_rate": 4.707571296290182e-06, - "loss": 0.1701, + "epoch": 1.3883083874611692, + "grad_norm": 3.7175505390952064, + "learning_rate": 1.168407901502687e-05, + "loss": 0.5546, "step": 9832 }, { - "epoch": 2.0618578318305723, - "grad_norm": 4.655865407348202, - "learning_rate": 4.705650592908492e-06, - "loss": 0.1619, + "epoch": 1.3884495905111551, + "grad_norm": 2.7021335870646737, + "learning_rate": 1.1682576315669738e-05, + "loss": 0.4484, "step": 9833 }, { - "epoch": 2.0620675193961, - "grad_norm": 3.4016707751298623, - "learning_rate": 4.703730160871392e-06, - "loss": 0.1484, + "epoch": 1.388590793561141, + "grad_norm": 4.139068438553842, + "learning_rate": 1.1681073577210262e-05, + "loss": 0.6094, "step": 9834 }, { - "epoch": 2.0622772069616273, - "grad_norm": 4.1713595286173835, - "learning_rate": 4.70181000027731e-06, - "loss": 0.1523, + "epoch": 1.388731996611127, + "grad_norm": 4.208528815254102, + "learning_rate": 1.1679570799683365e-05, + "loss": 0.7139, "step": 9835 }, { - "epoch": 2.0624868945271544, - "grad_norm": 4.062228782362276, - "learning_rate": 4.699890111224655e-06, - "loss": 0.1446, + "epoch": 1.3888731996611128, + "grad_norm": 3.782106734504938, + "learning_rate": 1.1678067983123965e-05, + "loss": 0.6417, "step": 9836 }, { - "epoch": 2.062696582092682, - "grad_norm": 4.173052503513639, - "learning_rate": 4.697970493811821e-06, - "loss": 0.1736, + "epoch": 1.3890144027110987, + "grad_norm": 3.3050716208228588, + "learning_rate": 1.167656512756699e-05, + "loss": 0.5154, "step": 9837 }, { - "epoch": 2.0629062696582094, - "grad_norm": 4.410452885530461, - "learning_rate": 4.696051148137198e-06, - "loss": 0.1321, + "epoch": 1.3891556057610845, + "grad_norm": 2.8798767534758616, + "learning_rate": 1.1675062233047365e-05, + "loss": 0.4874, "step": 9838 }, { - "epoch": 2.0631159572237365, - "grad_norm": 6.680127587795301, - "learning_rate": 4.694132074299149e-06, - "loss": 0.1961, + "epoch": 1.3892968088110704, + "grad_norm": 4.161224567767254, + "learning_rate": 1.167355929960002e-05, + "loss": 0.5871, "step": 9839 }, { - "epoch": 2.063325644789264, - "grad_norm": 3.6277563751302626, - "learning_rate": 4.692213272396039e-06, - "loss": 0.1517, + "epoch": 1.3894380118610563, + "grad_norm": 3.5447509114756715, + "learning_rate": 1.1672056327259876e-05, + "loss": 0.6609, "step": 9840 }, { - "epoch": 2.0635353323547916, - "grad_norm": 4.977892836811028, - "learning_rate": 4.6902947425262035e-06, - "loss": 0.1358, + "epoch": 1.3895792149110422, + "grad_norm": 3.6684300654529878, + "learning_rate": 1.1670553316061865e-05, + "loss": 0.573, "step": 9841 }, { - "epoch": 2.0637450199203187, - "grad_norm": 3.4455307834054785, - "learning_rate": 4.688376484787969e-06, - "loss": 0.1167, + "epoch": 1.389720417961028, + "grad_norm": 3.94433741990711, + "learning_rate": 1.1669050266040917e-05, + "loss": 0.6535, "step": 9842 }, { - "epoch": 2.063954707485846, - "grad_norm": 5.285245420263358, - "learning_rate": 4.68645849927965e-06, - "loss": 0.1755, + "epoch": 1.389861621011014, + "grad_norm": 4.473791419229923, + "learning_rate": 1.1667547177231966e-05, + "loss": 0.624, "step": 9843 }, { - "epoch": 2.0641643950513733, - "grad_norm": 3.9498095787337597, - "learning_rate": 4.684540786099555e-06, - "loss": 0.169, + "epoch": 1.3900028240609998, + "grad_norm": 4.1886857825418184, + "learning_rate": 1.1666044049669934e-05, + "loss": 0.6623, "step": 9844 }, { - "epoch": 2.064374082616901, - "grad_norm": 4.306700814187674, - "learning_rate": 4.682623345345962e-06, - "loss": 0.1309, + "epoch": 1.3901440271109857, + "grad_norm": 3.4432835279406597, + "learning_rate": 1.1664540883389757e-05, + "loss": 0.5244, "step": 9845 }, { - "epoch": 2.0645837701824283, - "grad_norm": 4.2602422546839485, - "learning_rate": 4.680706177117143e-06, - "loss": 0.1299, + "epoch": 1.3902852301609716, + "grad_norm": 3.3807762809208954, + "learning_rate": 1.1663037678426371e-05, + "loss": 0.5699, "step": 9846 }, { - "epoch": 2.0647934577479554, - "grad_norm": 3.85664829718295, - "learning_rate": 4.678789281511361e-06, - "loss": 0.1347, + "epoch": 1.3904264332109575, + "grad_norm": 3.579029842404051, + "learning_rate": 1.1661534434814707e-05, + "loss": 0.5556, "step": 9847 }, { - "epoch": 2.065003145313483, - "grad_norm": 4.302621917990932, - "learning_rate": 4.676872658626854e-06, - "loss": 0.1583, + "epoch": 1.3905676362609434, + "grad_norm": 3.4287956254850154, + "learning_rate": 1.16600311525897e-05, + "loss": 0.5294, "step": 9848 }, { - "epoch": 2.0652128328790105, - "grad_norm": 4.7393011265663265, - "learning_rate": 4.674956308561859e-06, - "loss": 0.1927, + "epoch": 1.390708839310929, + "grad_norm": 4.0355526996282665, + "learning_rate": 1.1658527831786289e-05, + "loss": 0.5515, "step": 9849 }, { - "epoch": 2.0654225204445376, - "grad_norm": 4.7959998082606035, - "learning_rate": 4.6730402314145895e-06, - "loss": 0.1449, + "epoch": 1.390850042360915, + "grad_norm": 4.239185880095875, + "learning_rate": 1.1657024472439402e-05, + "loss": 0.618, "step": 9850 }, { - "epoch": 2.065632208010065, - "grad_norm": 4.5838107467083855, - "learning_rate": 4.671124427283243e-06, - "loss": 0.137, + "epoch": 1.3909912454109008, + "grad_norm": 3.983146704913183, + "learning_rate": 1.1655521074583986e-05, + "loss": 0.5716, "step": 9851 }, { - "epoch": 2.065841895575592, - "grad_norm": 4.402381285534479, - "learning_rate": 4.6692088962660134e-06, - "loss": 0.1683, + "epoch": 1.3911324484608867, + "grad_norm": 3.3410843897360936, + "learning_rate": 1.1654017638254976e-05, + "loss": 0.6279, "step": 9852 }, { - "epoch": 2.0660515831411197, - "grad_norm": 4.600697316242818, - "learning_rate": 4.66729363846108e-06, - "loss": 0.1595, + "epoch": 1.3912736515108726, + "grad_norm": 3.6403595886753464, + "learning_rate": 1.1652514163487307e-05, + "loss": 0.5591, "step": 9853 }, { - "epoch": 2.0662612707066472, - "grad_norm": 3.5785812220866093, - "learning_rate": 4.665378653966591e-06, - "loss": 0.1223, + "epoch": 1.3914148545608584, + "grad_norm": 3.384141194899694, + "learning_rate": 1.1651010650315923e-05, + "loss": 0.576, "step": 9854 }, { - "epoch": 2.0664709582721743, - "grad_norm": 4.913849900889719, - "learning_rate": 4.663463942880696e-06, - "loss": 0.2068, + "epoch": 1.3915560576108443, + "grad_norm": 3.7380540587158393, + "learning_rate": 1.1649507098775765e-05, + "loss": 0.6175, "step": 9855 }, { - "epoch": 2.066680645837702, - "grad_norm": 3.676429624335855, - "learning_rate": 4.661549505301533e-06, - "loss": 0.1387, + "epoch": 1.3916972606608302, + "grad_norm": 4.076833621569356, + "learning_rate": 1.1648003508901775e-05, + "loss": 0.648, "step": 9856 }, { - "epoch": 2.0668903334032294, - "grad_norm": 4.841741721034167, - "learning_rate": 4.659635341327219e-06, - "loss": 0.1829, + "epoch": 1.391838463710816, + "grad_norm": 3.3176339572170135, + "learning_rate": 1.1646499880728897e-05, + "loss": 0.5357, "step": 9857 }, { - "epoch": 2.0671000209687564, - "grad_norm": 5.388231917468368, - "learning_rate": 4.657721451055852e-06, - "loss": 0.1335, + "epoch": 1.391979666760802, + "grad_norm": 4.420644302896781, + "learning_rate": 1.164499621429207e-05, + "loss": 0.7863, "step": 9858 }, { - "epoch": 2.067309708534284, - "grad_norm": 4.6134229582954855, - "learning_rate": 4.65580783458553e-06, - "loss": 0.1716, + "epoch": 1.3921208698107879, + "grad_norm": 4.1825793253469845, + "learning_rate": 1.1643492509626242e-05, + "loss": 0.681, "step": 9859 }, { - "epoch": 2.0675193960998115, - "grad_norm": 3.838022936901379, - "learning_rate": 4.653894492014321e-06, - "loss": 0.1722, + "epoch": 1.3922620728607737, + "grad_norm": 3.7939241788630405, + "learning_rate": 1.1641988766766359e-05, + "loss": 0.5817, "step": 9860 }, { - "epoch": 2.0677290836653386, - "grad_norm": 4.25482259127271, - "learning_rate": 4.651981423440296e-06, - "loss": 0.1707, + "epoch": 1.3924032759107596, + "grad_norm": 3.514503969440174, + "learning_rate": 1.1640484985747365e-05, + "loss": 0.6232, "step": 9861 }, { - "epoch": 2.067938771230866, - "grad_norm": 5.0728955024655376, - "learning_rate": 4.650068628961499e-06, - "loss": 0.2098, + "epoch": 1.3925444789607455, + "grad_norm": 4.367900527283707, + "learning_rate": 1.1638981166604206e-05, + "loss": 0.653, "step": 9862 }, { - "epoch": 2.068148458796393, - "grad_norm": 4.511049454244188, - "learning_rate": 4.64815610867596e-06, - "loss": 0.1499, + "epoch": 1.3926856820107314, + "grad_norm": 4.135579436941323, + "learning_rate": 1.1637477309371837e-05, + "loss": 0.775, "step": 9863 }, { - "epoch": 2.0683581463619207, - "grad_norm": 3.762999021380038, - "learning_rate": 4.646243862681702e-06, - "loss": 0.1231, + "epoch": 1.3928268850607173, + "grad_norm": 3.8635195881828857, + "learning_rate": 1.16359734140852e-05, + "loss": 0.7002, "step": 9864 }, { - "epoch": 2.0685678339274483, - "grad_norm": 4.892388772060436, - "learning_rate": 4.644331891076736e-06, - "loss": 0.169, + "epoch": 1.3929680881107032, + "grad_norm": 4.04607179465437, + "learning_rate": 1.1634469480779249e-05, + "loss": 0.6851, "step": 9865 }, { - "epoch": 2.0687775214929753, - "grad_norm": 4.394890335882914, - "learning_rate": 4.642420193959049e-06, - "loss": 0.1457, + "epoch": 1.393109291160689, + "grad_norm": 3.447249623318818, + "learning_rate": 1.1632965509488932e-05, + "loss": 0.5526, "step": 9866 }, { - "epoch": 2.068987209058503, - "grad_norm": 3.1566105636787505, - "learning_rate": 4.640508771426616e-06, - "loss": 0.1316, + "epoch": 1.393250494210675, + "grad_norm": 3.525782843010651, + "learning_rate": 1.1631461500249199e-05, + "loss": 0.6295, "step": 9867 }, { - "epoch": 2.0691968966240304, - "grad_norm": 4.13807875272563, - "learning_rate": 4.6385976235774074e-06, - "loss": 0.1587, + "epoch": 1.3933916972606608, + "grad_norm": 3.345715714935897, + "learning_rate": 1.1629957453095013e-05, + "loss": 0.5684, "step": 9868 }, { - "epoch": 2.0694065841895575, - "grad_norm": 5.010358970112285, - "learning_rate": 4.636686750509366e-06, - "loss": 0.1607, + "epoch": 1.3935329003106467, + "grad_norm": 3.5043972621294537, + "learning_rate": 1.1628453368061315e-05, + "loss": 0.5934, "step": 9869 }, { - "epoch": 2.069616271755085, - "grad_norm": 3.6861103172486995, - "learning_rate": 4.634776152320435e-06, - "loss": 0.1493, + "epoch": 1.3936741033606326, + "grad_norm": 3.5506528786630382, + "learning_rate": 1.1626949245183061e-05, + "loss": 0.6043, "step": 9870 }, { - "epoch": 2.069825959320612, - "grad_norm": 3.3430396165443814, - "learning_rate": 4.632865829108532e-06, - "loss": 0.1546, + "epoch": 1.3938153064106185, + "grad_norm": 2.8280871187474528, + "learning_rate": 1.1625445084495213e-05, + "loss": 0.5489, "step": 9871 }, { - "epoch": 2.0700356468861396, - "grad_norm": 3.4497702429447634, - "learning_rate": 4.630955780971559e-06, - "loss": 0.1405, + "epoch": 1.3939565094606043, + "grad_norm": 3.251375902078435, + "learning_rate": 1.1623940886032723e-05, + "loss": 0.5236, "step": 9872 }, { - "epoch": 2.070245334451667, - "grad_norm": 3.979309856941839, - "learning_rate": 4.629046008007418e-06, - "loss": 0.1586, + "epoch": 1.3940977125105902, + "grad_norm": 3.015023220784678, + "learning_rate": 1.1622436649830546e-05, + "loss": 0.5119, "step": 9873 }, { - "epoch": 2.0704550220171942, - "grad_norm": 4.326740114553557, - "learning_rate": 4.627136510313986e-06, - "loss": 0.1258, + "epoch": 1.394238915560576, + "grad_norm": 3.36756802486958, + "learning_rate": 1.1620932375923644e-05, + "loss": 0.505, "step": 9874 }, { - "epoch": 2.0706647095827218, - "grad_norm": 3.4459829598510385, - "learning_rate": 4.6252272879891225e-06, - "loss": 0.1297, + "epoch": 1.394380118610562, + "grad_norm": 3.5814695906214515, + "learning_rate": 1.1619428064346973e-05, + "loss": 0.6922, "step": 9875 }, { - "epoch": 2.0708743971482493, - "grad_norm": 3.717413220616672, - "learning_rate": 4.623318341130682e-06, - "loss": 0.142, + "epoch": 1.3945213216605479, + "grad_norm": 3.743606131725301, + "learning_rate": 1.1617923715135493e-05, + "loss": 0.6426, "step": 9876 }, { - "epoch": 2.0710840847137764, - "grad_norm": 4.312745311703431, - "learning_rate": 4.621409669836505e-06, - "loss": 0.1741, + "epoch": 1.3946625247105338, + "grad_norm": 3.433415906677097, + "learning_rate": 1.1616419328324166e-05, + "loss": 0.5339, "step": 9877 }, { - "epoch": 2.071293772279304, - "grad_norm": 4.855753641294632, - "learning_rate": 4.619501274204412e-06, - "loss": 0.1343, + "epoch": 1.3948037277605196, + "grad_norm": 3.2092886784737016, + "learning_rate": 1.1614914903947952e-05, + "loss": 0.5567, "step": 9878 }, { - "epoch": 2.0715034598448314, - "grad_norm": 5.193885223637972, - "learning_rate": 4.617593154332207e-06, - "loss": 0.1436, + "epoch": 1.3949449308105055, + "grad_norm": 4.074211293431233, + "learning_rate": 1.1613410442041808e-05, + "loss": 0.6666, "step": 9879 }, { - "epoch": 2.0717131474103585, - "grad_norm": 4.0394066322979585, - "learning_rate": 4.615685310317688e-06, - "loss": 0.1435, + "epoch": 1.3950861338604914, + "grad_norm": 4.291510472790171, + "learning_rate": 1.1611905942640707e-05, + "loss": 0.7636, "step": 9880 }, { - "epoch": 2.071922834975886, - "grad_norm": 3.8073050398408, - "learning_rate": 4.6137777422586395e-06, - "loss": 0.1384, + "epoch": 1.3952273369104773, + "grad_norm": 4.035418183227724, + "learning_rate": 1.1610401405779608e-05, + "loss": 0.6795, "step": 9881 }, { - "epoch": 2.072132522541413, - "grad_norm": 5.276837357466768, - "learning_rate": 4.611870450252823e-06, - "loss": 0.1864, + "epoch": 1.3953685399604632, + "grad_norm": 4.96995317458963, + "learning_rate": 1.1608896831493475e-05, + "loss": 0.8271, "step": 9882 }, { - "epoch": 2.0723422101069406, - "grad_norm": 4.653807942958038, - "learning_rate": 4.609963434397989e-06, - "loss": 0.1766, + "epoch": 1.395509743010449, + "grad_norm": 2.627076894427599, + "learning_rate": 1.1607392219817272e-05, + "loss": 0.4173, "step": 9883 }, { - "epoch": 2.072551897672468, - "grad_norm": 4.718646870600865, - "learning_rate": 4.608056694791879e-06, - "loss": 0.181, + "epoch": 1.395650946060435, + "grad_norm": 3.7438005211037546, + "learning_rate": 1.1605887570785972e-05, + "loss": 0.5769, "step": 9884 }, { - "epoch": 2.0727615852379953, - "grad_norm": 4.627845992212738, - "learning_rate": 4.6061502315322124e-06, - "loss": 0.1795, + "epoch": 1.3957921491104208, + "grad_norm": 3.5761898545115387, + "learning_rate": 1.1604382884434537e-05, + "loss": 0.5281, "step": 9885 }, { - "epoch": 2.072971272803523, - "grad_norm": 4.516202143251554, - "learning_rate": 4.604244044716706e-06, - "loss": 0.1688, + "epoch": 1.3959333521604067, + "grad_norm": 3.6553671431407366, + "learning_rate": 1.1602878160797936e-05, + "loss": 0.5084, "step": 9886 }, { - "epoch": 2.0731809603690503, - "grad_norm": 3.2774125610284326, - "learning_rate": 4.60233813444305e-06, - "loss": 0.1009, + "epoch": 1.3960745552103926, + "grad_norm": 4.344755588560155, + "learning_rate": 1.1601373399911137e-05, + "loss": 0.7339, "step": 9887 }, { - "epoch": 2.0733906479345774, - "grad_norm": 3.228192073765901, - "learning_rate": 4.600432500808924e-06, - "loss": 0.1448, + "epoch": 1.3962157582603785, + "grad_norm": 3.5602518300063393, + "learning_rate": 1.1599868601809114e-05, + "loss": 0.5599, "step": 9888 }, { - "epoch": 2.073600335500105, - "grad_norm": 3.2864884333600557, - "learning_rate": 4.598527143911995e-06, - "loss": 0.1285, + "epoch": 1.3963569613103644, + "grad_norm": 3.430855151090691, + "learning_rate": 1.1598363766526834e-05, + "loss": 0.6009, "step": 9889 }, { - "epoch": 2.073810023065632, - "grad_norm": 4.850350734629302, - "learning_rate": 4.596622063849927e-06, - "loss": 0.1386, + "epoch": 1.3964981643603502, + "grad_norm": 3.635641645994789, + "learning_rate": 1.1596858894099272e-05, + "loss": 0.5932, "step": 9890 }, { - "epoch": 2.0740197106311595, - "grad_norm": 6.47227884411318, - "learning_rate": 4.594717260720343e-06, - "loss": 0.2033, + "epoch": 1.3966393674103361, + "grad_norm": 3.5617751593296947, + "learning_rate": 1.15953539845614e-05, + "loss": 0.6612, "step": 9891 }, { - "epoch": 2.074229398196687, - "grad_norm": 4.810446587143289, - "learning_rate": 4.592812734620874e-06, - "loss": 0.1771, + "epoch": 1.396780570460322, + "grad_norm": 3.1035933100114614, + "learning_rate": 1.1593849037948189e-05, + "loss": 0.5431, "step": 9892 }, { - "epoch": 2.074439085762214, - "grad_norm": 3.0483492659857463, - "learning_rate": 4.5909084856491336e-06, - "loss": 0.1004, + "epoch": 1.3969217735103079, + "grad_norm": 3.245455322386706, + "learning_rate": 1.1592344054294613e-05, + "loss": 0.5808, "step": 9893 }, { - "epoch": 2.0746487733277417, - "grad_norm": 4.033444521840543, - "learning_rate": 4.589004513902716e-06, - "loss": 0.1394, + "epoch": 1.3970629765602938, + "grad_norm": 3.319015716889081, + "learning_rate": 1.1590839033635652e-05, + "loss": 0.5563, "step": 9894 }, { - "epoch": 2.074858460893269, - "grad_norm": 4.196624074670315, - "learning_rate": 4.587100819479198e-06, - "loss": 0.1446, + "epoch": 1.3972041796102797, + "grad_norm": 2.985883387348687, + "learning_rate": 1.1589333976006278e-05, + "loss": 0.4987, "step": 9895 }, { - "epoch": 2.0750681484587963, - "grad_norm": 3.7960259992944523, - "learning_rate": 4.585197402476155e-06, - "loss": 0.1301, + "epoch": 1.3973453826602655, + "grad_norm": 4.44666187870931, + "learning_rate": 1.1587828881441468e-05, + "loss": 0.7496, "step": 9896 }, { - "epoch": 2.075277836024324, - "grad_norm": 3.243663392528349, - "learning_rate": 4.583294262991132e-06, - "loss": 0.1278, + "epoch": 1.3974865857102514, + "grad_norm": 4.6670398033800735, + "learning_rate": 1.1586323749976201e-05, + "loss": 0.655, "step": 9897 }, { - "epoch": 2.0754875235898513, - "grad_norm": 4.534585056756628, - "learning_rate": 4.5813914011216775e-06, - "loss": 0.1475, + "epoch": 1.3976277887602373, + "grad_norm": 2.7928571337114008, + "learning_rate": 1.1584818581645453e-05, + "loss": 0.4452, "step": 9898 }, { - "epoch": 2.0756972111553784, - "grad_norm": 4.629777471828104, - "learning_rate": 4.579488816965312e-06, - "loss": 0.1521, + "epoch": 1.3977689918102232, + "grad_norm": 5.164255213268622, + "learning_rate": 1.1583313376484209e-05, + "loss": 0.8635, "step": 9899 }, { - "epoch": 2.075906898720906, - "grad_norm": 4.478844623276444, - "learning_rate": 4.577586510619541e-06, - "loss": 0.1685, + "epoch": 1.3979101948602088, + "grad_norm": 4.089959308270347, + "learning_rate": 1.1581808134527443e-05, + "loss": 0.531, "step": 9900 }, { - "epoch": 2.076116586286433, - "grad_norm": 5.330111625981263, - "learning_rate": 4.575684482181867e-06, - "loss": 0.1854, + "epoch": 1.3980513979101947, + "grad_norm": 3.5520014738183843, + "learning_rate": 1.1580302855810142e-05, + "loss": 0.5803, "step": 9901 }, { - "epoch": 2.0763262738519606, - "grad_norm": 3.4182647480861466, - "learning_rate": 4.573782731749774e-06, - "loss": 0.1285, + "epoch": 1.3981926009601806, + "grad_norm": 2.6428292260988737, + "learning_rate": 1.1578797540367284e-05, + "loss": 0.4608, "step": 9902 }, { - "epoch": 2.076535961417488, - "grad_norm": 6.0336691125956605, - "learning_rate": 4.571881259420728e-06, - "loss": 0.2079, + "epoch": 1.3983338040101665, + "grad_norm": 3.681028921829734, + "learning_rate": 1.1577292188233853e-05, + "loss": 0.6104, "step": 9903 }, { - "epoch": 2.076745648983015, - "grad_norm": 4.139161254488462, - "learning_rate": 4.569980065292179e-06, - "loss": 0.1285, + "epoch": 1.3984750070601524, + "grad_norm": 2.7962695063828074, + "learning_rate": 1.157578679944483e-05, + "loss": 0.4772, "step": 9904 }, { - "epoch": 2.0769553365485427, - "grad_norm": 4.946687594002389, - "learning_rate": 4.568079149461573e-06, - "loss": 0.155, + "epoch": 1.3986162101101383, + "grad_norm": 4.163563947196601, + "learning_rate": 1.1574281374035206e-05, + "loss": 0.6053, "step": 9905 }, { - "epoch": 2.0771650241140702, - "grad_norm": 3.3972881555687384, - "learning_rate": 4.566178512026328e-06, - "loss": 0.1448, + "epoch": 1.3987574131601241, + "grad_norm": 2.986014200504435, + "learning_rate": 1.157277591203996e-05, + "loss": 0.5422, "step": 9906 }, { - "epoch": 2.0773747116795973, - "grad_norm": 4.418225157946723, - "learning_rate": 4.564278153083862e-06, - "loss": 0.163, + "epoch": 1.39889861621011, + "grad_norm": 2.9481756687840637, + "learning_rate": 1.1571270413494082e-05, + "loss": 0.4456, "step": 9907 }, { - "epoch": 2.077584399245125, - "grad_norm": 3.39876899006876, - "learning_rate": 4.562378072731568e-06, - "loss": 0.1162, + "epoch": 1.399039819260096, + "grad_norm": 4.063261320869723, + "learning_rate": 1.1569764878432559e-05, + "loss": 0.6013, "step": 9908 }, { - "epoch": 2.077794086810652, - "grad_norm": 4.768248416768718, - "learning_rate": 4.560478271066826e-06, - "loss": 0.141, + "epoch": 1.3991810223100818, + "grad_norm": 3.3496478110684778, + "learning_rate": 1.156825930689038e-05, + "loss": 0.514, "step": 9909 }, { - "epoch": 2.0780037743761794, - "grad_norm": 4.672334172216667, - "learning_rate": 4.558578748187007e-06, - "loss": 0.1995, + "epoch": 1.3993222253600677, + "grad_norm": 3.091898970426328, + "learning_rate": 1.1566753698902527e-05, + "loss": 0.4635, "step": 9910 }, { - "epoch": 2.078213461941707, - "grad_norm": 4.788753924688709, - "learning_rate": 4.556679504189472e-06, - "loss": 0.1901, + "epoch": 1.3994634284100536, + "grad_norm": 3.695572337695737, + "learning_rate": 1.1565248054503999e-05, + "loss": 0.6152, "step": 9911 }, { - "epoch": 2.078423149507234, - "grad_norm": 3.891542290172759, - "learning_rate": 4.554780539171546e-06, - "loss": 0.1998, + "epoch": 1.3996046314600394, + "grad_norm": 3.678411544056201, + "learning_rate": 1.156374237372978e-05, + "loss": 0.6309, "step": 9912 }, { - "epoch": 2.0786328370727616, - "grad_norm": 4.623149770055773, - "learning_rate": 4.552881853230562e-06, - "loss": 0.1946, + "epoch": 1.3997458345100253, + "grad_norm": 4.155175543974391, + "learning_rate": 1.1562236656614863e-05, + "loss": 0.5537, "step": 9913 }, { - "epoch": 2.078842524638289, - "grad_norm": 3.870663598053487, - "learning_rate": 4.550983446463834e-06, - "loss": 0.1294, + "epoch": 1.3998870375600112, + "grad_norm": 4.523804379150606, + "learning_rate": 1.1560730903194242e-05, + "loss": 0.7067, "step": 9914 }, { - "epoch": 2.079052212203816, - "grad_norm": 3.6186756055560627, - "learning_rate": 4.549085318968655e-06, - "loss": 0.154, + "epoch": 1.400028240609997, + "grad_norm": 3.4751366444823035, + "learning_rate": 1.155922511350291e-05, + "loss": 0.5602, "step": 9915 }, { - "epoch": 2.0792618997693437, - "grad_norm": 3.135469715721393, - "learning_rate": 4.547187470842304e-06, - "loss": 0.1257, + "epoch": 1.400169443659983, + "grad_norm": 3.1838686907039135, + "learning_rate": 1.1557719287575858e-05, + "loss": 0.4078, "step": 9916 }, { - "epoch": 2.0794715873348713, - "grad_norm": 2.862892687302075, - "learning_rate": 4.545289902182055e-06, - "loss": 0.0979, + "epoch": 1.4003106467099689, + "grad_norm": 3.9072861243559607, + "learning_rate": 1.1556213425448082e-05, + "loss": 0.4787, "step": 9917 }, { - "epoch": 2.0796812749003983, - "grad_norm": 4.638551248688577, - "learning_rate": 4.543392613085157e-06, - "loss": 0.1669, + "epoch": 1.4004518497599547, + "grad_norm": 3.6372154595961694, + "learning_rate": 1.155470752715458e-05, + "loss": 0.5769, "step": 9918 }, { - "epoch": 2.079890962465926, - "grad_norm": 2.9819532555855925, - "learning_rate": 4.541495603648854e-06, - "loss": 0.112, + "epoch": 1.4005930528099406, + "grad_norm": 4.080503965457572, + "learning_rate": 1.1553201592730345e-05, + "loss": 0.7389, "step": 9919 }, { - "epoch": 2.080100650031453, - "grad_norm": 4.422434782975081, - "learning_rate": 4.5395988739703704e-06, - "loss": 0.1794, + "epoch": 1.4007342558599265, + "grad_norm": 3.3875410333751046, + "learning_rate": 1.1551695622210377e-05, + "loss": 0.4923, "step": 9920 }, { - "epoch": 2.0803103375969805, - "grad_norm": 4.232637175638705, - "learning_rate": 4.53770242414691e-06, - "loss": 0.1386, + "epoch": 1.4008754589099124, + "grad_norm": 4.5441608103377185, + "learning_rate": 1.1550189615629672e-05, + "loss": 0.5013, "step": 9921 }, { - "epoch": 2.080520025162508, - "grad_norm": 5.084814883120491, - "learning_rate": 4.535806254275675e-06, - "loss": 0.2158, + "epoch": 1.4010166619598983, + "grad_norm": 3.7740130223879893, + "learning_rate": 1.1548683573023229e-05, + "loss": 0.5647, "step": 9922 }, { - "epoch": 2.080729712728035, - "grad_norm": 3.775504506821414, - "learning_rate": 4.533910364453851e-06, - "loss": 0.1392, + "epoch": 1.4011578650098842, + "grad_norm": 3.2370838240847157, + "learning_rate": 1.154717749442605e-05, + "loss": 0.5198, "step": 9923 }, { - "epoch": 2.0809394002935626, - "grad_norm": 4.084945256902815, - "learning_rate": 4.532014754778599e-06, - "loss": 0.1307, + "epoch": 1.40129906805987, + "grad_norm": 3.5835146748517595, + "learning_rate": 1.1545671379873134e-05, + "loss": 0.5716, "step": 9924 }, { - "epoch": 2.08114908785909, - "grad_norm": 4.605619107162844, - "learning_rate": 4.530119425347072e-06, - "loss": 0.1627, + "epoch": 1.401440271109856, + "grad_norm": 3.39535029704569, + "learning_rate": 1.1544165229399481e-05, + "loss": 0.5243, "step": 9925 }, { - "epoch": 2.0813587754246172, - "grad_norm": 3.751223623359653, - "learning_rate": 4.528224376256416e-06, - "loss": 0.1507, + "epoch": 1.4015814741598418, + "grad_norm": 4.358751874097533, + "learning_rate": 1.1542659043040097e-05, + "loss": 0.6843, "step": 9926 }, { - "epoch": 2.0815684629901448, - "grad_norm": 4.258587929674082, - "learning_rate": 4.526329607603748e-06, - "loss": 0.1673, + "epoch": 1.4017226772098277, + "grad_norm": 3.2639322635725927, + "learning_rate": 1.1541152820829984e-05, + "loss": 0.5403, "step": 9927 }, { - "epoch": 2.081778150555672, - "grad_norm": 3.6932002262239427, - "learning_rate": 4.524435119486183e-06, - "loss": 0.1369, + "epoch": 1.4018638802598136, + "grad_norm": 3.4067849244233837, + "learning_rate": 1.1539646562804143e-05, + "loss": 0.509, "step": 9928 }, { - "epoch": 2.0819878381211994, - "grad_norm": 3.218559333127295, - "learning_rate": 4.522540912000815e-06, - "loss": 0.1166, + "epoch": 1.4020050833097994, + "grad_norm": 3.0480837846178517, + "learning_rate": 1.1538140268997583e-05, + "loss": 0.4499, "step": 9929 }, { - "epoch": 2.082197525686727, - "grad_norm": 3.838510271577625, - "learning_rate": 4.520646985244723e-06, - "loss": 0.1485, + "epoch": 1.4021462863597853, + "grad_norm": 3.5205926500311415, + "learning_rate": 1.1536633939445302e-05, + "loss": 0.5647, "step": 9930 }, { - "epoch": 2.082407213252254, - "grad_norm": 4.3088347799024165, - "learning_rate": 4.518753339314976e-06, - "loss": 0.1358, + "epoch": 1.4022874894097712, + "grad_norm": 3.885079150439811, + "learning_rate": 1.1535127574182315e-05, + "loss": 0.6031, "step": 9931 }, { - "epoch": 2.0826169008177815, - "grad_norm": 4.658867148489126, - "learning_rate": 4.516859974308634e-06, - "loss": 0.1494, + "epoch": 1.402428692459757, + "grad_norm": 4.968488923984317, + "learning_rate": 1.153362117324363e-05, + "loss": 0.8495, "step": 9932 }, { - "epoch": 2.082826588383309, - "grad_norm": 4.68076465028237, - "learning_rate": 4.514966890322722e-06, - "loss": 0.1411, + "epoch": 1.402569895509743, + "grad_norm": 3.4678362147507134, + "learning_rate": 1.1532114736664247e-05, + "loss": 0.5386, "step": 9933 }, { - "epoch": 2.083036275948836, - "grad_norm": 4.824966080112759, - "learning_rate": 4.513074087454269e-06, - "loss": 0.1891, + "epoch": 1.4027110985597289, + "grad_norm": 3.920330613032662, + "learning_rate": 1.153060826447918e-05, + "loss": 0.6705, "step": 9934 }, { - "epoch": 2.0832459635143636, - "grad_norm": 4.055925530419778, - "learning_rate": 4.51118156580029e-06, - "loss": 0.1674, + "epoch": 1.4028523016097147, + "grad_norm": 4.407297759808663, + "learning_rate": 1.1529101756723437e-05, + "loss": 0.7682, "step": 9935 }, { - "epoch": 2.083455651079891, - "grad_norm": 5.672455172940869, - "learning_rate": 4.509289325457775e-06, - "loss": 0.169, + "epoch": 1.4029935046597006, + "grad_norm": 2.9995003349214144, + "learning_rate": 1.152759521343203e-05, + "loss": 0.4892, "step": 9936 }, { - "epoch": 2.0836653386454183, - "grad_norm": 4.639172933621322, - "learning_rate": 4.507397366523703e-06, - "loss": 0.163, + "epoch": 1.4031347077096865, + "grad_norm": 3.5563325660103, + "learning_rate": 1.1526088634639971e-05, + "loss": 0.6243, "step": 9937 }, { - "epoch": 2.083875026210946, - "grad_norm": 3.776853790233513, - "learning_rate": 4.5055056890950414e-06, - "loss": 0.1124, + "epoch": 1.4032759107596724, + "grad_norm": 3.3098449471199336, + "learning_rate": 1.1524582020382271e-05, + "loss": 0.563, "step": 9938 }, { - "epoch": 2.084084713776473, - "grad_norm": 4.771092267580428, - "learning_rate": 4.503614293268745e-06, - "loss": 0.1483, + "epoch": 1.4034171138096583, + "grad_norm": 3.6315818488780485, + "learning_rate": 1.1523075370693942e-05, + "loss": 0.6929, "step": 9939 }, { - "epoch": 2.0842944013420004, - "grad_norm": 3.218889847542544, - "learning_rate": 4.501723179141752e-06, - "loss": 0.1249, + "epoch": 1.4035583168596442, + "grad_norm": 3.4625189347739247, + "learning_rate": 1.1521568685610003e-05, + "loss": 0.5728, "step": 9940 }, { - "epoch": 2.084504088907528, - "grad_norm": 5.0095150094804035, - "learning_rate": 4.499832346810976e-06, - "loss": 0.2037, + "epoch": 1.40369951990963, + "grad_norm": 2.9052230762252265, + "learning_rate": 1.152006196516546e-05, + "loss": 0.5621, "step": 9941 }, { - "epoch": 2.084713776473055, - "grad_norm": 4.525541148791993, - "learning_rate": 4.4979417963733376e-06, - "loss": 0.1759, + "epoch": 1.403840722959616, + "grad_norm": 3.672861170918665, + "learning_rate": 1.1518555209395334e-05, + "loss": 0.8082, "step": 9942 }, { - "epoch": 2.0849234640385825, - "grad_norm": 4.718125371059388, - "learning_rate": 4.49605152792572e-06, - "loss": 0.1529, + "epoch": 1.4039819260096018, + "grad_norm": 3.422702997674202, + "learning_rate": 1.1517048418334644e-05, + "loss": 0.5315, "step": 9943 }, { - "epoch": 2.08513315160411, - "grad_norm": 4.975112082928179, - "learning_rate": 4.494161541565012e-06, - "loss": 0.1735, + "epoch": 1.4041231290595877, + "grad_norm": 3.424173657753212, + "learning_rate": 1.1515541592018402e-05, + "loss": 0.6116, "step": 9944 }, { - "epoch": 2.085342839169637, - "grad_norm": 3.6215337723336014, - "learning_rate": 4.4922718373880745e-06, - "loss": 0.1261, + "epoch": 1.4042643321095736, + "grad_norm": 3.348063860069627, + "learning_rate": 1.1514034730481627e-05, + "loss": 0.5376, "step": 9945 }, { - "epoch": 2.0855525267351647, - "grad_norm": 3.7784406929232937, - "learning_rate": 4.490382415491754e-06, - "loss": 0.1398, + "epoch": 1.4044055351595595, + "grad_norm": 3.242136339919406, + "learning_rate": 1.1512527833759339e-05, + "loss": 0.6332, "step": 9946 }, { - "epoch": 2.0857622143006918, - "grad_norm": 3.2708266560007107, - "learning_rate": 4.488493275972891e-06, - "loss": 0.1237, + "epoch": 1.4045467382095453, + "grad_norm": 2.8300872173050644, + "learning_rate": 1.1511020901886559e-05, + "loss": 0.4877, "step": 9947 }, { - "epoch": 2.0859719018662193, - "grad_norm": 3.6794083731370413, - "learning_rate": 4.48660441892831e-06, - "loss": 0.1611, + "epoch": 1.4046879412595312, + "grad_norm": 2.841581205226284, + "learning_rate": 1.1509513934898303e-05, + "loss": 0.5086, "step": 9948 }, { - "epoch": 2.086181589431747, - "grad_norm": 4.29806996072104, - "learning_rate": 4.484715844454816e-06, - "loss": 0.1537, + "epoch": 1.404829144309517, + "grad_norm": 3.8673580597270614, + "learning_rate": 1.1508006932829601e-05, + "loss": 0.5855, "step": 9949 }, { - "epoch": 2.086391276997274, - "grad_norm": 3.7580847087667655, - "learning_rate": 4.482827552649196e-06, - "loss": 0.1493, + "epoch": 1.404970347359503, + "grad_norm": 3.814007735173787, + "learning_rate": 1.1506499895715462e-05, + "loss": 0.5789, "step": 9950 }, { - "epoch": 2.0866009645628014, - "grad_norm": 4.61325294169398, - "learning_rate": 4.480939543608237e-06, - "loss": 0.1512, + "epoch": 1.4051115504094889, + "grad_norm": 3.223935578785833, + "learning_rate": 1.150499282359092e-05, + "loss": 0.4186, "step": 9951 }, { - "epoch": 2.086810652128329, - "grad_norm": 3.4669147318968245, - "learning_rate": 4.479051817428698e-06, - "loss": 0.1152, + "epoch": 1.4052527534594748, + "grad_norm": 4.250623921984275, + "learning_rate": 1.1503485716490994e-05, + "loss": 0.7245, "step": 9952 }, { - "epoch": 2.087020339693856, - "grad_norm": 3.747501903564905, - "learning_rate": 4.477164374207326e-06, - "loss": 0.1498, + "epoch": 1.4053939565094606, + "grad_norm": 3.461544245584893, + "learning_rate": 1.150197857445071e-05, + "loss": 0.5743, "step": 9953 }, { - "epoch": 2.0872300272593836, - "grad_norm": 3.8311283290221527, - "learning_rate": 4.475277214040863e-06, - "loss": 0.1421, + "epoch": 1.4055351595594465, + "grad_norm": 3.549416857556625, + "learning_rate": 1.1500471397505091e-05, + "loss": 0.5783, "step": 9954 }, { - "epoch": 2.087439714824911, - "grad_norm": 4.038068032023206, - "learning_rate": 4.47339033702602e-06, - "loss": 0.1276, + "epoch": 1.4056763626094324, + "grad_norm": 3.172461769115385, + "learning_rate": 1.1498964185689166e-05, + "loss": 0.4725, "step": 9955 }, { - "epoch": 2.087649402390438, - "grad_norm": 4.719244594934842, - "learning_rate": 4.471503743259512e-06, - "loss": 0.1604, + "epoch": 1.4058175656594183, + "grad_norm": 3.5471306267087424, + "learning_rate": 1.1497456939037957e-05, + "loss": 0.5747, "step": 9956 }, { - "epoch": 2.0878590899559657, - "grad_norm": 4.67082924505463, - "learning_rate": 4.469617432838026e-06, - "loss": 0.1336, + "epoch": 1.4059587687094042, + "grad_norm": 4.396020191509421, + "learning_rate": 1.14959496575865e-05, + "loss": 0.7524, "step": 9957 }, { - "epoch": 2.088068777521493, - "grad_norm": 3.834657396475897, - "learning_rate": 4.467731405858234e-06, - "loss": 0.1451, + "epoch": 1.40609997175939, + "grad_norm": 4.016762394884701, + "learning_rate": 1.1494442341369819e-05, + "loss": 0.4982, "step": 9958 }, { - "epoch": 2.0882784650870203, - "grad_norm": 3.870934619890367, - "learning_rate": 4.465845662416803e-06, - "loss": 0.1136, + "epoch": 1.406241174809376, + "grad_norm": 3.196397403382184, + "learning_rate": 1.149293499042294e-05, + "loss": 0.4967, "step": 9959 }, { - "epoch": 2.088488152652548, - "grad_norm": 4.569532634046341, - "learning_rate": 4.463960202610386e-06, - "loss": 0.1494, + "epoch": 1.4063823778593618, + "grad_norm": 3.3524463210519264, + "learning_rate": 1.1491427604780898e-05, + "loss": 0.471, "step": 9960 }, { - "epoch": 2.088697840218075, - "grad_norm": 4.156412597170068, - "learning_rate": 4.4620750265356084e-06, - "loss": 0.1248, + "epoch": 1.4065235809093477, + "grad_norm": 3.351595289088621, + "learning_rate": 1.1489920184478724e-05, + "loss": 0.6062, "step": 9961 }, { - "epoch": 2.0889075277836024, - "grad_norm": 4.2684462532874905, - "learning_rate": 4.4601901342890865e-06, - "loss": 0.1333, + "epoch": 1.4066647839593336, + "grad_norm": 3.006542705937755, + "learning_rate": 1.1488412729551449e-05, + "loss": 0.5358, "step": 9962 }, { - "epoch": 2.08911721534913, - "grad_norm": 4.085800677013555, - "learning_rate": 4.458305525967433e-06, - "loss": 0.138, + "epoch": 1.4068059870093195, + "grad_norm": 3.6891630825740758, + "learning_rate": 1.1486905240034103e-05, + "loss": 0.5835, "step": 9963 }, { - "epoch": 2.089326902914657, - "grad_norm": 3.805517237321966, - "learning_rate": 4.456421201667229e-06, - "loss": 0.1663, + "epoch": 1.4069471900593054, + "grad_norm": 3.6646470565168814, + "learning_rate": 1.1485397715961719e-05, + "loss": 0.5274, "step": 9964 }, { - "epoch": 2.0895365904801846, - "grad_norm": 4.455267385299155, - "learning_rate": 4.4545371614850575e-06, - "loss": 0.14, + "epoch": 1.4070883931092912, + "grad_norm": 3.2873355406246887, + "learning_rate": 1.1483890157369338e-05, + "loss": 0.5053, "step": 9965 }, { - "epoch": 2.0897462780457117, - "grad_norm": 3.6559890372218855, - "learning_rate": 4.4526534055174745e-06, - "loss": 0.1236, + "epoch": 1.4072295961592771, + "grad_norm": 3.347787165528886, + "learning_rate": 1.148238256429199e-05, + "loss": 0.5185, "step": 9966 }, { - "epoch": 2.089955965611239, - "grad_norm": 4.467697483521923, - "learning_rate": 4.4507699338610224e-06, - "loss": 0.1625, + "epoch": 1.407370799209263, + "grad_norm": 4.086498348153865, + "learning_rate": 1.1480874936764708e-05, + "loss": 0.6709, "step": 9967 }, { - "epoch": 2.0901656531767667, - "grad_norm": 5.261975311164107, - "learning_rate": 4.448886746612235e-06, - "loss": 0.1985, + "epoch": 1.4075120022592489, + "grad_norm": 3.286952228251929, + "learning_rate": 1.1479367274822535e-05, + "loss": 0.454, "step": 9968 }, { - "epoch": 2.090375340742294, - "grad_norm": 3.928433702441015, - "learning_rate": 4.4470038438676375e-06, - "loss": 0.1284, + "epoch": 1.4076532053092348, + "grad_norm": 4.231713885757386, + "learning_rate": 1.1477859578500505e-05, + "loss": 0.6515, "step": 9969 }, { - "epoch": 2.0905850283078213, - "grad_norm": 3.6287231985567, - "learning_rate": 4.445121225723716e-06, - "loss": 0.1163, + "epoch": 1.4077944083592207, + "grad_norm": 3.515755331629892, + "learning_rate": 1.1476351847833656e-05, + "loss": 0.5199, "step": 9970 }, { - "epoch": 2.090794715873349, - "grad_norm": 3.6665430717139262, - "learning_rate": 4.443238892276967e-06, - "loss": 0.1323, + "epoch": 1.4079356114092065, + "grad_norm": 3.3727204050160067, + "learning_rate": 1.1474844082857028e-05, + "loss": 0.5076, "step": 9971 }, { - "epoch": 2.091004403438876, - "grad_norm": 5.564435999506864, - "learning_rate": 4.4413568436238675e-06, - "loss": 0.184, + "epoch": 1.4080768144591924, + "grad_norm": 3.83093851023455, + "learning_rate": 1.1473336283605661e-05, + "loss": 0.5691, "step": 9972 }, { - "epoch": 2.0912140910044035, - "grad_norm": 5.66839727961225, - "learning_rate": 4.439475079860869e-06, - "loss": 0.2088, + "epoch": 1.4082180175091783, + "grad_norm": 3.5622267102619434, + "learning_rate": 1.1471828450114593e-05, + "loss": 0.5471, "step": 9973 }, { - "epoch": 2.091423778569931, - "grad_norm": 3.4699683617138626, - "learning_rate": 4.437593601084413e-06, - "loss": 0.1254, + "epoch": 1.4083592205591642, + "grad_norm": 4.346334946167332, + "learning_rate": 1.1470320582418873e-05, + "loss": 0.741, "step": 9974 }, { - "epoch": 2.091633466135458, - "grad_norm": 4.247013788116846, - "learning_rate": 4.435712407390937e-06, - "loss": 0.1628, + "epoch": 1.40850042360915, + "grad_norm": 5.3133415210162305, + "learning_rate": 1.1468812680553531e-05, + "loss": 0.8298, "step": 9975 }, { - "epoch": 2.0918431537009856, - "grad_norm": 4.587708598293912, - "learning_rate": 4.433831498876847e-06, - "loss": 0.1708, + "epoch": 1.408641626659136, + "grad_norm": 4.1181824749170115, + "learning_rate": 1.1467304744553618e-05, + "loss": 0.7347, "step": 9976 }, { - "epoch": 2.0920528412665127, - "grad_norm": 3.8954986759594097, - "learning_rate": 4.431950875638551e-06, - "loss": 0.1274, + "epoch": 1.4087828297091218, + "grad_norm": 3.5970778980601463, + "learning_rate": 1.1465796774454179e-05, + "loss": 0.5772, "step": 9977 }, { - "epoch": 2.0922625288320402, - "grad_norm": 5.066123601761499, - "learning_rate": 4.430070537772428e-06, - "loss": 0.1762, + "epoch": 1.4089240327591077, + "grad_norm": 4.183882615901438, + "learning_rate": 1.1464288770290255e-05, + "loss": 0.8161, "step": 9978 }, { - "epoch": 2.0924722163975678, - "grad_norm": 4.534376848446051, - "learning_rate": 4.428190485374849e-06, - "loss": 0.1489, + "epoch": 1.4090652358090936, + "grad_norm": 3.615891647298768, + "learning_rate": 1.1462780732096892e-05, + "loss": 0.6358, "step": 9979 }, { - "epoch": 2.092681903963095, - "grad_norm": 3.806205809818997, - "learning_rate": 4.42631071854217e-06, - "loss": 0.1564, + "epoch": 1.4092064388590795, + "grad_norm": 3.8267839590585075, + "learning_rate": 1.1461272659909137e-05, + "loss": 0.6225, "step": 9980 }, { - "epoch": 2.0928915915286224, - "grad_norm": 4.3962552628625975, - "learning_rate": 4.424431237370738e-06, - "loss": 0.1671, + "epoch": 1.4093476419090654, + "grad_norm": 4.167799870539445, + "learning_rate": 1.1459764553762036e-05, + "loss": 0.6346, "step": 9981 }, { - "epoch": 2.09310127909415, - "grad_norm": 4.254370730699198, - "learning_rate": 4.422552041956876e-06, - "loss": 0.1473, + "epoch": 1.4094888449590512, + "grad_norm": 4.490430220674449, + "learning_rate": 1.1458256413690634e-05, + "loss": 0.8326, "step": 9982 }, { - "epoch": 2.093310966659677, - "grad_norm": 4.013037289042238, - "learning_rate": 4.420673132396891e-06, - "loss": 0.1097, + "epoch": 1.4096300480090371, + "grad_norm": 3.511666082860179, + "learning_rate": 1.1456748239729988e-05, + "loss": 0.6215, "step": 9983 }, { - "epoch": 2.0935206542252045, - "grad_norm": 4.72473926245404, - "learning_rate": 4.418794508787088e-06, - "loss": 0.1456, + "epoch": 1.409771251059023, + "grad_norm": 3.717853277539238, + "learning_rate": 1.1455240031915139e-05, + "loss": 0.6476, "step": 9984 }, { - "epoch": 2.0937303417907316, - "grad_norm": 5.274460152966914, - "learning_rate": 4.416916171223744e-06, - "loss": 0.1625, + "epoch": 1.4099124541090087, + "grad_norm": 3.385886170262072, + "learning_rate": 1.1453731790281142e-05, + "loss": 0.5055, "step": 9985 }, { - "epoch": 2.093940029356259, - "grad_norm": 6.006779094458719, - "learning_rate": 4.415038119803132e-06, - "loss": 0.1896, + "epoch": 1.4100536571589946, + "grad_norm": 3.134590817649844, + "learning_rate": 1.1452223514863046e-05, + "loss": 0.514, "step": 9986 }, { - "epoch": 2.0941497169217866, - "grad_norm": 4.742135297336328, - "learning_rate": 4.413160354621504e-06, - "loss": 0.1732, + "epoch": 1.4101948602089804, + "grad_norm": 3.004481077025368, + "learning_rate": 1.14507152056959e-05, + "loss": 0.5499, "step": 9987 }, { - "epoch": 2.0943594044873137, - "grad_norm": 3.8943255812833786, - "learning_rate": 4.411282875775094e-06, - "loss": 0.1594, + "epoch": 1.4103360632589663, + "grad_norm": 3.259040859642275, + "learning_rate": 1.1449206862814762e-05, + "loss": 0.5499, "step": 9988 }, { - "epoch": 2.0945690920528413, - "grad_norm": 3.1545151698454053, - "learning_rate": 4.40940568336013e-06, - "loss": 0.1121, + "epoch": 1.4104772663089522, + "grad_norm": 3.2017786449153753, + "learning_rate": 1.1447698486254681e-05, + "loss": 0.6083, "step": 9989 }, { - "epoch": 2.094778779618369, - "grad_norm": 4.1747396683788605, - "learning_rate": 4.407528777472827e-06, - "loss": 0.1495, + "epoch": 1.410618469358938, + "grad_norm": 3.396536233214283, + "learning_rate": 1.144619007605071e-05, + "loss": 0.6517, "step": 9990 }, { - "epoch": 2.094988467183896, - "grad_norm": 3.3640843012403865, - "learning_rate": 4.405652158209368e-06, - "loss": 0.1366, + "epoch": 1.410759672408924, + "grad_norm": 3.2862296928033086, + "learning_rate": 1.1444681632237913e-05, + "loss": 0.5821, "step": 9991 }, { - "epoch": 2.0951981547494234, - "grad_norm": 3.381026101229787, - "learning_rate": 4.403775825665938e-06, - "loss": 0.1262, + "epoch": 1.4109008754589099, + "grad_norm": 3.1123118393496783, + "learning_rate": 1.1443173154851335e-05, + "loss": 0.5721, "step": 9992 }, { - "epoch": 2.095407842314951, - "grad_norm": 3.9147047962059527, - "learning_rate": 4.401899779938707e-06, - "loss": 0.1644, + "epoch": 1.4110420785088957, + "grad_norm": 3.4013941680864623, + "learning_rate": 1.1441664643926033e-05, + "loss": 0.6318, "step": 9993 }, { - "epoch": 2.095617529880478, - "grad_norm": 3.3383443449668775, - "learning_rate": 4.400024021123822e-06, - "loss": 0.133, + "epoch": 1.4111832815588816, + "grad_norm": 3.395551338655201, + "learning_rate": 1.1440156099497071e-05, + "loss": 0.6149, "step": 9994 }, { - "epoch": 2.0958272174460055, - "grad_norm": 5.169996080921641, - "learning_rate": 4.398148549317414e-06, - "loss": 0.184, + "epoch": 1.4113244846088675, + "grad_norm": 3.4671509784773, + "learning_rate": 1.1438647521599502e-05, + "loss": 0.5391, "step": 9995 }, { - "epoch": 2.0960369050115326, - "grad_norm": 5.169805831312984, - "learning_rate": 4.396273364615613e-06, - "loss": 0.1314, + "epoch": 1.4114656876588534, + "grad_norm": 3.5569540484970616, + "learning_rate": 1.1437138910268387e-05, + "loss": 0.5668, "step": 9996 }, { - "epoch": 2.09624659257706, - "grad_norm": 3.8376495245557405, - "learning_rate": 4.394398467114518e-06, - "loss": 0.1656, + "epoch": 1.4116068907088393, + "grad_norm": 2.682772077995674, + "learning_rate": 1.1435630265538783e-05, + "loss": 0.4719, "step": 9997 }, { - "epoch": 2.0964562801425877, - "grad_norm": 4.1428463030487634, - "learning_rate": 4.392523856910227e-06, - "loss": 0.173, + "epoch": 1.4117480937588252, + "grad_norm": 3.3146192991063566, + "learning_rate": 1.1434121587445752e-05, + "loss": 0.7314, "step": 9998 }, { - "epoch": 2.0966659677081148, - "grad_norm": 4.86425368801479, - "learning_rate": 4.390649534098811e-06, - "loss": 0.1633, + "epoch": 1.411889296808811, + "grad_norm": 3.7208281612050618, + "learning_rate": 1.1432612876024351e-05, + "loss": 0.6564, "step": 9999 }, { - "epoch": 2.0968756552736423, - "grad_norm": 4.2475605699460255, - "learning_rate": 4.388775498776338e-06, - "loss": 0.1181, + "epoch": 1.412030499858797, + "grad_norm": 3.1413816015659046, + "learning_rate": 1.1431104131309654e-05, + "loss": 0.4828, "step": 10000 }, { - "epoch": 2.09708534283917, - "grad_norm": 4.19297273951789, - "learning_rate": 4.38690175103885e-06, - "loss": 0.1253, + "epoch": 1.4121717029087828, + "grad_norm": 3.6349313832083703, + "learning_rate": 1.1429595353336707e-05, + "loss": 0.5336, "step": 10001 }, { - "epoch": 2.097295030404697, - "grad_norm": 4.134736487489113, - "learning_rate": 4.385028290982386e-06, - "loss": 0.1401, + "epoch": 1.4123129059587687, + "grad_norm": 3.5651510400409356, + "learning_rate": 1.1428086542140587e-05, + "loss": 0.6487, "step": 10002 }, { - "epoch": 2.0975047179702244, - "grad_norm": 4.949889406507961, - "learning_rate": 4.383155118702961e-06, - "loss": 0.1513, + "epoch": 1.4124541090087546, + "grad_norm": 3.9647051327805367, + "learning_rate": 1.1426577697756349e-05, + "loss": 0.6683, "step": 10003 }, { - "epoch": 2.0977144055357515, - "grad_norm": 4.730273154717187, - "learning_rate": 4.381282234296575e-06, - "loss": 0.1527, + "epoch": 1.4125953120587404, + "grad_norm": 3.6574314298222608, + "learning_rate": 1.1425068820219063e-05, + "loss": 0.5625, "step": 10004 }, { - "epoch": 2.097924093101279, - "grad_norm": 4.900188113642948, - "learning_rate": 4.379409637859218e-06, - "loss": 0.1763, + "epoch": 1.4127365151087263, + "grad_norm": 3.776228587647339, + "learning_rate": 1.1423559909563792e-05, + "loss": 0.6115, "step": 10005 }, { - "epoch": 2.0981337806668066, - "grad_norm": 5.012756724253606, - "learning_rate": 4.3775373294868705e-06, - "loss": 0.1767, + "epoch": 1.4128777181587122, + "grad_norm": 3.3667811015005817, + "learning_rate": 1.1422050965825603e-05, + "loss": 0.5398, "step": 10006 }, { - "epoch": 2.0983434682323336, - "grad_norm": 4.596162706064852, - "learning_rate": 4.375665309275485e-06, - "loss": 0.1955, + "epoch": 1.413018921208698, + "grad_norm": 3.3994103835766443, + "learning_rate": 1.1420541989039565e-05, + "loss": 0.5416, "step": 10007 }, { - "epoch": 2.098553155797861, - "grad_norm": 3.98645206954477, - "learning_rate": 4.3737935773210046e-06, - "loss": 0.1545, + "epoch": 1.413160124258684, + "grad_norm": 3.8719629145732593, + "learning_rate": 1.1419032979240748e-05, + "loss": 0.6466, "step": 10008 }, { - "epoch": 2.0987628433633887, - "grad_norm": 3.130811775476905, - "learning_rate": 4.371922133719367e-06, - "loss": 0.1133, + "epoch": 1.4133013273086699, + "grad_norm": 3.539220745006238, + "learning_rate": 1.1417523936464212e-05, + "loss": 0.5222, "step": 10009 }, { - "epoch": 2.098972530928916, - "grad_norm": 3.2543905763251506, - "learning_rate": 4.370050978566476e-06, - "loss": 0.1209, + "epoch": 1.4134425303586557, + "grad_norm": 4.365083169059222, + "learning_rate": 1.1416014860745032e-05, + "loss": 0.7139, "step": 10010 }, { - "epoch": 2.0991822184944433, - "grad_norm": 5.4301024990181, - "learning_rate": 4.36818011195824e-06, - "loss": 0.18, + "epoch": 1.4135837334086416, + "grad_norm": 3.3852775372234736, + "learning_rate": 1.1414505752118282e-05, + "loss": 0.5866, "step": 10011 }, { - "epoch": 2.099391906059971, - "grad_norm": 3.4528343859996635, - "learning_rate": 4.366309533990543e-06, - "loss": 0.1319, + "epoch": 1.4137249364586275, + "grad_norm": 3.525910146370708, + "learning_rate": 1.1412996610619028e-05, + "loss": 0.5339, "step": 10012 }, { - "epoch": 2.099601593625498, - "grad_norm": 3.7322682151174993, - "learning_rate": 4.364439244759248e-06, - "loss": 0.1258, + "epoch": 1.4138661395086134, + "grad_norm": 3.4110828728229583, + "learning_rate": 1.141148743628234e-05, + "loss": 0.6024, "step": 10013 }, { - "epoch": 2.0998112811910254, - "grad_norm": 4.189320251718574, - "learning_rate": 4.3625692443602204e-06, - "loss": 0.1496, + "epoch": 1.4140073425585993, + "grad_norm": 4.044285463390223, + "learning_rate": 1.1409978229143297e-05, + "loss": 0.6896, "step": 10014 }, { - "epoch": 2.1000209687565525, - "grad_norm": 3.8661448303702968, - "learning_rate": 4.3606995328892965e-06, - "loss": 0.1065, + "epoch": 1.4141485456085852, + "grad_norm": 3.795701719462897, + "learning_rate": 1.1408468989236967e-05, + "loss": 0.6615, "step": 10015 }, { - "epoch": 2.10023065632208, - "grad_norm": 5.070207055072522, - "learning_rate": 4.358830110442299e-06, - "loss": 0.1733, + "epoch": 1.414289748658571, + "grad_norm": 3.5849166246125104, + "learning_rate": 1.1406959716598424e-05, + "loss": 0.5201, "step": 10016 }, { - "epoch": 2.1004403438876076, - "grad_norm": 4.242060838787802, - "learning_rate": 4.356960977115043e-06, - "loss": 0.1394, + "epoch": 1.414430951708557, + "grad_norm": 3.659608138703216, + "learning_rate": 1.140545041126275e-05, + "loss": 0.6697, "step": 10017 }, { - "epoch": 2.1006500314531347, - "grad_norm": 5.0036563898385635, - "learning_rate": 4.355092133003328e-06, - "loss": 0.1445, + "epoch": 1.4145721547585428, + "grad_norm": 3.7727706551832108, + "learning_rate": 1.1403941073265014e-05, + "loss": 0.6302, "step": 10018 }, { - "epoch": 2.100859719018662, - "grad_norm": 4.459394411770401, - "learning_rate": 4.353223578202931e-06, - "loss": 0.1281, + "epoch": 1.4147133578085287, + "grad_norm": 3.829956282098092, + "learning_rate": 1.140243170264029e-05, + "loss": 0.5471, "step": 10019 }, { - "epoch": 2.1010694065841897, - "grad_norm": 3.196069591355032, - "learning_rate": 4.351355312809615e-06, - "loss": 0.12, + "epoch": 1.4148545608585146, + "grad_norm": 3.4829474118955654, + "learning_rate": 1.1400922299423663e-05, + "loss": 0.6554, "step": 10020 }, { - "epoch": 2.101279094149717, - "grad_norm": 3.957574425210128, - "learning_rate": 4.349487336919141e-06, - "loss": 0.1489, + "epoch": 1.4149957639085005, + "grad_norm": 3.471874382662149, + "learning_rate": 1.1399412863650205e-05, + "loss": 0.4447, "step": 10021 }, { - "epoch": 2.1014887817152443, - "grad_norm": 4.139046573621775, - "learning_rate": 4.3476196506272385e-06, - "loss": 0.1412, + "epoch": 1.4151369669584863, + "grad_norm": 4.081544652349606, + "learning_rate": 1.1397903395354996e-05, + "loss": 0.6764, "step": 10022 }, { - "epoch": 2.101698469280772, - "grad_norm": 3.448373125361909, - "learning_rate": 4.345752254029635e-06, - "loss": 0.1441, + "epoch": 1.4152781700084722, + "grad_norm": 3.833129327185525, + "learning_rate": 1.1396393894573116e-05, + "loss": 0.6219, "step": 10023 }, { - "epoch": 2.101908156846299, - "grad_norm": 3.9455727134949856, - "learning_rate": 4.343885147222036e-06, - "loss": 0.1689, + "epoch": 1.415419373058458, + "grad_norm": 2.9448576279083647, + "learning_rate": 1.1394884361339647e-05, + "loss": 0.5228, "step": 10024 }, { - "epoch": 2.1021178444118265, - "grad_norm": 5.735466844701449, - "learning_rate": 4.342018330300129e-06, - "loss": 0.2101, + "epoch": 1.415560576108444, + "grad_norm": 4.503364054908912, + "learning_rate": 1.1393374795689666e-05, + "loss": 0.6854, "step": 10025 }, { - "epoch": 2.1023275319773536, - "grad_norm": 5.4001390241609935, - "learning_rate": 4.340151803359596e-06, - "loss": 0.1604, + "epoch": 1.4157017791584299, + "grad_norm": 3.3516196025328155, + "learning_rate": 1.1391865197658256e-05, + "loss": 0.5391, "step": 10026 }, { - "epoch": 2.102537219542881, - "grad_norm": 5.203769880860915, - "learning_rate": 4.338285566496103e-06, - "loss": 0.1766, + "epoch": 1.4158429822084158, + "grad_norm": 3.455169752340693, + "learning_rate": 1.13903555672805e-05, + "loss": 0.5352, "step": 10027 }, { - "epoch": 2.1027469071084086, - "grad_norm": 4.069182238778444, - "learning_rate": 4.336419619805294e-06, - "loss": 0.1407, + "epoch": 1.4159841852584016, + "grad_norm": 3.526703076122184, + "learning_rate": 1.1388845904591482e-05, + "loss": 0.6047, "step": 10028 }, { - "epoch": 2.1029565946739357, - "grad_norm": 5.591594051309537, - "learning_rate": 4.3345539633828e-06, - "loss": 0.2007, + "epoch": 1.4161253883083875, + "grad_norm": 4.1404653219438154, + "learning_rate": 1.1387336209626287e-05, + "loss": 0.738, "step": 10029 }, { - "epoch": 2.1031662822394632, - "grad_norm": 4.064749381653318, - "learning_rate": 4.332688597324245e-06, - "loss": 0.1432, + "epoch": 1.4162665913583734, + "grad_norm": 3.447765148310987, + "learning_rate": 1.1385826482419993e-05, + "loss": 0.6092, "step": 10030 }, { - "epoch": 2.1033759698049908, - "grad_norm": 4.5442640731596375, - "learning_rate": 4.330823521725228e-06, - "loss": 0.1735, + "epoch": 1.4164077944083593, + "grad_norm": 3.3899178421102487, + "learning_rate": 1.138431672300769e-05, + "loss": 0.5192, "step": 10031 }, { - "epoch": 2.103585657370518, - "grad_norm": 10.377040990467885, - "learning_rate": 4.328958736681335e-06, - "loss": 0.1784, + "epoch": 1.4165489974583452, + "grad_norm": 2.9975048522453926, + "learning_rate": 1.1382806931424468e-05, + "loss": 0.488, "step": 10032 }, { - "epoch": 2.1037953449360454, - "grad_norm": 4.303831628518501, - "learning_rate": 4.327094242288147e-06, - "loss": 0.1731, + "epoch": 1.416690200508331, + "grad_norm": 3.3878732852401536, + "learning_rate": 1.1381297107705407e-05, + "loss": 0.5937, "step": 10033 }, { - "epoch": 2.1040050325015724, - "grad_norm": 4.787479799134013, - "learning_rate": 4.325230038641214e-06, - "loss": 0.1404, + "epoch": 1.416831403558317, + "grad_norm": 3.4790842219086735, + "learning_rate": 1.1379787251885603e-05, + "loss": 0.5662, "step": 10034 }, { - "epoch": 2.1042147200671, - "grad_norm": 5.029567796113582, - "learning_rate": 4.323366125836088e-06, - "loss": 0.1774, + "epoch": 1.4169726066083028, + "grad_norm": 3.703196528754753, + "learning_rate": 1.1378277364000133e-05, + "loss": 0.632, "step": 10035 }, { - "epoch": 2.1044244076326275, - "grad_norm": 4.642825288977709, - "learning_rate": 4.321502503968294e-06, - "loss": 0.1596, + "epoch": 1.4171138096582885, + "grad_norm": 3.3248357693739647, + "learning_rate": 1.1376767444084096e-05, + "loss": 0.4813, "step": 10036 }, { - "epoch": 2.1046340951981546, - "grad_norm": 4.152967027850551, - "learning_rate": 4.31963917313334e-06, - "loss": 0.1421, + "epoch": 1.4172550127082744, + "grad_norm": 3.755396631752553, + "learning_rate": 1.1375257492172575e-05, + "loss": 0.6783, "step": 10037 }, { - "epoch": 2.104843782763682, - "grad_norm": 4.192653242644203, - "learning_rate": 4.317776133426732e-06, - "loss": 0.1323, + "epoch": 1.4173962157582602, + "grad_norm": 4.302061202132109, + "learning_rate": 1.1373747508300668e-05, + "loss": 0.7003, "step": 10038 }, { - "epoch": 2.1050534703292096, - "grad_norm": 3.3844298206748666, - "learning_rate": 4.315913384943955e-06, - "loss": 0.1057, + "epoch": 1.4175374188082461, + "grad_norm": 3.343482062875269, + "learning_rate": 1.1372237492503459e-05, + "loss": 0.5764, "step": 10039 }, { - "epoch": 2.1052631578947367, - "grad_norm": 4.172212549325048, - "learning_rate": 4.314050927780477e-06, - "loss": 0.1573, + "epoch": 1.417678621858232, + "grad_norm": 3.6256899506041567, + "learning_rate": 1.1370727444816045e-05, + "loss": 0.6389, "step": 10040 }, { - "epoch": 2.1054728454602643, - "grad_norm": 4.2614245866097695, - "learning_rate": 4.312188762031746e-06, - "loss": 0.1451, + "epoch": 1.417819824908218, + "grad_norm": 3.460323848004478, + "learning_rate": 1.1369217365273517e-05, + "loss": 0.4948, "step": 10041 }, { - "epoch": 2.105682533025792, - "grad_norm": 4.721069661333385, - "learning_rate": 4.310326887793211e-06, - "loss": 0.1811, + "epoch": 1.4179610279582038, + "grad_norm": 3.7989541442836186, + "learning_rate": 1.1367707253910969e-05, + "loss": 0.6031, "step": 10042 }, { - "epoch": 2.105892220591319, - "grad_norm": 4.969801811031622, - "learning_rate": 4.308465305160285e-06, - "loss": 0.1261, + "epoch": 1.4181022310081897, + "grad_norm": 3.4381903073420172, + "learning_rate": 1.1366197110763493e-05, + "loss": 0.5904, "step": 10043 }, { - "epoch": 2.1061019081568464, - "grad_norm": 3.223448270226393, - "learning_rate": 4.30660401422839e-06, - "loss": 0.1055, + "epoch": 1.4182434340581755, + "grad_norm": 3.4553998578044767, + "learning_rate": 1.1364686935866186e-05, + "loss": 0.5903, "step": 10044 }, { - "epoch": 2.1063115957223735, - "grad_norm": 4.730753480152726, - "learning_rate": 4.304743015092912e-06, - "loss": 0.1876, + "epoch": 1.4183846371081614, + "grad_norm": 3.598849443496455, + "learning_rate": 1.1363176729254147e-05, + "loss": 0.5836, "step": 10045 }, { - "epoch": 2.106521283287901, - "grad_norm": 4.301974230073461, - "learning_rate": 4.3028823078492295e-06, - "loss": 0.176, + "epoch": 1.4185258401581473, + "grad_norm": 4.125552027602078, + "learning_rate": 1.1361666490962468e-05, + "loss": 0.6509, "step": 10046 }, { - "epoch": 2.1067309708534285, - "grad_norm": 5.555576769350366, - "learning_rate": 4.301021892592711e-06, - "loss": 0.172, + "epoch": 1.4186670432081332, + "grad_norm": 4.127785072037561, + "learning_rate": 1.1360156221026246e-05, + "loss": 0.6187, "step": 10047 }, { - "epoch": 2.1069406584189556, - "grad_norm": 4.747807657954561, - "learning_rate": 4.29916176941871e-06, - "loss": 0.1685, + "epoch": 1.418808246258119, + "grad_norm": 4.025195119537113, + "learning_rate": 1.1358645919480585e-05, + "loss": 0.6796, "step": 10048 }, { - "epoch": 2.107150345984483, - "grad_norm": 4.4915366419652845, - "learning_rate": 4.29730193842255e-06, - "loss": 0.155, + "epoch": 1.418949449308105, + "grad_norm": 4.417630974708647, + "learning_rate": 1.1357135586360575e-05, + "loss": 0.7052, "step": 10049 }, { - "epoch": 2.1073600335500107, - "grad_norm": 4.678464012342243, - "learning_rate": 4.295442399699557e-06, - "loss": 0.1812, + "epoch": 1.4190906523580908, + "grad_norm": 3.3595057860822877, + "learning_rate": 1.1355625221701321e-05, + "loss": 0.5599, "step": 10050 }, { - "epoch": 2.1075697211155378, - "grad_norm": 3.2853257189271052, - "learning_rate": 4.293583153345036e-06, - "loss": 0.118, + "epoch": 1.4192318554080767, + "grad_norm": 3.3233873671940732, + "learning_rate": 1.1354114825537925e-05, + "loss": 0.5443, "step": 10051 }, { - "epoch": 2.1077794086810653, - "grad_norm": 4.795455820534092, - "learning_rate": 4.291724199454278e-06, - "loss": 0.1766, + "epoch": 1.4193730584580626, + "grad_norm": 3.5449895236963265, + "learning_rate": 1.1352604397905485e-05, + "loss": 0.5999, "step": 10052 }, { - "epoch": 2.1079890962465924, - "grad_norm": 4.687575093402977, - "learning_rate": 4.289865538122552e-06, - "loss": 0.2009, + "epoch": 1.4195142615080485, + "grad_norm": 3.5816043286985435, + "learning_rate": 1.1351093938839099e-05, + "loss": 0.6738, "step": 10053 }, { - "epoch": 2.10819878381212, - "grad_norm": 4.784512134719495, - "learning_rate": 4.288007169445123e-06, - "loss": 0.1826, + "epoch": 1.4196554645580344, + "grad_norm": 4.801135317371865, + "learning_rate": 1.1349583448373878e-05, + "loss": 0.6476, "step": 10054 }, { - "epoch": 2.1084084713776474, - "grad_norm": 4.619477417222696, - "learning_rate": 4.286149093517231e-06, - "loss": 0.1801, + "epoch": 1.4197966676080203, + "grad_norm": 3.5187197591562525, + "learning_rate": 1.134807292654492e-05, + "loss": 0.604, "step": 10055 }, { - "epoch": 2.1086181589431745, - "grad_norm": 3.9323330535721217, - "learning_rate": 4.284291310434111e-06, - "loss": 0.1707, + "epoch": 1.4199378706580061, + "grad_norm": 4.033554471543881, + "learning_rate": 1.1346562373387326e-05, + "loss": 0.6779, "step": 10056 }, { - "epoch": 2.108827846508702, - "grad_norm": 5.149530038697762, - "learning_rate": 4.2824338202909754e-06, - "loss": 0.1776, + "epoch": 1.420079073707992, + "grad_norm": 3.170373876497829, + "learning_rate": 1.1345051788936206e-05, + "loss": 0.5381, "step": 10057 }, { - "epoch": 2.1090375340742296, - "grad_norm": 4.955989889463565, - "learning_rate": 4.280576623183019e-06, - "loss": 0.1552, + "epoch": 1.420220276757978, + "grad_norm": 3.2004358732909672, + "learning_rate": 1.1343541173226664e-05, + "loss": 0.4171, "step": 10058 }, { - "epoch": 2.1092472216397566, - "grad_norm": 4.591079139635268, - "learning_rate": 4.278719719205431e-06, - "loss": 0.1391, + "epoch": 1.4203614798079638, + "grad_norm": 4.034171207031646, + "learning_rate": 1.1342030526293804e-05, + "loss": 0.6205, "step": 10059 }, { - "epoch": 2.109456909205284, - "grad_norm": 3.969278193064446, - "learning_rate": 4.276863108453384e-06, - "loss": 0.1348, + "epoch": 1.4205026828579497, + "grad_norm": 3.602893136333748, + "learning_rate": 1.1340519848172735e-05, + "loss": 0.5757, "step": 10060 }, { - "epoch": 2.1096665967708117, - "grad_norm": 4.602309732614145, - "learning_rate": 4.275006791022028e-06, - "loss": 0.1621, + "epoch": 1.4206438859079356, + "grad_norm": 3.548121028516786, + "learning_rate": 1.1339009138898564e-05, + "loss": 0.611, "step": 10061 }, { - "epoch": 2.109876284336339, - "grad_norm": 4.12566193899036, - "learning_rate": 4.273150767006501e-06, - "loss": 0.1738, + "epoch": 1.4207850889579214, + "grad_norm": 3.5531648574565677, + "learning_rate": 1.1337498398506397e-05, + "loss": 0.527, "step": 10062 }, { - "epoch": 2.1100859719018663, - "grad_norm": 3.4809042918180038, - "learning_rate": 4.271295036501929e-06, - "loss": 0.095, + "epoch": 1.4209262920079073, + "grad_norm": 3.280239510184984, + "learning_rate": 1.133598762703135e-05, + "loss": 0.4548, "step": 10063 }, { - "epoch": 2.1102956594673934, - "grad_norm": 4.82748247878098, - "learning_rate": 4.269439599603427e-06, - "loss": 0.1582, + "epoch": 1.4210674950578932, + "grad_norm": 3.1523811278898277, + "learning_rate": 1.133447682450852e-05, + "loss": 0.5064, "step": 10064 }, { - "epoch": 2.110505347032921, - "grad_norm": 6.328851581587421, - "learning_rate": 4.2675844564060845e-06, - "loss": 0.1776, + "epoch": 1.421208698107879, + "grad_norm": 3.7126644776368534, + "learning_rate": 1.1332965990973028e-05, + "loss": 0.5538, "step": 10065 }, { - "epoch": 2.1107150345984484, - "grad_norm": 4.463349460662841, - "learning_rate": 4.265729607004977e-06, - "loss": 0.1609, + "epoch": 1.421349901157865, + "grad_norm": 3.483237396566373, + "learning_rate": 1.1331455126459983e-05, + "loss": 0.5783, "step": 10066 }, { - "epoch": 2.1109247221639755, - "grad_norm": 3.6917617360512276, - "learning_rate": 4.263875051495176e-06, - "loss": 0.128, + "epoch": 1.4214911042078509, + "grad_norm": 4.209117729853742, + "learning_rate": 1.1329944231004494e-05, + "loss": 0.8424, "step": 10067 }, { - "epoch": 2.111134409729503, - "grad_norm": 5.4606544466636615, - "learning_rate": 4.262020789971724e-06, - "loss": 0.1832, + "epoch": 1.4216323072578367, + "grad_norm": 3.8785652487225786, + "learning_rate": 1.132843330464168e-05, + "loss": 0.7277, "step": 10068 }, { - "epoch": 2.1113440972950306, - "grad_norm": 5.008342063081339, - "learning_rate": 4.260166822529662e-06, - "loss": 0.1571, + "epoch": 1.4217735103078226, + "grad_norm": 3.8448678844708275, + "learning_rate": 1.1326922347406645e-05, + "loss": 0.68, "step": 10069 }, { - "epoch": 2.1115537848605577, - "grad_norm": 3.878436257630092, - "learning_rate": 4.258313149264006e-06, - "loss": 0.1345, + "epoch": 1.4219147133578085, + "grad_norm": 4.171553367440104, + "learning_rate": 1.1325411359334509e-05, + "loss": 0.6629, "step": 10070 }, { - "epoch": 2.111763472426085, - "grad_norm": 4.419352769981897, - "learning_rate": 4.256459770269755e-06, - "loss": 0.1438, + "epoch": 1.4220559164077944, + "grad_norm": 3.65608230804267, + "learning_rate": 1.1323900340460385e-05, + "loss": 0.5878, "step": 10071 }, { - "epoch": 2.1119731599916127, - "grad_norm": 5.05863491708066, - "learning_rate": 4.254606685641905e-06, - "loss": 0.1855, + "epoch": 1.4221971194577803, + "grad_norm": 3.7803653923456317, + "learning_rate": 1.1322389290819391e-05, + "loss": 0.7111, "step": 10072 }, { - "epoch": 2.11218284755714, - "grad_norm": 3.827106328373883, - "learning_rate": 4.252753895475432e-06, - "loss": 0.1574, + "epoch": 1.4223383225077662, + "grad_norm": 3.1434950651420817, + "learning_rate": 1.132087821044664e-05, + "loss": 0.5101, "step": 10073 }, { - "epoch": 2.1123925351226673, - "grad_norm": 4.461140350349457, - "learning_rate": 4.250901399865282e-06, - "loss": 0.1406, + "epoch": 1.422479525557752, + "grad_norm": 3.363581981107003, + "learning_rate": 1.1319367099377248e-05, + "loss": 0.5507, "step": 10074 }, { - "epoch": 2.1126022226881944, - "grad_norm": 4.842328730228752, - "learning_rate": 4.2490491989064085e-06, - "loss": 0.1588, + "epoch": 1.422620728607738, + "grad_norm": 4.813689993234532, + "learning_rate": 1.1317855957646335e-05, + "loss": 0.8702, "step": 10075 }, { - "epoch": 2.112811910253722, - "grad_norm": 4.132685969345667, - "learning_rate": 4.2471972926937425e-06, - "loss": 0.1462, + "epoch": 1.4227619316577238, + "grad_norm": 3.390249432997044, + "learning_rate": 1.1316344785289022e-05, + "loss": 0.5283, "step": 10076 }, { - "epoch": 2.1130215978192495, - "grad_norm": 5.006251543968417, - "learning_rate": 4.245345681322191e-06, - "loss": 0.178, + "epoch": 1.4229031347077097, + "grad_norm": 3.134231867583827, + "learning_rate": 1.131483358234042e-05, + "loss": 0.5832, "step": 10077 }, { - "epoch": 2.1132312853847766, - "grad_norm": 4.823379335753255, - "learning_rate": 4.243494364886651e-06, - "loss": 0.2049, + "epoch": 1.4230443377576956, + "grad_norm": 3.512508328350173, + "learning_rate": 1.1313322348835658e-05, + "loss": 0.6358, "step": 10078 }, { - "epoch": 2.113440972950304, - "grad_norm": 3.2864399851430655, - "learning_rate": 4.241643343482013e-06, - "loss": 0.1302, + "epoch": 1.4231855408076814, + "grad_norm": 3.1585828766481856, + "learning_rate": 1.1311811084809847e-05, + "loss": 0.5076, "step": 10079 }, { - "epoch": 2.1136506605158316, - "grad_norm": 4.585328712074158, - "learning_rate": 4.2397926172031366e-06, - "loss": 0.1598, + "epoch": 1.4233267438576673, + "grad_norm": 3.6376497920883653, + "learning_rate": 1.1310299790298118e-05, + "loss": 0.5494, "step": 10080 }, { - "epoch": 2.1138603480813587, - "grad_norm": 4.164435191724604, - "learning_rate": 4.237942186144884e-06, - "loss": 0.1359, + "epoch": 1.4234679469076532, + "grad_norm": 3.1739506470476964, + "learning_rate": 1.1308788465335583e-05, + "loss": 0.588, "step": 10081 }, { - "epoch": 2.1140700356468862, - "grad_norm": 4.924046823841652, - "learning_rate": 4.236092050402087e-06, - "loss": 0.2007, + "epoch": 1.423609149957639, + "grad_norm": 3.7061683579348417, + "learning_rate": 1.130727710995737e-05, + "loss": 0.6534, "step": 10082 }, { - "epoch": 2.1142797232124133, - "grad_norm": 3.935538945533204, - "learning_rate": 4.2342422100695665e-06, - "loss": 0.1543, + "epoch": 1.423750353007625, + "grad_norm": 3.5890849433233787, + "learning_rate": 1.1305765724198603e-05, + "loss": 0.6273, "step": 10083 }, { - "epoch": 2.114489410777941, - "grad_norm": 3.878665338551989, - "learning_rate": 4.232392665242133e-06, - "loss": 0.1327, + "epoch": 1.4238915560576109, + "grad_norm": 3.698203003315032, + "learning_rate": 1.1304254308094405e-05, + "loss": 0.6656, "step": 10084 }, { - "epoch": 2.1146990983434684, - "grad_norm": 4.956920865290055, - "learning_rate": 4.230543416014582e-06, - "loss": 0.1683, + "epoch": 1.4240327591075967, + "grad_norm": 3.1788128681781886, + "learning_rate": 1.1302742861679898e-05, + "loss": 0.5996, "step": 10085 }, { - "epoch": 2.1149087859089954, - "grad_norm": 4.278696452348014, - "learning_rate": 4.2286944624816885e-06, - "loss": 0.141, + "epoch": 1.4241739621575826, + "grad_norm": 4.001386634996595, + "learning_rate": 1.1301231384990213e-05, + "loss": 0.6156, "step": 10086 }, { - "epoch": 2.115118473474523, - "grad_norm": 4.401658686377236, - "learning_rate": 4.226845804738209e-06, - "loss": 0.1528, + "epoch": 1.4243151652075685, + "grad_norm": 3.4250401478291472, + "learning_rate": 1.1299719878060469e-05, + "loss": 0.5627, "step": 10087 }, { - "epoch": 2.1153281610400505, - "grad_norm": 4.196914613907652, - "learning_rate": 4.224997442878898e-06, - "loss": 0.1774, + "epoch": 1.4244563682575544, + "grad_norm": 3.6127853973090347, + "learning_rate": 1.1298208340925798e-05, + "loss": 0.6111, "step": 10088 }, { - "epoch": 2.1155378486055776, - "grad_norm": 3.926638885187505, - "learning_rate": 4.223149376998482e-06, - "loss": 0.1407, + "epoch": 1.4245975713075403, + "grad_norm": 3.184735396888158, + "learning_rate": 1.1296696773621327e-05, + "loss": 0.594, "step": 10089 }, { - "epoch": 2.115747536171105, - "grad_norm": 3.96611485764776, - "learning_rate": 4.221301607191684e-06, - "loss": 0.1753, + "epoch": 1.4247387743575262, + "grad_norm": 3.8256713583121384, + "learning_rate": 1.129518517618218e-05, + "loss": 0.6449, "step": 10090 }, { - "epoch": 2.1159572237366326, - "grad_norm": 4.358617314683359, - "learning_rate": 4.219454133553199e-06, - "loss": 0.1544, + "epoch": 1.424879977407512, + "grad_norm": 4.012636465211117, + "learning_rate": 1.1293673548643492e-05, + "loss": 0.6778, "step": 10091 }, { - "epoch": 2.1161669113021597, - "grad_norm": 4.546203519763934, - "learning_rate": 4.217606956177713e-06, - "loss": 0.1647, + "epoch": 1.425021180457498, + "grad_norm": 3.0650595520790453, + "learning_rate": 1.1292161891040388e-05, + "loss": 0.5679, "step": 10092 }, { - "epoch": 2.1163765988676873, - "grad_norm": 5.119517897537984, - "learning_rate": 4.215760075159903e-06, - "loss": 0.1683, + "epoch": 1.4251623835074838, + "grad_norm": 3.510698717776447, + "learning_rate": 1.1290650203407998e-05, + "loss": 0.6625, "step": 10093 }, { - "epoch": 2.1165862864332143, - "grad_norm": 3.607397407775089, - "learning_rate": 4.213913490594423e-06, - "loss": 0.1464, + "epoch": 1.4253035865574697, + "grad_norm": 3.4067238145420546, + "learning_rate": 1.1289138485781456e-05, + "loss": 0.4823, "step": 10094 }, { - "epoch": 2.116795973998742, - "grad_norm": 5.405919391867583, - "learning_rate": 4.212067202575907e-06, - "loss": 0.187, + "epoch": 1.4254447896074556, + "grad_norm": 4.328913928382474, + "learning_rate": 1.1287626738195895e-05, + "loss": 0.4883, "step": 10095 }, { - "epoch": 2.1170056615642694, - "grad_norm": 3.2929630485244346, - "learning_rate": 4.210221211198986e-06, - "loss": 0.1476, + "epoch": 1.4255859926574415, + "grad_norm": 3.9056028516413677, + "learning_rate": 1.128611496068644e-05, + "loss": 0.7912, "step": 10096 }, { - "epoch": 2.1172153491297965, - "grad_norm": 3.3083251566275638, - "learning_rate": 4.2083755165582726e-06, - "loss": 0.1376, + "epoch": 1.4257271957074273, + "grad_norm": 4.669399952348939, + "learning_rate": 1.1284603153288232e-05, + "loss": 0.6673, "step": 10097 }, { - "epoch": 2.117425036695324, - "grad_norm": 3.924909992352462, - "learning_rate": 4.20653011874836e-06, - "loss": 0.1685, + "epoch": 1.4258683987574132, + "grad_norm": 3.9558370532856517, + "learning_rate": 1.12830913160364e-05, + "loss": 0.581, "step": 10098 }, { - "epoch": 2.1176347242608515, - "grad_norm": 4.089057822028018, - "learning_rate": 4.204685017863824e-06, - "loss": 0.1511, + "epoch": 1.426009601807399, + "grad_norm": 3.568023375904364, + "learning_rate": 1.128157944896608e-05, + "loss": 0.6443, "step": 10099 }, { - "epoch": 2.1178444118263786, - "grad_norm": 4.3237819931346335, - "learning_rate": 4.2028402139992345e-06, - "loss": 0.1598, + "epoch": 1.426150804857385, + "grad_norm": 3.8817606320590374, + "learning_rate": 1.1280067552112408e-05, + "loss": 0.6459, "step": 10100 }, { - "epoch": 2.118054099391906, - "grad_norm": 4.385749711986025, - "learning_rate": 4.200995707249136e-06, - "loss": 0.1774, + "epoch": 1.4262920079073709, + "grad_norm": 3.162502847560901, + "learning_rate": 1.1278555625510519e-05, + "loss": 0.5225, "step": 10101 }, { - "epoch": 2.1182637869574332, - "grad_norm": 4.733460921938973, - "learning_rate": 4.199151497708068e-06, - "loss": 0.1938, + "epoch": 1.4264332109573568, + "grad_norm": 3.9475545005016386, + "learning_rate": 1.1277043669195549e-05, + "loss": 0.5131, "step": 10102 }, { - "epoch": 2.1184734745229608, - "grad_norm": 3.685812668143906, - "learning_rate": 4.197307585470547e-06, - "loss": 0.143, + "epoch": 1.4265744140073426, + "grad_norm": 3.5782800979875784, + "learning_rate": 1.1275531683202634e-05, + "loss": 0.5818, "step": 10103 }, { - "epoch": 2.1186831620884883, - "grad_norm": 4.952481969675868, - "learning_rate": 4.195463970631074e-06, - "loss": 0.1575, + "epoch": 1.4267156170573285, + "grad_norm": 4.568194632289159, + "learning_rate": 1.1274019667566913e-05, + "loss": 0.7814, "step": 10104 }, { - "epoch": 2.1188928496540154, - "grad_norm": 4.63013552708414, - "learning_rate": 4.193620653284138e-06, - "loss": 0.1598, + "epoch": 1.4268568201073144, + "grad_norm": 3.222619447638465, + "learning_rate": 1.127250762232353e-05, + "loss": 0.5888, "step": 10105 }, { - "epoch": 2.119102537219543, - "grad_norm": 5.882114104879191, - "learning_rate": 4.19177763352422e-06, - "loss": 0.1788, + "epoch": 1.4269980231573003, + "grad_norm": 3.447640974094161, + "learning_rate": 1.1270995547507617e-05, + "loss": 0.5915, "step": 10106 }, { - "epoch": 2.1193122247850704, - "grad_norm": 3.960542581436256, - "learning_rate": 4.189934911445771e-06, - "loss": 0.1638, + "epoch": 1.4271392262072862, + "grad_norm": 4.246528452627301, + "learning_rate": 1.1269483443154314e-05, + "loss": 0.7003, "step": 10107 }, { - "epoch": 2.1195219123505975, - "grad_norm": 4.66233653293019, - "learning_rate": 4.188092487143232e-06, - "loss": 0.1732, + "epoch": 1.427280429257272, + "grad_norm": 4.087662469252202, + "learning_rate": 1.1267971309298767e-05, + "loss": 0.6286, "step": 10108 }, { - "epoch": 2.119731599916125, - "grad_norm": 4.375271387495915, - "learning_rate": 4.186250360711035e-06, - "loss": 0.1728, + "epoch": 1.427421632307258, + "grad_norm": 3.534723640275289, + "learning_rate": 1.1266459145976114e-05, + "loss": 0.4541, "step": 10109 }, { - "epoch": 2.1199412874816526, - "grad_norm": 4.989013791253462, - "learning_rate": 4.1844085322435924e-06, - "loss": 0.1604, + "epoch": 1.4275628353572438, + "grad_norm": 3.021244897788777, + "learning_rate": 1.1264946953221496e-05, + "loss": 0.5568, "step": 10110 }, { - "epoch": 2.1201509750471796, - "grad_norm": 2.9117646234746917, - "learning_rate": 4.182567001835295e-06, - "loss": 0.1229, + "epoch": 1.4277040384072297, + "grad_norm": 3.0020911783715216, + "learning_rate": 1.1263434731070058e-05, + "loss": 0.5255, "step": 10111 }, { - "epoch": 2.120360662612707, - "grad_norm": 4.316369253319347, - "learning_rate": 4.180725769580531e-06, - "loss": 0.1794, + "epoch": 1.4278452414572156, + "grad_norm": 3.362516578569044, + "learning_rate": 1.1261922479556944e-05, + "loss": 0.6224, "step": 10112 }, { - "epoch": 2.1205703501782343, - "grad_norm": 4.67364265328549, - "learning_rate": 4.1788848355736625e-06, - "loss": 0.1846, + "epoch": 1.4279864445072015, + "grad_norm": 3.3896484959221698, + "learning_rate": 1.1260410198717291e-05, + "loss": 0.6175, "step": 10113 }, { - "epoch": 2.120780037743762, - "grad_norm": 3.5326081342207303, - "learning_rate": 4.177044199909045e-06, - "loss": 0.1469, + "epoch": 1.4281276475571874, + "grad_norm": 5.803728744142781, + "learning_rate": 1.1258897888586256e-05, + "loss": 0.9258, "step": 10114 }, { - "epoch": 2.1209897253092893, - "grad_norm": 6.465095292409778, - "learning_rate": 4.175203862681012e-06, - "loss": 0.215, + "epoch": 1.4282688506071732, + "grad_norm": 4.068852270699901, + "learning_rate": 1.1257385549198976e-05, + "loss": 0.6771, "step": 10115 }, { - "epoch": 2.1211994128748164, - "grad_norm": 3.773283828322142, - "learning_rate": 4.17336382398388e-06, - "loss": 0.1343, + "epoch": 1.4284100536571591, + "grad_norm": 3.557138896162329, + "learning_rate": 1.1255873180590595e-05, + "loss": 0.5702, "step": 10116 }, { - "epoch": 2.121409100440344, - "grad_norm": 3.243115020698188, - "learning_rate": 4.171524083911957e-06, - "loss": 0.1119, + "epoch": 1.428551256707145, + "grad_norm": 3.185678366725922, + "learning_rate": 1.1254360782796268e-05, + "loss": 0.5307, "step": 10117 }, { - "epoch": 2.1216187880058714, - "grad_norm": 4.545699634466251, - "learning_rate": 4.169684642559538e-06, - "loss": 0.1574, + "epoch": 1.4286924597571309, + "grad_norm": 3.6473374255597992, + "learning_rate": 1.1252848355851136e-05, + "loss": 0.6067, "step": 10118 }, { - "epoch": 2.1218284755713985, - "grad_norm": 4.5269057540890385, - "learning_rate": 4.167845500020894e-06, - "loss": 0.1781, + "epoch": 1.4288336628071168, + "grad_norm": 4.239508292707983, + "learning_rate": 1.125133589979035e-05, + "loss": 0.7621, "step": 10119 }, { - "epoch": 2.122038163136926, - "grad_norm": 3.8445812718207533, - "learning_rate": 4.166006656390279e-06, - "loss": 0.1271, + "epoch": 1.4289748658571026, + "grad_norm": 3.7802713305141937, + "learning_rate": 1.124982341464906e-05, + "loss": 0.6425, "step": 10120 }, { - "epoch": 2.122247850702453, - "grad_norm": 5.203158609751385, - "learning_rate": 4.164168111761942e-06, - "loss": 0.1877, + "epoch": 1.4291160689070883, + "grad_norm": 4.268602868263709, + "learning_rate": 1.124831090046241e-05, + "loss": 0.6077, "step": 10121 }, { - "epoch": 2.1224575382679807, - "grad_norm": 3.8834020061099523, - "learning_rate": 4.162329866230114e-06, - "loss": 0.1195, + "epoch": 1.4292572719570742, + "grad_norm": 3.772950591973506, + "learning_rate": 1.1246798357265554e-05, + "loss": 0.6318, "step": 10122 }, { - "epoch": 2.122667225833508, - "grad_norm": 4.380457757023012, - "learning_rate": 4.1604919198890045e-06, - "loss": 0.1543, + "epoch": 1.42939847500706, + "grad_norm": 3.759970235997846, + "learning_rate": 1.1245285785093646e-05, + "loss": 0.4728, "step": 10123 }, { - "epoch": 2.1228769133990353, - "grad_norm": 4.668119681085, - "learning_rate": 4.15865427283281e-06, - "loss": 0.1631, + "epoch": 1.429539678057046, + "grad_norm": 2.9890596734883546, + "learning_rate": 1.124377318398183e-05, + "loss": 0.4803, "step": 10124 }, { - "epoch": 2.123086600964563, - "grad_norm": 4.182660033264631, - "learning_rate": 4.156816925155719e-06, - "loss": 0.1416, + "epoch": 1.4296808811070318, + "grad_norm": 3.51049368271252, + "learning_rate": 1.1242260553965265e-05, + "loss": 0.6963, "step": 10125 }, { - "epoch": 2.1232962885300903, - "grad_norm": 4.902637677481055, - "learning_rate": 4.15497987695189e-06, - "loss": 0.1795, + "epoch": 1.4298220841570177, + "grad_norm": 3.0291971980100865, + "learning_rate": 1.12407478950791e-05, + "loss": 0.6131, "step": 10126 }, { - "epoch": 2.1235059760956174, - "grad_norm": 3.9500994807432015, - "learning_rate": 4.153143128315486e-06, - "loss": 0.1616, + "epoch": 1.4299632872070036, + "grad_norm": 3.852998759122205, + "learning_rate": 1.1239235207358492e-05, + "loss": 0.7303, "step": 10127 }, { - "epoch": 2.123715663661145, - "grad_norm": 3.4586483507034287, - "learning_rate": 4.151306679340636e-06, - "loss": 0.1165, + "epoch": 1.4301044902569895, + "grad_norm": 3.6568276061424685, + "learning_rate": 1.1237722490838592e-05, + "loss": 0.5816, "step": 10128 }, { - "epoch": 2.1239253512266725, - "grad_norm": 4.336625512197504, - "learning_rate": 4.149470530121459e-06, - "loss": 0.1723, + "epoch": 1.4302456933069754, + "grad_norm": 4.3713562866131115, + "learning_rate": 1.1236209745554554e-05, + "loss": 0.8152, "step": 10129 }, { - "epoch": 2.1241350387921996, - "grad_norm": 4.558059093789056, - "learning_rate": 4.147634680752064e-06, - "loss": 0.15, + "epoch": 1.4303868963569613, + "grad_norm": 3.2415836764171906, + "learning_rate": 1.1234696971541534e-05, + "loss": 0.4835, "step": 10130 }, { - "epoch": 2.124344726357727, - "grad_norm": 4.177252090244682, - "learning_rate": 4.14579913132655e-06, - "loss": 0.1231, + "epoch": 1.4305280994069471, + "grad_norm": 3.019477648177745, + "learning_rate": 1.1233184168834694e-05, + "loss": 0.5517, "step": 10131 }, { - "epoch": 2.124554413923254, - "grad_norm": 4.038110633553171, - "learning_rate": 4.143963881938977e-06, - "loss": 0.151, + "epoch": 1.430669302456933, + "grad_norm": 3.1334642147951657, + "learning_rate": 1.1231671337469185e-05, + "loss": 0.592, "step": 10132 }, { - "epoch": 2.1247641014887817, - "grad_norm": 3.701671377285011, - "learning_rate": 4.142128932683412e-06, - "loss": 0.1377, + "epoch": 1.430810505506919, + "grad_norm": 4.087925308437173, + "learning_rate": 1.1230158477480165e-05, + "loss": 0.7543, "step": 10133 }, { - "epoch": 2.1249737890543092, - "grad_norm": 3.3738755226836123, - "learning_rate": 4.1402942836539015e-06, - "loss": 0.0839, + "epoch": 1.4309517085569048, + "grad_norm": 3.521174549983525, + "learning_rate": 1.1228645588902793e-05, + "loss": 0.688, "step": 10134 }, { - "epoch": 2.1251834766198363, - "grad_norm": 4.063641748781456, - "learning_rate": 4.138459934944473e-06, - "loss": 0.1769, + "epoch": 1.4310929116068907, + "grad_norm": 4.721521847604285, + "learning_rate": 1.1227132671772232e-05, + "loss": 0.7504, "step": 10135 }, { - "epoch": 2.125393164185364, - "grad_norm": 3.931585666586951, - "learning_rate": 4.136625886649136e-06, - "loss": 0.1602, + "epoch": 1.4312341146568766, + "grad_norm": 4.140792224424472, + "learning_rate": 1.1225619726123632e-05, + "loss": 0.7295, "step": 10136 }, { - "epoch": 2.1256028517508914, - "grad_norm": 4.545788331137572, - "learning_rate": 4.134792138861895e-06, - "loss": 0.2054, + "epoch": 1.4313753177068624, + "grad_norm": 3.1125455060753127, + "learning_rate": 1.1224106751992164e-05, + "loss": 0.4761, "step": 10137 }, { - "epoch": 2.1258125393164184, - "grad_norm": 3.991689931292914, - "learning_rate": 4.132958691676726e-06, - "loss": 0.1393, + "epoch": 1.4315165207568483, + "grad_norm": 4.0107455294605225, + "learning_rate": 1.1222593749412982e-05, + "loss": 0.7552, "step": 10138 }, { - "epoch": 2.126022226881946, - "grad_norm": 4.340126259739262, - "learning_rate": 4.131125545187604e-06, - "loss": 0.1624, + "epoch": 1.4316577238068342, + "grad_norm": 4.197765774506653, + "learning_rate": 1.1221080718421247e-05, + "loss": 0.806, "step": 10139 }, { - "epoch": 2.126231914447473, - "grad_norm": 3.78455174407542, - "learning_rate": 4.129292699488477e-06, - "loss": 0.1476, + "epoch": 1.43179892685682, + "grad_norm": 3.722371905268063, + "learning_rate": 1.1219567659052126e-05, + "loss": 0.5966, "step": 10140 }, { - "epoch": 2.1264416020130006, - "grad_norm": 4.1655703222570395, - "learning_rate": 4.127460154673276e-06, - "loss": 0.1446, + "epoch": 1.431940129906806, + "grad_norm": 3.30589107116539, + "learning_rate": 1.1218054571340778e-05, + "loss": 0.5849, "step": 10141 }, { - "epoch": 2.126651289578528, - "grad_norm": 3.937201620851793, - "learning_rate": 4.125627910835929e-06, - "loss": 0.1443, + "epoch": 1.4320813329567919, + "grad_norm": 3.6538551289050565, + "learning_rate": 1.1216541455322367e-05, + "loss": 0.5685, "step": 10142 }, { - "epoch": 2.126860977144055, - "grad_norm": 4.927761700925233, - "learning_rate": 4.1237959680703435e-06, - "loss": 0.1052, + "epoch": 1.4322225360067777, + "grad_norm": 3.687562027237578, + "learning_rate": 1.1215028311032059e-05, + "loss": 0.5663, "step": 10143 }, { - "epoch": 2.1270706647095827, - "grad_norm": 4.354194799920581, - "learning_rate": 4.121964326470407e-06, - "loss": 0.1543, + "epoch": 1.4323637390567636, + "grad_norm": 3.5327948574719334, + "learning_rate": 1.121351513850502e-05, + "loss": 0.455, "step": 10144 }, { - "epoch": 2.1272803522751103, - "grad_norm": 5.2418347524631415, - "learning_rate": 4.120132986129989e-06, - "loss": 0.1853, + "epoch": 1.4325049421067495, + "grad_norm": 2.8241258553594357, + "learning_rate": 1.1212001937776412e-05, + "loss": 0.4577, "step": 10145 }, { - "epoch": 2.1274900398406373, - "grad_norm": 4.837597365494546, - "learning_rate": 4.1183019471429576e-06, - "loss": 0.198, + "epoch": 1.4326461451567354, + "grad_norm": 3.111218165323433, + "learning_rate": 1.12104887088814e-05, + "loss": 0.5555, "step": 10146 }, { - "epoch": 2.127699727406165, - "grad_norm": 3.377443962018991, - "learning_rate": 4.116471209603149e-06, - "loss": 0.1309, + "epoch": 1.4327873482067213, + "grad_norm": 3.674938050227375, + "learning_rate": 1.1208975451855152e-05, + "loss": 0.6037, "step": 10147 }, { - "epoch": 2.1279094149716924, - "grad_norm": 4.428109567897319, - "learning_rate": 4.1146407736043995e-06, - "loss": 0.1438, + "epoch": 1.4329285512567072, + "grad_norm": 2.950362479360618, + "learning_rate": 1.1207462166732844e-05, + "loss": 0.5096, "step": 10148 }, { - "epoch": 2.1281191025372195, - "grad_norm": 3.907099874430981, - "learning_rate": 4.112810639240517e-06, - "loss": 0.1663, + "epoch": 1.433069754306693, + "grad_norm": 3.00214144613208, + "learning_rate": 1.1205948853549631e-05, + "loss": 0.4827, "step": 10149 }, { - "epoch": 2.128328790102747, - "grad_norm": 4.637491224717953, - "learning_rate": 4.110980806605297e-06, - "loss": 0.1613, + "epoch": 1.433210957356679, + "grad_norm": 3.312204463432061, + "learning_rate": 1.1204435512340688e-05, + "loss": 0.527, "step": 10150 }, { - "epoch": 2.128538477668274, - "grad_norm": 5.351478131527192, - "learning_rate": 4.109151275792525e-06, - "loss": 0.1913, + "epoch": 1.4333521604066648, + "grad_norm": 3.6016499685967798, + "learning_rate": 1.1202922143141185e-05, + "loss": 0.5302, "step": 10151 }, { - "epoch": 2.1287481652338016, - "grad_norm": 5.0326791702639095, - "learning_rate": 4.107322046895974e-06, - "loss": 0.1671, + "epoch": 1.4334933634566507, + "grad_norm": 3.641478649800596, + "learning_rate": 1.120140874598629e-05, + "loss": 0.5605, "step": 10152 }, { - "epoch": 2.128957852799329, - "grad_norm": 3.5578423333137064, - "learning_rate": 4.10549312000938e-06, - "loss": 0.1264, + "epoch": 1.4336345665066366, + "grad_norm": 3.461178219723336, + "learning_rate": 1.1199895320911174e-05, + "loss": 0.5483, "step": 10153 }, { - "epoch": 2.1291675403648562, - "grad_norm": 3.582022682650813, - "learning_rate": 4.103664495226489e-06, - "loss": 0.1536, + "epoch": 1.4337757695566224, + "grad_norm": 4.3829749981566195, + "learning_rate": 1.119838186795101e-05, + "loss": 0.7078, "step": 10154 }, { - "epoch": 2.1293772279303838, - "grad_norm": 4.257911599169649, - "learning_rate": 4.1018361726410224e-06, - "loss": 0.1477, + "epoch": 1.4339169726066083, + "grad_norm": 4.288655171451108, + "learning_rate": 1.119686838714097e-05, + "loss": 0.7457, "step": 10155 }, { - "epoch": 2.1295869154959113, - "grad_norm": 3.9466131342053834, - "learning_rate": 4.100008152346682e-06, - "loss": 0.1515, + "epoch": 1.4340581756565942, + "grad_norm": 3.126774155741125, + "learning_rate": 1.1195354878516222e-05, + "loss": 0.4628, "step": 10156 }, { - "epoch": 2.1297966030614384, - "grad_norm": 5.447938962424122, - "learning_rate": 4.098180434437154e-06, - "loss": 0.1633, + "epoch": 1.43419937870658, + "grad_norm": 3.5600594018434517, + "learning_rate": 1.1193841342111947e-05, + "loss": 0.571, "step": 10157 }, { - "epoch": 2.130006290626966, - "grad_norm": 4.211310278372867, - "learning_rate": 4.096353019006117e-06, - "loss": 0.1506, + "epoch": 1.434340581756566, + "grad_norm": 3.464086245115906, + "learning_rate": 1.1192327777963313e-05, + "loss": 0.6437, "step": 10158 }, { - "epoch": 2.130215978192493, - "grad_norm": 3.217935311611867, - "learning_rate": 4.094525906147225e-06, - "loss": 0.1206, + "epoch": 1.4344817848065519, + "grad_norm": 3.672665205264537, + "learning_rate": 1.1190814186105495e-05, + "loss": 0.5995, "step": 10159 }, { - "epoch": 2.1304256657580205, - "grad_norm": 4.308970804048729, - "learning_rate": 4.092699095954125e-06, - "loss": 0.1325, + "epoch": 1.4346229878565377, + "grad_norm": 3.204834062967515, + "learning_rate": 1.118930056657367e-05, + "loss": 0.5026, "step": 10160 }, { - "epoch": 2.130635353323548, - "grad_norm": 5.301004060217339, - "learning_rate": 4.090872588520442e-06, - "loss": 0.158, + "epoch": 1.4347641909065236, + "grad_norm": 4.001264861690151, + "learning_rate": 1.1187786919403017e-05, + "loss": 0.6692, "step": 10161 }, { - "epoch": 2.130845040889075, - "grad_norm": 4.9455068216367675, - "learning_rate": 4.089046383939784e-06, - "loss": 0.189, + "epoch": 1.4349053939565095, + "grad_norm": 3.8138128473344217, + "learning_rate": 1.1186273244628705e-05, + "loss": 0.5745, "step": 10162 }, { - "epoch": 2.1310547284546026, - "grad_norm": 4.205910351829512, - "learning_rate": 4.087220482305752e-06, - "loss": 0.143, + "epoch": 1.4350465970064954, + "grad_norm": 4.08058845709455, + "learning_rate": 1.1184759542285917e-05, + "loss": 0.5837, "step": 10163 }, { - "epoch": 2.13126441602013, - "grad_norm": 4.204392619142861, - "learning_rate": 4.085394883711929e-06, - "loss": 0.1732, + "epoch": 1.4351878000564813, + "grad_norm": 2.5467754537744303, + "learning_rate": 1.1183245812409828e-05, + "loss": 0.418, "step": 10164 }, { - "epoch": 2.1314741035856573, - "grad_norm": 4.78614389960782, - "learning_rate": 4.083569588251876e-06, - "loss": 0.1869, + "epoch": 1.4353290031064672, + "grad_norm": 3.20132441965069, + "learning_rate": 1.118173205503562e-05, + "loss": 0.5481, "step": 10165 }, { - "epoch": 2.131683791151185, - "grad_norm": 4.757047615400579, - "learning_rate": 4.0817445960191385e-06, - "loss": 0.1452, + "epoch": 1.435470206156453, + "grad_norm": 3.080664964595875, + "learning_rate": 1.1180218270198466e-05, + "loss": 0.4625, "step": 10166 }, { - "epoch": 2.1318934787167123, - "grad_norm": 4.912649891276758, - "learning_rate": 4.079919907107261e-06, - "loss": 0.1227, + "epoch": 1.435611409206439, + "grad_norm": 3.51871012702738, + "learning_rate": 1.1178704457933546e-05, + "loss": 0.5679, "step": 10167 }, { - "epoch": 2.1321031662822394, - "grad_norm": 3.887111947164488, - "learning_rate": 4.07809552160975e-06, - "loss": 0.1421, + "epoch": 1.4357526122564248, + "grad_norm": 3.1157088700753324, + "learning_rate": 1.117719061827605e-05, + "loss": 0.5397, "step": 10168 }, { - "epoch": 2.132312853847767, - "grad_norm": 3.876528291462925, - "learning_rate": 4.07627143962012e-06, - "loss": 0.1552, + "epoch": 1.4358938153064107, + "grad_norm": 3.495509725769545, + "learning_rate": 1.1175676751261151e-05, + "loss": 0.5509, "step": 10169 }, { - "epoch": 2.132522541413294, - "grad_norm": 4.810092149880191, - "learning_rate": 4.074447661231853e-06, - "loss": 0.173, + "epoch": 1.4360350183563966, + "grad_norm": 3.7507170194994073, + "learning_rate": 1.1174162856924029e-05, + "loss": 0.5533, "step": 10170 }, { - "epoch": 2.1327322289788215, - "grad_norm": 3.8063352203234495, - "learning_rate": 4.0726241865384165e-06, - "loss": 0.1653, + "epoch": 1.4361762214063825, + "grad_norm": 4.5732246375444765, + "learning_rate": 1.1172648935299872e-05, + "loss": 0.7592, "step": 10171 }, { - "epoch": 2.132941916544349, - "grad_norm": 5.490173873852995, - "learning_rate": 4.070801015633274e-06, - "loss": 0.1847, + "epoch": 1.4363174244563681, + "grad_norm": 3.278282511128118, + "learning_rate": 1.1171134986423859e-05, + "loss": 0.5738, "step": 10172 }, { - "epoch": 2.133151604109876, - "grad_norm": 4.8859261421511375, - "learning_rate": 4.068978148609863e-06, - "loss": 0.1878, + "epoch": 1.436458627506354, + "grad_norm": 3.4152127451451264, + "learning_rate": 1.1169621010331176e-05, + "loss": 0.589, "step": 10173 }, { - "epoch": 2.1333612916754037, - "grad_norm": 3.4211518556287617, - "learning_rate": 4.0671555855616065e-06, - "loss": 0.1323, + "epoch": 1.4365998305563399, + "grad_norm": 4.348242870296071, + "learning_rate": 1.1168107007057006e-05, + "loss": 0.7292, "step": 10174 }, { - "epoch": 2.133570979240931, - "grad_norm": 4.388274629598768, - "learning_rate": 4.065333326581915e-06, - "loss": 0.1571, + "epoch": 1.4367410336063258, + "grad_norm": 4.014098025095221, + "learning_rate": 1.1166592976636532e-05, + "loss": 0.5757, "step": 10175 }, { - "epoch": 2.1337806668064583, - "grad_norm": 4.757774281818063, - "learning_rate": 4.063511371764188e-06, - "loss": 0.115, + "epoch": 1.4368822366563117, + "grad_norm": 3.333658710563332, + "learning_rate": 1.1165078919104942e-05, + "loss": 0.5061, "step": 10176 }, { - "epoch": 2.133990354371986, - "grad_norm": 3.5775026591364267, - "learning_rate": 4.0616897212017996e-06, - "loss": 0.127, + "epoch": 1.4370234397062975, + "grad_norm": 3.9786729519539175, + "learning_rate": 1.116356483449742e-05, + "loss": 0.6587, "step": 10177 }, { - "epoch": 2.134200041937513, - "grad_norm": 4.7226057520241955, - "learning_rate": 4.0598683749881085e-06, - "loss": 0.1322, + "epoch": 1.4371646427562834, + "grad_norm": 3.778595706799023, + "learning_rate": 1.1162050722849153e-05, + "loss": 0.5312, "step": 10178 }, { - "epoch": 2.1344097295030404, - "grad_norm": 6.709355641660494, - "learning_rate": 4.058047333216469e-06, - "loss": 0.1866, + "epoch": 1.4373058458062693, + "grad_norm": 3.209460634321353, + "learning_rate": 1.1160536584195332e-05, + "loss": 0.5323, "step": 10179 }, { - "epoch": 2.134619417068568, - "grad_norm": 3.327732615023194, - "learning_rate": 4.056226595980207e-06, - "loss": 0.1068, + "epoch": 1.4374470488562552, + "grad_norm": 4.055262128367164, + "learning_rate": 1.115902241857114e-05, + "loss": 0.5667, "step": 10180 }, { - "epoch": 2.134829104634095, - "grad_norm": 4.443679062064012, - "learning_rate": 4.054406163372644e-06, - "loss": 0.1806, + "epoch": 1.437588251906241, + "grad_norm": 3.3614654244154565, + "learning_rate": 1.1157508226011768e-05, + "loss": 0.5814, "step": 10181 }, { - "epoch": 2.1350387921996226, - "grad_norm": 4.115710777901885, - "learning_rate": 4.052586035487074e-06, - "loss": 0.155, + "epoch": 1.437729454956227, + "grad_norm": 3.240685209552864, + "learning_rate": 1.1155994006552406e-05, + "loss": 0.5573, "step": 10182 }, { - "epoch": 2.13524847976515, - "grad_norm": 3.162295644815646, - "learning_rate": 4.050766212416788e-06, - "loss": 0.1166, + "epoch": 1.4378706580062128, + "grad_norm": 3.4900926883220764, + "learning_rate": 1.1154479760228242e-05, + "loss": 0.5752, "step": 10183 }, { - "epoch": 2.135458167330677, - "grad_norm": 3.521417018396423, - "learning_rate": 4.048946694255048e-06, - "loss": 0.1495, + "epoch": 1.4380118610561987, + "grad_norm": 3.880972088849451, + "learning_rate": 1.1152965487074466e-05, + "loss": 0.5338, "step": 10184 }, { - "epoch": 2.1356678548962047, - "grad_norm": 3.008685069421243, - "learning_rate": 4.047127481095116e-06, - "loss": 0.1208, + "epoch": 1.4381530641061846, + "grad_norm": 3.0956490516356077, + "learning_rate": 1.115145118712627e-05, + "loss": 0.5092, "step": 10185 }, { - "epoch": 2.1358775424617322, - "grad_norm": 3.9943109226530047, - "learning_rate": 4.045308573030224e-06, - "loss": 0.1571, + "epoch": 1.4382942671561705, + "grad_norm": 3.8312963897792622, + "learning_rate": 1.1149936860418846e-05, + "loss": 0.6713, "step": 10186 }, { - "epoch": 2.1360872300272593, - "grad_norm": 3.4089055212668744, - "learning_rate": 4.0434899701535935e-06, - "loss": 0.1364, + "epoch": 1.4384354702061564, + "grad_norm": 3.6708463790489816, + "learning_rate": 1.1148422506987385e-05, + "loss": 0.6105, "step": 10187 }, { - "epoch": 2.136296917592787, - "grad_norm": 6.292299313163938, - "learning_rate": 4.041671672558432e-06, - "loss": 0.2154, + "epoch": 1.4385766732561422, + "grad_norm": 3.589515968594199, + "learning_rate": 1.1146908126867082e-05, + "loss": 0.5822, "step": 10188 }, { - "epoch": 2.136506605158314, - "grad_norm": 4.144706815610816, - "learning_rate": 4.03985368033794e-06, - "loss": 0.1283, + "epoch": 1.4387178763061281, + "grad_norm": 3.8727669037905716, + "learning_rate": 1.114539372009313e-05, + "loss": 0.5173, "step": 10189 }, { - "epoch": 2.1367162927238414, - "grad_norm": 3.355070961173317, - "learning_rate": 4.038035993585275e-06, - "loss": 0.138, + "epoch": 1.438859079356114, + "grad_norm": 3.362661750179107, + "learning_rate": 1.1143879286700723e-05, + "loss": 0.5349, "step": 10190 }, { - "epoch": 2.136925980289369, - "grad_norm": 3.7280083949988345, - "learning_rate": 4.0362186123936075e-06, - "loss": 0.1265, + "epoch": 1.4390002824061, + "grad_norm": 3.109731969504989, + "learning_rate": 1.1142364826725055e-05, + "loss": 0.5878, "step": 10191 }, { - "epoch": 2.137135667854896, - "grad_norm": 4.056093289675683, - "learning_rate": 4.034401536856083e-06, - "loss": 0.1255, + "epoch": 1.4391414854560858, + "grad_norm": 3.0927715764901578, + "learning_rate": 1.1140850340201319e-05, + "loss": 0.4969, "step": 10192 }, { - "epoch": 2.1373453554204236, - "grad_norm": 4.050370649405686, - "learning_rate": 4.032584767065827e-06, - "loss": 0.1442, + "epoch": 1.4392826885060717, + "grad_norm": 3.9690771930073305, + "learning_rate": 1.1139335827164715e-05, + "loss": 0.6086, "step": 10193 }, { - "epoch": 2.137555042985951, - "grad_norm": 4.681924258824081, - "learning_rate": 4.030768303115947e-06, - "loss": 0.1692, + "epoch": 1.4394238915560575, + "grad_norm": 4.150455120590869, + "learning_rate": 1.1137821287650438e-05, + "loss": 0.6051, "step": 10194 }, { - "epoch": 2.137764730551478, - "grad_norm": 5.099969483550425, - "learning_rate": 4.028952145099549e-06, - "loss": 0.1699, + "epoch": 1.4395650946060434, + "grad_norm": 5.291669510513383, + "learning_rate": 1.1136306721693688e-05, + "loss": 0.7183, "step": 10195 }, { - "epoch": 2.1379744181170057, - "grad_norm": 3.577646197429735, - "learning_rate": 4.027136293109705e-06, - "loss": 0.1136, + "epoch": 1.4397062976560293, + "grad_norm": 3.3108907927954006, + "learning_rate": 1.113479212932966e-05, + "loss": 0.6306, "step": 10196 }, { - "epoch": 2.138184105682533, - "grad_norm": 4.8386777355703385, - "learning_rate": 4.02532074723949e-06, - "loss": 0.1738, + "epoch": 1.4398475007060152, + "grad_norm": 4.428329472629895, + "learning_rate": 1.1133277510593552e-05, + "loss": 0.7193, "step": 10197 }, { - "epoch": 2.1383937932480603, - "grad_norm": 4.750642685954514, - "learning_rate": 4.023505507581949e-06, - "loss": 0.1286, + "epoch": 1.439988703756001, + "grad_norm": 2.9972137173000797, + "learning_rate": 1.1131762865520566e-05, + "loss": 0.5454, "step": 10198 }, { - "epoch": 2.138603480813588, - "grad_norm": 5.453403422583522, - "learning_rate": 4.0216905742301125e-06, - "loss": 0.1932, + "epoch": 1.440129906805987, + "grad_norm": 3.9618604168423452, + "learning_rate": 1.1130248194145898e-05, + "loss": 0.6817, "step": 10199 }, { - "epoch": 2.138813168379115, - "grad_norm": 4.3806140621999905, - "learning_rate": 4.019875947277005e-06, - "loss": 0.1281, + "epoch": 1.4402711098559728, + "grad_norm": 3.3085697962349294, + "learning_rate": 1.1128733496504751e-05, + "loss": 0.5125, "step": 10200 }, { - "epoch": 2.1390228559446425, - "grad_norm": 3.51464371284746, - "learning_rate": 4.018061626815628e-06, - "loss": 0.1315, + "epoch": 1.4404123129059587, + "grad_norm": 3.4743132199286686, + "learning_rate": 1.1127218772632323e-05, + "loss": 0.5296, "step": 10201 }, { - "epoch": 2.13923254351017, - "grad_norm": 4.704765480258965, - "learning_rate": 4.016247612938969e-06, - "loss": 0.1655, + "epoch": 1.4405535159559446, + "grad_norm": 3.3512081699143645, + "learning_rate": 1.112570402256382e-05, + "loss": 0.6092, "step": 10202 }, { - "epoch": 2.139442231075697, - "grad_norm": 3.710578710753982, - "learning_rate": 4.014433905739994e-06, - "loss": 0.1208, + "epoch": 1.4406947190059305, + "grad_norm": 3.068384931849448, + "learning_rate": 1.1124189246334441e-05, + "loss": 0.5967, "step": 10203 }, { - "epoch": 2.1396519186412246, - "grad_norm": 2.824973012176952, - "learning_rate": 4.012620505311667e-06, - "loss": 0.1048, + "epoch": 1.4408359220559164, + "grad_norm": 3.4967763505332807, + "learning_rate": 1.1122674443979387e-05, + "loss": 0.5594, "step": 10204 }, { - "epoch": 2.139861606206752, - "grad_norm": 3.304212871752741, - "learning_rate": 4.01080741174692e-06, - "loss": 0.1333, + "epoch": 1.4409771251059023, + "grad_norm": 3.2500715496935157, + "learning_rate": 1.1121159615533865e-05, + "loss": 0.4761, "step": 10205 }, { - "epoch": 2.1400712937722792, - "grad_norm": 4.00001511207721, - "learning_rate": 4.008994625138684e-06, - "loss": 0.1345, + "epoch": 1.4411183281558881, + "grad_norm": 3.034916105626154, + "learning_rate": 1.1119644761033079e-05, + "loss": 0.4467, "step": 10206 }, { - "epoch": 2.1402809813378068, - "grad_norm": 4.2161891127346784, - "learning_rate": 4.0071821455798655e-06, - "loss": 0.1463, + "epoch": 1.441259531205874, + "grad_norm": 3.0998088830214057, + "learning_rate": 1.1118129880512233e-05, + "loss": 0.5207, "step": 10207 }, { - "epoch": 2.140490668903334, - "grad_norm": 3.8138949309434116, - "learning_rate": 4.005369973163351e-06, - "loss": 0.1548, + "epoch": 1.44140073425586, + "grad_norm": 3.7484412171881596, + "learning_rate": 1.111661497400653e-05, + "loss": 0.569, "step": 10208 }, { - "epoch": 2.1407003564688614, - "grad_norm": 4.366078323030283, - "learning_rate": 4.0035581079820244e-06, - "loss": 0.1652, + "epoch": 1.4415419373058458, + "grad_norm": 3.994928821852951, + "learning_rate": 1.1115100041551179e-05, + "loss": 0.724, "step": 10209 }, { - "epoch": 2.140910044034389, - "grad_norm": 3.7201252453507907, - "learning_rate": 4.00174655012875e-06, - "loss": 0.1329, + "epoch": 1.4416831403558317, + "grad_norm": 3.4519019507425375, + "learning_rate": 1.111358508318138e-05, + "loss": 0.6123, "step": 10210 }, { - "epoch": 2.141119731599916, - "grad_norm": 4.22883419120911, - "learning_rate": 3.999935299696363e-06, - "loss": 0.177, + "epoch": 1.4418243434058176, + "grad_norm": 3.646120758335199, + "learning_rate": 1.1112070098932348e-05, + "loss": 0.5522, "step": 10211 }, { - "epoch": 2.1413294191654435, - "grad_norm": 3.474785895417658, - "learning_rate": 3.998124356777697e-06, - "loss": 0.1242, + "epoch": 1.4419655464558034, + "grad_norm": 4.281977412438733, + "learning_rate": 1.1110555088839289e-05, + "loss": 0.7513, "step": 10212 }, { - "epoch": 2.141539106730971, - "grad_norm": 6.831029604933241, - "learning_rate": 3.996313721465572e-06, - "loss": 0.1633, + "epoch": 1.4421067495057893, + "grad_norm": 4.129462842693757, + "learning_rate": 1.1109040052937405e-05, + "loss": 0.6076, "step": 10213 }, { - "epoch": 2.141748794296498, - "grad_norm": 5.004839838913437, - "learning_rate": 3.994503393852781e-06, - "loss": 0.1742, + "epoch": 1.4422479525557752, + "grad_norm": 3.751064814271135, + "learning_rate": 1.1107524991261913e-05, + "loss": 0.668, "step": 10214 }, { - "epoch": 2.1419584818620256, - "grad_norm": 5.448940202300236, - "learning_rate": 3.992693374032104e-06, - "loss": 0.1694, + "epoch": 1.442389155605761, + "grad_norm": 3.1525467752235694, + "learning_rate": 1.1106009903848016e-05, + "loss": 0.5598, "step": 10215 }, { - "epoch": 2.1421681694275527, - "grad_norm": 3.97149105611284, - "learning_rate": 3.990883662096314e-06, - "loss": 0.1518, + "epoch": 1.442530358655747, + "grad_norm": 3.699244231313742, + "learning_rate": 1.1104494790730929e-05, + "loss": 0.5881, "step": 10216 }, { - "epoch": 2.1423778569930803, - "grad_norm": 4.060765155734436, - "learning_rate": 3.9890742581381545e-06, - "loss": 0.1706, + "epoch": 1.4426715617057329, + "grad_norm": 2.7778617164859587, + "learning_rate": 1.1102979651945858e-05, + "loss": 0.5036, "step": 10217 }, { - "epoch": 2.142587544558608, - "grad_norm": 3.2430490894378527, - "learning_rate": 3.98726516225037e-06, - "loss": 0.1362, + "epoch": 1.4428127647557187, + "grad_norm": 3.5449157844731625, + "learning_rate": 1.1101464487528017e-05, + "loss": 0.5324, "step": 10218 }, { - "epoch": 2.142797232124135, - "grad_norm": 4.474665573475698, - "learning_rate": 3.9854563745256734e-06, - "loss": 0.1348, + "epoch": 1.4429539678057046, + "grad_norm": 3.609013425091384, + "learning_rate": 1.1099949297512614e-05, + "loss": 0.4805, "step": 10219 }, { - "epoch": 2.1430069196896624, - "grad_norm": 3.6961830178120034, - "learning_rate": 3.983647895056766e-06, - "loss": 0.1207, + "epoch": 1.4430951708556905, + "grad_norm": 3.859529008220305, + "learning_rate": 1.1098434081934871e-05, + "loss": 0.5877, "step": 10220 }, { - "epoch": 2.14321660725519, - "grad_norm": 4.937679099466792, - "learning_rate": 3.981839723936338e-06, - "loss": 0.1862, + "epoch": 1.4432363739056764, + "grad_norm": 4.185219422689172, + "learning_rate": 1.1096918840829987e-05, + "loss": 0.6904, "step": 10221 }, { - "epoch": 2.143426294820717, - "grad_norm": 3.690501765546378, - "learning_rate": 3.980031861257066e-06, - "loss": 0.128, + "epoch": 1.4433775769556623, + "grad_norm": 3.248507218942643, + "learning_rate": 1.1095403574233185e-05, + "loss": 0.5747, "step": 10222 }, { - "epoch": 2.1436359823862445, - "grad_norm": 4.3130680932160095, - "learning_rate": 3.978224307111603e-06, - "loss": 0.1468, + "epoch": 1.4435187800056481, + "grad_norm": 3.890015793755085, + "learning_rate": 1.109388828217968e-05, + "loss": 0.6095, "step": 10223 }, { - "epoch": 2.143845669951772, - "grad_norm": 4.945284649204034, - "learning_rate": 3.976417061592585e-06, - "loss": 0.1742, + "epoch": 1.443659983055634, + "grad_norm": 3.7417523024313013, + "learning_rate": 1.1092372964704681e-05, + "loss": 0.6152, "step": 10224 }, { - "epoch": 2.144055357517299, - "grad_norm": 3.727374764555077, - "learning_rate": 3.974610124792642e-06, - "loss": 0.1529, + "epoch": 1.44380118610562, + "grad_norm": 3.1791941541936537, + "learning_rate": 1.1090857621843407e-05, + "loss": 0.5531, "step": 10225 }, { - "epoch": 2.1442650450828267, - "grad_norm": 4.5375334455905065, - "learning_rate": 3.972803496804378e-06, - "loss": 0.1637, + "epoch": 1.4439423891556058, + "grad_norm": 3.5683323594358862, + "learning_rate": 1.1089342253631074e-05, + "loss": 0.5281, "step": 10226 }, { - "epoch": 2.1444747326483538, - "grad_norm": 3.709471930166969, - "learning_rate": 3.970997177720392e-06, - "loss": 0.141, + "epoch": 1.4440835922055917, + "grad_norm": 3.345598021586362, + "learning_rate": 1.1087826860102895e-05, + "loss": 0.504, "step": 10227 }, { - "epoch": 2.1446844202138813, - "grad_norm": 4.079453304295589, - "learning_rate": 3.969191167633257e-06, - "loss": 0.1257, + "epoch": 1.4442247952555776, + "grad_norm": 2.976035220109421, + "learning_rate": 1.108631144129409e-05, + "loss": 0.478, "step": 10228 }, { - "epoch": 2.144894107779409, - "grad_norm": 4.544578940941523, - "learning_rate": 3.967385466635531e-06, - "loss": 0.1664, + "epoch": 1.4443659983055634, + "grad_norm": 3.342457219337774, + "learning_rate": 1.108479599723988e-05, + "loss": 0.5642, "step": 10229 }, { - "epoch": 2.145103795344936, - "grad_norm": 4.439339712354099, - "learning_rate": 3.9655800748197645e-06, - "loss": 0.1424, + "epoch": 1.4445072013555493, + "grad_norm": 3.3091616659156737, + "learning_rate": 1.1083280527975475e-05, + "loss": 0.5695, "step": 10230 }, { - "epoch": 2.1453134829104634, - "grad_norm": 3.951014624152122, - "learning_rate": 3.963774992278491e-06, - "loss": 0.1577, + "epoch": 1.4446484044055352, + "grad_norm": 3.6218192690139, + "learning_rate": 1.10817650335361e-05, + "loss": 0.6577, "step": 10231 }, { - "epoch": 2.145523170475991, - "grad_norm": 3.303602404309969, - "learning_rate": 3.961970219104212e-06, - "loss": 0.12, + "epoch": 1.444789607455521, + "grad_norm": 3.1510252401785306, + "learning_rate": 1.1080249513956973e-05, + "loss": 0.4891, "step": 10232 }, { - "epoch": 2.145732858041518, - "grad_norm": 3.541266713995233, - "learning_rate": 3.960165755389431e-06, - "loss": 0.144, + "epoch": 1.444930810505507, + "grad_norm": 2.599431962433065, + "learning_rate": 1.1078733969273315e-05, + "loss": 0.5103, "step": 10233 }, { - "epoch": 2.1459425456070456, - "grad_norm": 3.6086508076778285, - "learning_rate": 3.958361601226635e-06, - "loss": 0.146, + "epoch": 1.4450720135554929, + "grad_norm": 3.582409456931109, + "learning_rate": 1.1077218399520344e-05, + "loss": 0.5087, "step": 10234 }, { - "epoch": 2.1461522331725726, - "grad_norm": 3.071972091181923, - "learning_rate": 3.956557756708285e-06, - "loss": 0.1234, + "epoch": 1.4452132166054787, + "grad_norm": 3.1594310607993124, + "learning_rate": 1.107570280473328e-05, + "loss": 0.5941, "step": 10235 }, { - "epoch": 2.1463619207381, - "grad_norm": 5.557180803789155, - "learning_rate": 3.954754221926827e-06, - "loss": 0.2322, + "epoch": 1.4453544196554646, + "grad_norm": 3.160235914729992, + "learning_rate": 1.1074187184947351e-05, + "loss": 0.5561, "step": 10236 }, { - "epoch": 2.1465716083036277, - "grad_norm": 4.645234789793375, - "learning_rate": 3.952950996974706e-06, - "loss": 0.1665, + "epoch": 1.4454956227054505, + "grad_norm": 3.8734880272602736, + "learning_rate": 1.1072671540197777e-05, + "loss": 0.5561, "step": 10237 }, { - "epoch": 2.146781295869155, - "grad_norm": 4.644390479842836, - "learning_rate": 3.9511480819443295e-06, - "loss": 0.1568, + "epoch": 1.4456368257554364, + "grad_norm": 3.6081771343340634, + "learning_rate": 1.1071155870519777e-05, + "loss": 0.5966, "step": 10238 }, { - "epoch": 2.1469909834346823, - "grad_norm": 3.844798241102887, - "learning_rate": 3.949345476928109e-06, - "loss": 0.1726, + "epoch": 1.4457780288054223, + "grad_norm": 3.6605425834216976, + "learning_rate": 1.1069640175948577e-05, + "loss": 0.7157, "step": 10239 }, { - "epoch": 2.14720067100021, - "grad_norm": 4.0235761240114645, - "learning_rate": 3.947543182018427e-06, - "loss": 0.1423, + "epoch": 1.4459192318554082, + "grad_norm": 3.3878699621693844, + "learning_rate": 1.1068124456519402e-05, + "loss": 0.6234, "step": 10240 }, { - "epoch": 2.147410358565737, - "grad_norm": 5.305409190296219, - "learning_rate": 3.945741197307651e-06, - "loss": 0.1742, + "epoch": 1.446060434905394, + "grad_norm": 3.844983757105961, + "learning_rate": 1.1066608712267475e-05, + "loss": 0.6527, "step": 10241 }, { - "epoch": 2.1476200461312644, - "grad_norm": 3.12150027658663, - "learning_rate": 3.943939522888138e-06, - "loss": 0.1212, + "epoch": 1.44620163795538, + "grad_norm": 4.155255481025409, + "learning_rate": 1.1065092943228024e-05, + "loss": 0.6436, "step": 10242 }, { - "epoch": 2.147829733696792, - "grad_norm": 4.4521856609745525, - "learning_rate": 3.942138158852233e-06, - "loss": 0.1695, + "epoch": 1.4463428410053658, + "grad_norm": 3.5204935428486217, + "learning_rate": 1.1063577149436274e-05, + "loss": 0.6684, "step": 10243 }, { - "epoch": 2.148039421262319, - "grad_norm": 4.350284112977845, - "learning_rate": 3.940337105292255e-06, - "loss": 0.17, + "epoch": 1.4464840440553517, + "grad_norm": 3.7783460641116573, + "learning_rate": 1.1062061330927445e-05, + "loss": 0.685, "step": 10244 }, { - "epoch": 2.1482491088278466, - "grad_norm": 3.623826233435792, - "learning_rate": 3.938536362300505e-06, - "loss": 0.1371, + "epoch": 1.4466252471053376, + "grad_norm": 3.6386803765460276, + "learning_rate": 1.1060545487736772e-05, + "loss": 0.5434, "step": 10245 }, { - "epoch": 2.1484587963933737, - "grad_norm": 4.881080766673594, - "learning_rate": 3.936735929969281e-06, - "loss": 0.1974, + "epoch": 1.4467664501553235, + "grad_norm": 3.3790685106411202, + "learning_rate": 1.1059029619899483e-05, + "loss": 0.5858, "step": 10246 }, { - "epoch": 2.148668483958901, - "grad_norm": 5.026858077527963, - "learning_rate": 3.93493580839086e-06, - "loss": 0.1772, + "epoch": 1.4469076532053093, + "grad_norm": 3.218739877938669, + "learning_rate": 1.1057513727450798e-05, + "loss": 0.5692, "step": 10247 }, { - "epoch": 2.1488781715244287, - "grad_norm": 2.7366228822996934, - "learning_rate": 3.933135997657497e-06, - "loss": 0.0839, + "epoch": 1.4470488562552952, + "grad_norm": 4.138984876204369, + "learning_rate": 1.1055997810425954e-05, + "loss": 0.7225, "step": 10248 }, { - "epoch": 2.149087859089956, - "grad_norm": 4.393154899593718, - "learning_rate": 3.931336497861433e-06, - "loss": 0.1595, + "epoch": 1.447190059305281, + "grad_norm": 3.4874890445087234, + "learning_rate": 1.1054481868860177e-05, + "loss": 0.5904, "step": 10249 }, { - "epoch": 2.1492975466554833, - "grad_norm": 3.771485766371555, - "learning_rate": 3.929537309094904e-06, - "loss": 0.1356, + "epoch": 1.447331262355267, + "grad_norm": 4.085315244710668, + "learning_rate": 1.1052965902788694e-05, + "loss": 0.6542, "step": 10250 }, { - "epoch": 2.149507234221011, - "grad_norm": 4.106900801554359, - "learning_rate": 3.927738431450116e-06, - "loss": 0.1572, + "epoch": 1.4474724654052529, + "grad_norm": 3.4982887998917516, + "learning_rate": 1.1051449912246742e-05, + "loss": 0.4717, "step": 10251 }, { - "epoch": 2.149716921786538, - "grad_norm": 4.268596240966985, - "learning_rate": 3.925939865019262e-06, - "loss": 0.1492, + "epoch": 1.4476136684552388, + "grad_norm": 3.5071305921382145, + "learning_rate": 1.1049933897269547e-05, + "loss": 0.6296, "step": 10252 }, { - "epoch": 2.1499266093520655, - "grad_norm": 4.860702241878373, - "learning_rate": 3.924141609894528e-06, - "loss": 0.1902, + "epoch": 1.4477548715052246, + "grad_norm": 3.641597912661041, + "learning_rate": 1.1048417857892339e-05, + "loss": 0.6142, "step": 10253 }, { - "epoch": 2.1501362969175926, - "grad_norm": 4.520183363697384, - "learning_rate": 3.92234366616807e-06, - "loss": 0.1666, + "epoch": 1.4478960745552105, + "grad_norm": 3.3152501380191715, + "learning_rate": 1.1046901794150358e-05, + "loss": 0.5459, "step": 10254 }, { - "epoch": 2.15034598448312, - "grad_norm": 3.8261630062810554, - "learning_rate": 3.920546033932044e-06, - "loss": 0.1639, + "epoch": 1.4480372776051964, + "grad_norm": 3.3479763042900834, + "learning_rate": 1.1045385706078826e-05, + "loss": 0.5681, "step": 10255 }, { - "epoch": 2.1505556720486476, - "grad_norm": 3.8025096376025047, - "learning_rate": 3.918748713278579e-06, - "loss": 0.1308, + "epoch": 1.4481784806551823, + "grad_norm": 3.8582218447196195, + "learning_rate": 1.1043869593712984e-05, + "loss": 0.6682, "step": 10256 }, { - "epoch": 2.1507653596141747, - "grad_norm": 3.368423962803322, - "learning_rate": 3.916951704299786e-06, - "loss": 0.145, + "epoch": 1.448319683705168, + "grad_norm": 3.802552777334054, + "learning_rate": 1.1042353457088061e-05, + "loss": 0.6307, "step": 10257 }, { - "epoch": 2.1509750471797022, - "grad_norm": 3.604569715272275, - "learning_rate": 3.91515500708777e-06, - "loss": 0.1484, + "epoch": 1.4484608867551538, + "grad_norm": 3.3372462847401443, + "learning_rate": 1.1040837296239298e-05, + "loss": 0.6404, "step": 10258 }, { - "epoch": 2.1511847347452298, - "grad_norm": 4.399770835695097, - "learning_rate": 3.913358621734615e-06, - "loss": 0.1305, + "epoch": 1.4486020898051397, + "grad_norm": 3.801508859395669, + "learning_rate": 1.1039321111201925e-05, + "loss": 0.6327, "step": 10259 }, { - "epoch": 2.151394422310757, - "grad_norm": 3.866625956503527, - "learning_rate": 3.9115625483323885e-06, - "loss": 0.1026, + "epoch": 1.4487432928551256, + "grad_norm": 3.7170545622961635, + "learning_rate": 1.1037804902011175e-05, + "loss": 0.6207, "step": 10260 }, { - "epoch": 2.1516041098762844, - "grad_norm": 4.513341007626147, - "learning_rate": 3.909766786973137e-06, - "loss": 0.158, + "epoch": 1.4488844959051115, + "grad_norm": 3.7115005010479125, + "learning_rate": 1.103628866870229e-05, + "loss": 0.5959, "step": 10261 }, { - "epoch": 2.151813797441812, - "grad_norm": 4.601239239501308, - "learning_rate": 3.907971337748904e-06, - "loss": 0.158, + "epoch": 1.4490256989550974, + "grad_norm": 3.7759117772848207, + "learning_rate": 1.1034772411310503e-05, + "loss": 0.5902, "step": 10262 }, { - "epoch": 2.152023485007339, - "grad_norm": 3.4288479114325234, - "learning_rate": 3.9061762007517045e-06, - "loss": 0.1248, + "epoch": 1.4491669020050832, + "grad_norm": 3.4630420107690827, + "learning_rate": 1.1033256129871053e-05, + "loss": 0.5829, "step": 10263 }, { - "epoch": 2.1522331725728665, - "grad_norm": 4.668719233752378, - "learning_rate": 3.904381376073546e-06, - "loss": 0.1449, + "epoch": 1.4493081050550691, + "grad_norm": 3.586183584217599, + "learning_rate": 1.1031739824419175e-05, + "loss": 0.5029, "step": 10264 }, { - "epoch": 2.1524428601383936, - "grad_norm": 3.8480362376539543, - "learning_rate": 3.9025868638064135e-06, - "loss": 0.1363, + "epoch": 1.449449308105055, + "grad_norm": 3.876543347961651, + "learning_rate": 1.103022349499011e-05, + "loss": 0.6651, "step": 10265 }, { - "epoch": 2.152652547703921, - "grad_norm": 4.643328068394935, - "learning_rate": 3.900792664042278e-06, - "loss": 0.1526, + "epoch": 1.449590511155041, + "grad_norm": 2.796508523258564, + "learning_rate": 1.1028707141619095e-05, + "loss": 0.4626, "step": 10266 }, { - "epoch": 2.1528622352694486, - "grad_norm": 3.577727022641396, - "learning_rate": 3.898998776873095e-06, - "loss": 0.1487, + "epoch": 1.4497317142050268, + "grad_norm": 4.148759062646071, + "learning_rate": 1.102719076434137e-05, + "loss": 0.6524, "step": 10267 }, { - "epoch": 2.1530719228349757, - "grad_norm": 5.218745700254782, - "learning_rate": 3.897205202390815e-06, - "loss": 0.1854, + "epoch": 1.4498729172550127, + "grad_norm": 3.1981412789287296, + "learning_rate": 1.1025674363192179e-05, + "loss": 0.5829, "step": 10268 }, { - "epoch": 2.1532816104005033, - "grad_norm": 3.925820989668583, - "learning_rate": 3.895411940687345e-06, - "loss": 0.1617, + "epoch": 1.4500141203049985, + "grad_norm": 3.336529985561878, + "learning_rate": 1.1024157938206755e-05, + "loss": 0.623, "step": 10269 }, { - "epoch": 2.153491297966031, - "grad_norm": 4.708899920666233, - "learning_rate": 3.893618991854602e-06, - "loss": 0.181, + "epoch": 1.4501553233549844, + "grad_norm": 3.37614490203797, + "learning_rate": 1.1022641489420342e-05, + "loss": 0.6336, "step": 10270 }, { - "epoch": 2.153700985531558, - "grad_norm": 4.994359743371025, - "learning_rate": 3.891826355984479e-06, - "loss": 0.1592, + "epoch": 1.4502965264049703, + "grad_norm": 3.4216815206577635, + "learning_rate": 1.1021125016868189e-05, + "loss": 0.5397, "step": 10271 }, { - "epoch": 2.1539106730970854, - "grad_norm": 4.9125148361272615, - "learning_rate": 3.89003403316885e-06, - "loss": 0.2269, + "epoch": 1.4504377294549562, + "grad_norm": 4.081607858482453, + "learning_rate": 1.1019608520585525e-05, + "loss": 0.7116, "step": 10272 }, { - "epoch": 2.1541203606626125, - "grad_norm": 3.7447386156147924, - "learning_rate": 3.88824202349957e-06, - "loss": 0.1523, + "epoch": 1.450578932504942, + "grad_norm": 3.3634227207403797, + "learning_rate": 1.1018092000607599e-05, + "loss": 0.5857, "step": 10273 }, { - "epoch": 2.15433004822814, - "grad_norm": 4.1215467607407845, - "learning_rate": 3.886450327068491e-06, - "loss": 0.1349, + "epoch": 1.450720135554928, + "grad_norm": 3.2964944355924493, + "learning_rate": 1.1016575456969658e-05, + "loss": 0.5394, "step": 10274 }, { - "epoch": 2.1545397357936675, - "grad_norm": 3.3658178408843935, - "learning_rate": 3.884658943967432e-06, - "loss": 0.1401, + "epoch": 1.4508613386049138, + "grad_norm": 3.4346688129993446, + "learning_rate": 1.1015058889706942e-05, + "loss": 0.6222, "step": 10275 }, { - "epoch": 2.1547494233591946, - "grad_norm": 4.544619638715648, - "learning_rate": 3.882867874288212e-06, - "loss": 0.1624, + "epoch": 1.4510025416548997, + "grad_norm": 3.456330243041741, + "learning_rate": 1.1013542298854696e-05, + "loss": 0.6402, "step": 10276 }, { - "epoch": 2.154959110924722, - "grad_norm": 5.357227598319061, - "learning_rate": 3.8810771181226235e-06, - "loss": 0.1793, + "epoch": 1.4511437447048856, + "grad_norm": 3.1236848332178835, + "learning_rate": 1.1012025684448162e-05, + "loss": 0.4698, "step": 10277 }, { - "epoch": 2.1551687984902497, - "grad_norm": 3.6002854624584697, - "learning_rate": 3.879286675562441e-06, - "loss": 0.143, + "epoch": 1.4512849477548715, + "grad_norm": 2.8406817123197703, + "learning_rate": 1.101050904652259e-05, + "loss": 0.5285, "step": 10278 }, { - "epoch": 2.1553784860557768, - "grad_norm": 3.9447058019985612, - "learning_rate": 3.877496546699433e-06, - "loss": 0.1259, + "epoch": 1.4514261508048574, + "grad_norm": 3.5626459349515405, + "learning_rate": 1.1008992385113224e-05, + "loss": 0.5626, "step": 10279 }, { - "epoch": 2.1555881736213043, - "grad_norm": 5.090011686312419, - "learning_rate": 3.875706731625349e-06, - "loss": 0.193, + "epoch": 1.4515673538548433, + "grad_norm": 4.292306082624468, + "learning_rate": 1.1007475700255313e-05, + "loss": 0.7603, "step": 10280 }, { - "epoch": 2.155797861186832, - "grad_norm": 4.0802611252451655, - "learning_rate": 3.8739172304319185e-06, - "loss": 0.1469, + "epoch": 1.4517085569048291, + "grad_norm": 3.236743585812309, + "learning_rate": 1.1005958991984096e-05, + "loss": 0.4301, "step": 10281 }, { - "epoch": 2.156007548752359, - "grad_norm": 6.063055375557348, - "learning_rate": 3.87212804321085e-06, - "loss": 0.1772, + "epoch": 1.451849759954815, + "grad_norm": 4.105005606777591, + "learning_rate": 1.1004442260334832e-05, + "loss": 0.6253, "step": 10282 }, { - "epoch": 2.1562172363178864, - "grad_norm": 4.621646894552943, - "learning_rate": 3.870339170053851e-06, - "loss": 0.1436, + "epoch": 1.451990963004801, + "grad_norm": 3.1024464325030516, + "learning_rate": 1.1002925505342761e-05, + "loss": 0.4616, "step": 10283 }, { - "epoch": 2.156426923883414, - "grad_norm": 4.3106142211867855, - "learning_rate": 3.868550611052598e-06, - "loss": 0.1449, + "epoch": 1.4521321660547868, + "grad_norm": 2.8474071568495902, + "learning_rate": 1.1001408727043135e-05, + "loss": 0.5353, "step": 10284 }, { - "epoch": 2.156636611448941, - "grad_norm": 4.019818729487891, - "learning_rate": 3.8667623662987655e-06, - "loss": 0.1374, + "epoch": 1.4522733691047727, + "grad_norm": 3.704881012967332, + "learning_rate": 1.0999891925471205e-05, + "loss": 0.6117, "step": 10285 }, { - "epoch": 2.1568462990144686, - "grad_norm": 4.967431408251424, - "learning_rate": 3.864974435883997e-06, - "loss": 0.1427, + "epoch": 1.4524145721547586, + "grad_norm": 3.711059401069895, + "learning_rate": 1.0998375100662215e-05, + "loss": 0.5509, "step": 10286 }, { - "epoch": 2.1570559865799956, - "grad_norm": 3.894330024542563, - "learning_rate": 3.863186819899927e-06, - "loss": 0.158, + "epoch": 1.4525557752047444, + "grad_norm": 4.105082232695406, + "learning_rate": 1.0996858252651419e-05, + "loss": 0.6737, "step": 10287 }, { - "epoch": 2.157265674145523, - "grad_norm": 4.3751328910075875, - "learning_rate": 3.861399518438176e-06, - "loss": 0.1656, + "epoch": 1.4526969782547303, + "grad_norm": 4.124078518220901, + "learning_rate": 1.099534138147407e-05, + "loss": 0.6554, "step": 10288 }, { - "epoch": 2.1574753617110507, - "grad_norm": 3.5823785221965263, - "learning_rate": 3.859612531590353e-06, - "loss": 0.1329, + "epoch": 1.4528381813047162, + "grad_norm": 4.378122190321998, + "learning_rate": 1.0993824487165416e-05, + "loss": 0.7573, "step": 10289 }, { - "epoch": 2.157685049276578, - "grad_norm": 3.889814684066467, - "learning_rate": 3.857825859448031e-06, - "loss": 0.1489, + "epoch": 1.452979384354702, + "grad_norm": 3.3598573413751107, + "learning_rate": 1.0992307569760709e-05, + "loss": 0.5607, "step": 10290 }, { - "epoch": 2.1578947368421053, - "grad_norm": 4.9848592709399915, - "learning_rate": 3.856039502102788e-06, - "loss": 0.2092, + "epoch": 1.453120587404688, + "grad_norm": 3.4125949641752857, + "learning_rate": 1.0990790629295204e-05, + "loss": 0.5449, "step": 10291 }, { - "epoch": 2.1581044244076324, - "grad_norm": 3.7051199753307573, - "learning_rate": 3.854253459646179e-06, - "loss": 0.1072, + "epoch": 1.4532617904546739, + "grad_norm": 4.044745567078359, + "learning_rate": 1.0989273665804154e-05, + "loss": 0.6825, "step": 10292 }, { - "epoch": 2.15831411197316, - "grad_norm": 3.321550239048246, - "learning_rate": 3.852467732169739e-06, - "loss": 0.1103, + "epoch": 1.4534029935046597, + "grad_norm": 3.29475211094661, + "learning_rate": 1.0987756679322807e-05, + "loss": 0.527, "step": 10293 }, { - "epoch": 2.1585237995386874, - "grad_norm": 4.333261889831508, - "learning_rate": 3.850682319764988e-06, - "loss": 0.1382, + "epoch": 1.4535441965546456, + "grad_norm": 3.216781855239586, + "learning_rate": 1.0986239669886425e-05, + "loss": 0.4855, "step": 10294 }, { - "epoch": 2.1587334871042145, - "grad_norm": 4.9737240282806345, - "learning_rate": 3.848897222523436e-06, - "loss": 0.176, + "epoch": 1.4536853996046315, + "grad_norm": 3.961603309861623, + "learning_rate": 1.0984722637530258e-05, + "loss": 0.6702, "step": 10295 }, { - "epoch": 2.158943174669742, - "grad_norm": 5.8139843942408485, - "learning_rate": 3.847112440536567e-06, - "loss": 0.1637, + "epoch": 1.4538266026546174, + "grad_norm": 4.012340623267666, + "learning_rate": 1.0983205582289563e-05, + "loss": 0.7655, "step": 10296 }, { - "epoch": 2.1591528622352696, - "grad_norm": 4.893167442342231, - "learning_rate": 3.845327973895859e-06, - "loss": 0.1298, + "epoch": 1.4539678057046033, + "grad_norm": 3.522880308302813, + "learning_rate": 1.0981688504199595e-05, + "loss": 0.5083, "step": 10297 }, { - "epoch": 2.1593625498007967, - "grad_norm": 4.183142974799366, - "learning_rate": 3.843543822692769e-06, - "loss": 0.1383, + "epoch": 1.4541090087545891, + "grad_norm": 2.904759395936941, + "learning_rate": 1.098017140329561e-05, + "loss": 0.4356, "step": 10298 }, { - "epoch": 2.159572237366324, - "grad_norm": 3.5153568968269795, - "learning_rate": 3.841759987018731e-06, - "loss": 0.1328, + "epoch": 1.454250211804575, + "grad_norm": 4.222084561270195, + "learning_rate": 1.0978654279612862e-05, + "loss": 0.7376, "step": 10299 }, { - "epoch": 2.1597819249318517, - "grad_norm": 4.3158992023968485, - "learning_rate": 3.839976466965176e-06, - "loss": 0.1861, + "epoch": 1.454391414854561, + "grad_norm": 3.9992250996167775, + "learning_rate": 1.0977137133186613e-05, + "loss": 0.5776, "step": 10300 }, { - "epoch": 2.159991612497379, - "grad_norm": 3.869353492394298, - "learning_rate": 3.838193262623514e-06, - "loss": 0.1166, + "epoch": 1.4545326179045468, + "grad_norm": 3.9470818868904813, + "learning_rate": 1.0975619964052118e-05, + "loss": 0.6164, "step": 10301 }, { - "epoch": 2.1602013000629063, - "grad_norm": 3.833974048898893, - "learning_rate": 3.836410374085134e-06, - "loss": 0.1196, + "epoch": 1.4546738209545327, + "grad_norm": 3.2716421620549934, + "learning_rate": 1.0974102772244638e-05, + "loss": 0.6067, "step": 10302 }, { - "epoch": 2.160410987628434, - "grad_norm": 4.710967247269625, - "learning_rate": 3.834627801441409e-06, - "loss": 0.1544, + "epoch": 1.4548150240045186, + "grad_norm": 3.925522801048943, + "learning_rate": 1.097258555779943e-05, + "loss": 0.6954, "step": 10303 }, { - "epoch": 2.160620675193961, - "grad_norm": 4.500952119278785, - "learning_rate": 3.832845544783703e-06, - "loss": 0.1333, + "epoch": 1.4549562270545044, + "grad_norm": 4.230996507642602, + "learning_rate": 1.0971068320751753e-05, + "loss": 0.5994, "step": 10304 }, { - "epoch": 2.1608303627594885, - "grad_norm": 3.540504490872378, - "learning_rate": 3.831063604203363e-06, - "loss": 0.1148, + "epoch": 1.4550974301044903, + "grad_norm": 2.769565190211379, + "learning_rate": 1.096955106113687e-05, + "loss": 0.3795, "step": 10305 }, { - "epoch": 2.1610400503250156, - "grad_norm": 4.065927088944747, - "learning_rate": 3.829281979791711e-06, - "loss": 0.1458, + "epoch": 1.4552386331544762, + "grad_norm": 3.2421017341293767, + "learning_rate": 1.0968033778990038e-05, + "loss": 0.5702, "step": 10306 }, { - "epoch": 2.161249737890543, - "grad_norm": 3.5827022833170776, - "learning_rate": 3.8275006716400585e-06, - "loss": 0.1381, + "epoch": 1.455379836204462, + "grad_norm": 3.234909043921726, + "learning_rate": 1.0966516474346515e-05, + "loss": 0.5278, "step": 10307 }, { - "epoch": 2.1614594254560706, - "grad_norm": 3.7723839104165453, - "learning_rate": 3.825719679839706e-06, - "loss": 0.1517, + "epoch": 1.455521039254448, + "grad_norm": 3.3577658470041847, + "learning_rate": 1.096499914724157e-05, + "loss": 0.5446, "step": 10308 }, { - "epoch": 2.1616691130215977, - "grad_norm": 3.8250447005732915, - "learning_rate": 3.823939004481923e-06, - "loss": 0.1432, + "epoch": 1.4556622423044336, + "grad_norm": 3.407952393923366, + "learning_rate": 1.0963481797710465e-05, + "loss": 0.518, "step": 10309 }, { - "epoch": 2.1618788005871252, - "grad_norm": 3.9967357353104327, - "learning_rate": 3.822158645657983e-06, - "loss": 0.1532, + "epoch": 1.4558034453544195, + "grad_norm": 3.9776042585345466, + "learning_rate": 1.0961964425788452e-05, + "loss": 0.6584, "step": 10310 }, { - "epoch": 2.1620884881526528, - "grad_norm": 3.8634173472455173, - "learning_rate": 3.820378603459127e-06, - "loss": 0.1628, + "epoch": 1.4559446484044054, + "grad_norm": 3.9229993359686564, + "learning_rate": 1.0960447031510806e-05, + "loss": 0.6244, "step": 10311 }, { - "epoch": 2.16229817571818, - "grad_norm": 3.323198722801173, - "learning_rate": 3.818598877976583e-06, - "loss": 0.111, + "epoch": 1.4560858514543913, + "grad_norm": 3.7691702152409166, + "learning_rate": 1.0958929614912782e-05, + "loss": 0.5441, "step": 10312 }, { - "epoch": 2.1625078632837074, - "grad_norm": 3.580256053677794, - "learning_rate": 3.816819469301569e-06, - "loss": 0.1206, + "epoch": 1.4562270545043772, + "grad_norm": 4.336623781477096, + "learning_rate": 1.0957412176029654e-05, + "loss": 0.8348, "step": 10313 }, { - "epoch": 2.1627175508492344, - "grad_norm": 3.63978966824555, - "learning_rate": 3.815040377525283e-06, - "loss": 0.1163, + "epoch": 1.456368257554363, + "grad_norm": 3.1462450253665852, + "learning_rate": 1.0955894714896675e-05, + "loss": 0.5004, "step": 10314 }, { - "epoch": 2.162927238414762, - "grad_norm": 4.8457774877704285, - "learning_rate": 3.8132616027389e-06, - "loss": 0.1808, + "epoch": 1.456509460604349, + "grad_norm": 3.2509581267816925, + "learning_rate": 1.0954377231549118e-05, + "loss": 0.4721, "step": 10315 }, { - "epoch": 2.1631369259802895, - "grad_norm": 4.911369531649282, - "learning_rate": 3.811483145033592e-06, - "loss": 0.1801, + "epoch": 1.4566506636543348, + "grad_norm": 4.061972220006804, + "learning_rate": 1.0952859726022245e-05, + "loss": 0.6344, "step": 10316 }, { - "epoch": 2.1633466135458166, - "grad_norm": 4.1665466674770535, - "learning_rate": 3.809705004500509e-06, - "loss": 0.1739, + "epoch": 1.4567918667043207, + "grad_norm": 3.2250983500476575, + "learning_rate": 1.0951342198351323e-05, + "loss": 0.4898, "step": 10317 }, { - "epoch": 2.163556301111344, - "grad_norm": 4.1553721080007, - "learning_rate": 3.8079271812307807e-06, - "loss": 0.135, + "epoch": 1.4569330697543066, + "grad_norm": 4.172205771095662, + "learning_rate": 1.094982464857162e-05, + "loss": 0.7877, "step": 10318 }, { - "epoch": 2.1637659886768716, - "grad_norm": 6.522481596317578, - "learning_rate": 3.8061496753155204e-06, - "loss": 0.1679, + "epoch": 1.4570742728042925, + "grad_norm": 3.6020383806437155, + "learning_rate": 1.0948307076718402e-05, + "loss": 0.5291, "step": 10319 }, { - "epoch": 2.1639756762423987, - "grad_norm": 4.0296596464567855, - "learning_rate": 3.8043724868458364e-06, - "loss": 0.1581, + "epoch": 1.4572154758542784, + "grad_norm": 3.3860146948272223, + "learning_rate": 1.094678948282694e-05, + "loss": 0.5806, "step": 10320 }, { - "epoch": 2.1641853638079263, - "grad_norm": 2.910459594629989, - "learning_rate": 3.8025956159128043e-06, - "loss": 0.119, + "epoch": 1.4573566789042642, + "grad_norm": 4.156249286545769, + "learning_rate": 1.0945271866932496e-05, + "loss": 0.6285, "step": 10321 }, { - "epoch": 2.164395051373454, - "grad_norm": 4.526608802515305, - "learning_rate": 3.800819062607498e-06, - "loss": 0.1678, + "epoch": 1.4574978819542501, + "grad_norm": 2.797652314536296, + "learning_rate": 1.0943754229070344e-05, + "loss": 0.3955, "step": 10322 }, { - "epoch": 2.164604738938981, - "grad_norm": 3.4757800200782345, - "learning_rate": 3.799042827020968e-06, - "loss": 0.1369, + "epoch": 1.457639085004236, + "grad_norm": 4.019988930518867, + "learning_rate": 1.094223656927575e-05, + "loss": 0.6738, "step": 10323 }, { - "epoch": 2.1648144265045084, - "grad_norm": 5.003717586994313, - "learning_rate": 3.797266909244245e-06, - "loss": 0.1943, + "epoch": 1.4577802880542219, + "grad_norm": 2.845459398148412, + "learning_rate": 1.0940718887583985e-05, + "loss": 0.4818, "step": 10324 }, { - "epoch": 2.1650241140700355, - "grad_norm": 4.417448058040557, - "learning_rate": 3.7954913093683497e-06, - "loss": 0.1388, + "epoch": 1.4579214911042078, + "grad_norm": 4.095464728730396, + "learning_rate": 1.093920118403032e-05, + "loss": 0.5949, "step": 10325 }, { - "epoch": 2.165233801635563, - "grad_norm": 4.163842074623893, - "learning_rate": 3.79371602748429e-06, - "loss": 0.1761, + "epoch": 1.4580626941541937, + "grad_norm": 3.596800392040946, + "learning_rate": 1.0937683458650029e-05, + "loss": 0.6219, "step": 10326 }, { - "epoch": 2.1654434892010905, - "grad_norm": 4.955661949231234, - "learning_rate": 3.791941063683048e-06, - "loss": 0.1589, + "epoch": 1.4582038972041795, + "grad_norm": 3.62212557203208, + "learning_rate": 1.0936165711478373e-05, + "loss": 0.6237, "step": 10327 }, { - "epoch": 2.1656531767666176, - "grad_norm": 3.3814612606396084, - "learning_rate": 3.790166418055591e-06, - "loss": 0.1166, + "epoch": 1.4583451002541654, + "grad_norm": 3.8831305825837306, + "learning_rate": 1.093464794255063e-05, + "loss": 0.6654, "step": 10328 }, { - "epoch": 2.165862864332145, - "grad_norm": 3.3215147568184524, - "learning_rate": 3.7883920906928784e-06, - "loss": 0.1182, + "epoch": 1.4584863033041513, + "grad_norm": 4.221288618480987, + "learning_rate": 1.0933130151902077e-05, + "loss": 0.6433, "step": 10329 }, { - "epoch": 2.1660725518976727, - "grad_norm": 4.115026731281144, - "learning_rate": 3.7866180816858444e-06, - "loss": 0.1514, + "epoch": 1.4586275063541372, + "grad_norm": 4.903291535950068, + "learning_rate": 1.093161233956798e-05, + "loss": 0.8723, "step": 10330 }, { - "epoch": 2.1662822394631998, - "grad_norm": 3.9539540771018995, - "learning_rate": 3.7848443911254074e-06, - "loss": 0.1438, + "epoch": 1.458768709404123, + "grad_norm": 3.5842497495379657, + "learning_rate": 1.0930094505583615e-05, + "loss": 0.6363, "step": 10331 }, { - "epoch": 2.1664919270287273, - "grad_norm": 3.944924922828241, - "learning_rate": 3.7830710191024777e-06, - "loss": 0.1426, + "epoch": 1.458909912454109, + "grad_norm": 4.227973659363782, + "learning_rate": 1.0928576649984254e-05, + "loss": 0.6225, "step": 10332 }, { - "epoch": 2.1667016145942544, - "grad_norm": 5.219824693326552, - "learning_rate": 3.7812979657079385e-06, - "loss": 0.1548, + "epoch": 1.4590511155040948, + "grad_norm": 3.843970772672388, + "learning_rate": 1.0927058772805172e-05, + "loss": 0.5726, "step": 10333 }, { - "epoch": 2.166911302159782, - "grad_norm": 6.686378470468252, - "learning_rate": 3.779525231032667e-06, - "loss": 0.2362, + "epoch": 1.4591923185540807, + "grad_norm": 2.8340116964624062, + "learning_rate": 1.0925540874081649e-05, + "loss": 0.4943, "step": 10334 }, { - "epoch": 2.1671209897253094, - "grad_norm": 4.41325012428354, - "learning_rate": 3.777752815167517e-06, - "loss": 0.1304, + "epoch": 1.4593335216040666, + "grad_norm": 3.5688549562963168, + "learning_rate": 1.0924022953848951e-05, + "loss": 0.6283, "step": 10335 }, { - "epoch": 2.1673306772908365, - "grad_norm": 4.666946070962675, - "learning_rate": 3.7759807182033236e-06, - "loss": 0.1679, + "epoch": 1.4594747246540525, + "grad_norm": 3.456899256247618, + "learning_rate": 1.092250501214236e-05, + "loss": 0.5626, "step": 10336 }, { - "epoch": 2.167540364856364, - "grad_norm": 4.61871361230585, - "learning_rate": 3.7742089402309122e-06, - "loss": 0.1526, + "epoch": 1.4596159277040384, + "grad_norm": 3.409645630217186, + "learning_rate": 1.0920987048997153e-05, + "loss": 0.5695, "step": 10337 }, { - "epoch": 2.1677500524218916, - "grad_norm": 5.5695614505724365, - "learning_rate": 3.772437481341096e-06, - "loss": 0.1771, + "epoch": 1.4597571307540242, + "grad_norm": 3.21927827517015, + "learning_rate": 1.0919469064448604e-05, + "loss": 0.6086, "step": 10338 }, { - "epoch": 2.1679597399874186, - "grad_norm": 3.8917628856711217, - "learning_rate": 3.770666341624659e-06, - "loss": 0.1568, + "epoch": 1.4598983338040101, + "grad_norm": 3.1512295950067557, + "learning_rate": 1.091795105853199e-05, + "loss": 0.5323, "step": 10339 }, { - "epoch": 2.168169427552946, - "grad_norm": 3.9307803032551276, - "learning_rate": 3.768895521172373e-06, - "loss": 0.1312, + "epoch": 1.460039536853996, + "grad_norm": 4.263606246348635, + "learning_rate": 1.0916433031282592e-05, + "loss": 0.7008, "step": 10340 }, { - "epoch": 2.1683791151184737, - "grad_norm": 4.004573833706044, - "learning_rate": 3.767125020075002e-06, - "loss": 0.1644, + "epoch": 1.460180739903982, + "grad_norm": 3.6660234701842658, + "learning_rate": 1.0914914982735682e-05, + "loss": 0.6468, "step": 10341 }, { - "epoch": 2.168588802684001, - "grad_norm": 4.852865509553495, - "learning_rate": 3.765354838423281e-06, - "loss": 0.1489, + "epoch": 1.4603219429539678, + "grad_norm": 7.799598329527165, + "learning_rate": 1.0913396912926546e-05, + "loss": 0.6673, "step": 10342 }, { - "epoch": 2.1687984902495283, - "grad_norm": 4.266909237431339, - "learning_rate": 3.7635849763079413e-06, - "loss": 0.1487, + "epoch": 1.4604631460039537, + "grad_norm": 3.50623794955251, + "learning_rate": 1.0911878821890461e-05, + "loss": 0.5584, "step": 10343 }, { - "epoch": 2.1690081778150554, - "grad_norm": 3.600150534923006, - "learning_rate": 3.761815433819688e-06, - "loss": 0.1381, + "epoch": 1.4606043490539395, + "grad_norm": 2.973710745789418, + "learning_rate": 1.0910360709662701e-05, + "loss": 0.5324, "step": 10344 }, { - "epoch": 2.169217865380583, - "grad_norm": 3.8982235636233513, - "learning_rate": 3.7600462110492097e-06, - "loss": 0.1323, + "epoch": 1.4607455521039254, + "grad_norm": 3.1027784580377142, + "learning_rate": 1.0908842576278555e-05, + "loss": 0.5893, "step": 10345 }, { - "epoch": 2.1694275529461104, - "grad_norm": 4.332766841079706, - "learning_rate": 3.758277308087186e-06, - "loss": 0.13, + "epoch": 1.4608867551539113, + "grad_norm": 3.3180762187604924, + "learning_rate": 1.0907324421773302e-05, + "loss": 0.5423, "step": 10346 }, { - "epoch": 2.1696372405116375, - "grad_norm": 4.95122904396534, - "learning_rate": 3.7565087250242825e-06, - "loss": 0.1457, + "epoch": 1.4610279582038972, + "grad_norm": 4.162682565085654, + "learning_rate": 1.0905806246182218e-05, + "loss": 0.649, "step": 10347 }, { - "epoch": 2.169846928077165, - "grad_norm": 3.9682599464666954, - "learning_rate": 3.7547404619511294e-06, - "loss": 0.1554, + "epoch": 1.461169161253883, + "grad_norm": 3.458562571212591, + "learning_rate": 1.090428804954059e-05, + "loss": 0.5893, "step": 10348 }, { - "epoch": 2.1700566156426926, - "grad_norm": 4.095760828256584, - "learning_rate": 3.752972518958359e-06, - "loss": 0.148, + "epoch": 1.461310364303869, + "grad_norm": 3.164810450700185, + "learning_rate": 1.0902769831883697e-05, + "loss": 0.4923, "step": 10349 }, { - "epoch": 2.1702663032082197, - "grad_norm": 3.5423264474596796, - "learning_rate": 3.7512048961365855e-06, - "loss": 0.134, + "epoch": 1.4614515673538548, + "grad_norm": 4.0093631410551955, + "learning_rate": 1.0901251593246822e-05, + "loss": 0.6879, "step": 10350 }, { - "epoch": 2.170475990773747, - "grad_norm": 4.731927735995504, - "learning_rate": 3.7494375935763995e-06, - "loss": 0.1553, + "epoch": 1.4615927704038407, + "grad_norm": 3.3033322653133683, + "learning_rate": 1.0899733333665252e-05, + "loss": 0.5475, "step": 10351 }, { - "epoch": 2.1706856783392743, - "grad_norm": 4.617120888454631, - "learning_rate": 3.7476706113683737e-06, - "loss": 0.181, + "epoch": 1.4617339734538266, + "grad_norm": 3.8529186288983595, + "learning_rate": 1.0898215053174268e-05, + "loss": 0.5359, "step": 10352 }, { - "epoch": 2.170895365904802, - "grad_norm": 3.3312324097953536, - "learning_rate": 3.745903949603077e-06, - "loss": 0.1435, + "epoch": 1.4618751765038125, + "grad_norm": 4.098375724514003, + "learning_rate": 1.089669675180915e-05, + "loss": 0.8594, "step": 10353 }, { - "epoch": 2.1711050534703293, - "grad_norm": 4.191360713803817, - "learning_rate": 3.7441376083710457e-06, - "loss": 0.1841, + "epoch": 1.4620163795537984, + "grad_norm": 3.6184167574785615, + "learning_rate": 1.0895178429605189e-05, + "loss": 0.5819, "step": 10354 }, { - "epoch": 2.1713147410358564, - "grad_norm": 3.981329947814955, - "learning_rate": 3.7423715877628166e-06, - "loss": 0.1473, + "epoch": 1.4621575826037843, + "grad_norm": 3.8381203701209112, + "learning_rate": 1.0893660086597668e-05, + "loss": 0.7206, "step": 10355 }, { - "epoch": 2.171524428601384, - "grad_norm": 3.9653747961666825, - "learning_rate": 3.7406058878688967e-06, - "loss": 0.1599, + "epoch": 1.4622987856537701, + "grad_norm": 2.938402918949583, + "learning_rate": 1.0892141722821873e-05, + "loss": 0.4686, "step": 10356 }, { - "epoch": 2.1717341161669115, - "grad_norm": 5.672372827672104, - "learning_rate": 3.7388405087797786e-06, - "loss": 0.19, + "epoch": 1.462439988703756, + "grad_norm": 3.698528741292727, + "learning_rate": 1.0890623338313089e-05, + "loss": 0.6042, "step": 10357 }, { - "epoch": 2.1719438037324386, - "grad_norm": 3.163269765690655, - "learning_rate": 3.737075450585943e-06, - "loss": 0.1036, + "epoch": 1.462581191753742, + "grad_norm": 2.8452363322434056, + "learning_rate": 1.0889104933106604e-05, + "loss": 0.4726, "step": 10358 }, { - "epoch": 2.172153491297966, - "grad_norm": 4.604462060597182, - "learning_rate": 3.7353107133778566e-06, - "loss": 0.1403, + "epoch": 1.4627223948037278, + "grad_norm": 3.556340544864714, + "learning_rate": 1.0887586507237702e-05, + "loss": 0.6424, "step": 10359 }, { - "epoch": 2.1723631788634936, - "grad_norm": 3.697339097810392, - "learning_rate": 3.7335462972459623e-06, - "loss": 0.1517, + "epoch": 1.4628635978537137, + "grad_norm": 4.071606828981776, + "learning_rate": 1.0886068060741676e-05, + "loss": 0.7168, "step": 10360 }, { - "epoch": 2.1725728664290207, - "grad_norm": 4.660279528361429, - "learning_rate": 3.7317822022806847e-06, - "loss": 0.1895, + "epoch": 1.4630048009036996, + "grad_norm": 4.441757937489678, + "learning_rate": 1.0884549593653808e-05, + "loss": 0.7299, "step": 10361 }, { - "epoch": 2.1727825539945482, - "grad_norm": 3.6950173092489185, - "learning_rate": 3.730018428572445e-06, - "loss": 0.1476, + "epoch": 1.4631460039536854, + "grad_norm": 3.614522493534942, + "learning_rate": 1.0883031106009393e-05, + "loss": 0.6229, "step": 10362 }, { - "epoch": 2.1729922415600753, - "grad_norm": 5.608187412786769, - "learning_rate": 3.7282549762116317e-06, - "loss": 0.1959, + "epoch": 1.4632872070036713, + "grad_norm": 3.234600905190223, + "learning_rate": 1.0881512597843713e-05, + "loss": 0.4813, "step": 10363 }, { - "epoch": 2.173201929125603, - "grad_norm": 3.98769691249305, - "learning_rate": 3.726491845288632e-06, - "loss": 0.1447, + "epoch": 1.4634284100536572, + "grad_norm": 4.331661675565904, + "learning_rate": 1.0879994069192064e-05, + "loss": 0.6641, "step": 10364 }, { - "epoch": 2.1734116166911304, - "grad_norm": 3.950655426605788, - "learning_rate": 3.7247290358938036e-06, - "loss": 0.1522, + "epoch": 1.463569613103643, + "grad_norm": 3.4802010588176504, + "learning_rate": 1.0878475520089732e-05, + "loss": 0.5323, "step": 10365 }, { - "epoch": 2.1736213042566574, - "grad_norm": 4.6251148494752785, - "learning_rate": 3.7229665481174983e-06, - "loss": 0.1423, + "epoch": 1.463710816153629, + "grad_norm": 4.479205546729229, + "learning_rate": 1.0876956950572006e-05, + "loss": 0.8329, "step": 10366 }, { - "epoch": 2.173830991822185, - "grad_norm": 3.7487452702668933, - "learning_rate": 3.721204382050042e-06, - "loss": 0.1349, + "epoch": 1.4638520192036149, + "grad_norm": 3.134131170952976, + "learning_rate": 1.087543836067418e-05, + "loss": 0.5336, "step": 10367 }, { - "epoch": 2.1740406793877125, - "grad_norm": 3.7056118645306944, - "learning_rate": 3.7194425377817533e-06, - "loss": 0.134, + "epoch": 1.4639932222536007, + "grad_norm": 3.643354150070815, + "learning_rate": 1.0873919750431548e-05, + "loss": 0.6946, "step": 10368 }, { - "epoch": 2.1742503669532396, - "grad_norm": 4.557906263643729, - "learning_rate": 3.7176810154029288e-06, - "loss": 0.1685, + "epoch": 1.4641344253035866, + "grad_norm": 3.548535316032411, + "learning_rate": 1.0872401119879396e-05, + "loss": 0.5605, "step": 10369 }, { - "epoch": 2.174460054518767, - "grad_norm": 3.7236536372072893, - "learning_rate": 3.7159198150038445e-06, - "loss": 0.1173, + "epoch": 1.4642756283535725, + "grad_norm": 4.302161238258756, + "learning_rate": 1.0870882469053016e-05, + "loss": 0.6791, "step": 10370 }, { - "epoch": 2.174669742084294, - "grad_norm": 3.977368730173153, - "learning_rate": 3.714158936674769e-06, - "loss": 0.1521, + "epoch": 1.4644168314035584, + "grad_norm": 3.249436752740073, + "learning_rate": 1.0869363797987707e-05, + "loss": 0.5461, "step": 10371 }, { - "epoch": 2.1748794296498217, - "grad_norm": 3.728554326916154, - "learning_rate": 3.712398380505956e-06, - "loss": 0.1293, + "epoch": 1.4645580344535443, + "grad_norm": 3.4682971959672995, + "learning_rate": 1.0867845106718758e-05, + "loss": 0.5786, "step": 10372 }, { - "epoch": 2.1750891172153493, - "grad_norm": 4.019206693139448, - "learning_rate": 3.710638146587625e-06, - "loss": 0.1657, + "epoch": 1.4646992375035301, + "grad_norm": 3.2388828889311854, + "learning_rate": 1.0866326395281463e-05, + "loss": 0.5161, "step": 10373 }, { - "epoch": 2.1752988047808763, - "grad_norm": 3.291468484294927, - "learning_rate": 3.7088782350099985e-06, - "loss": 0.1265, + "epoch": 1.464840440553516, + "grad_norm": 3.751561565352993, + "learning_rate": 1.0864807663711118e-05, + "loss": 0.6248, "step": 10374 }, { - "epoch": 2.175508492346404, - "grad_norm": 4.509319147433582, - "learning_rate": 3.707118645863276e-06, - "loss": 0.1382, + "epoch": 1.464981643603502, + "grad_norm": 3.7928936501005586, + "learning_rate": 1.0863288912043016e-05, + "loss": 0.6161, "step": 10375 }, { - "epoch": 2.1757181799119314, - "grad_norm": 4.012120337987077, - "learning_rate": 3.7053593792376375e-06, - "loss": 0.1406, + "epoch": 1.4651228466534878, + "grad_norm": 4.038829385433803, + "learning_rate": 1.0861770140312449e-05, + "loss": 0.7093, "step": 10376 }, { - "epoch": 2.1759278674774585, - "grad_norm": 4.246031998919653, - "learning_rate": 3.7036004352232457e-06, - "loss": 0.1396, + "epoch": 1.4652640497034737, + "grad_norm": 4.0490856934253605, + "learning_rate": 1.0860251348554723e-05, + "loss": 0.7059, "step": 10377 }, { - "epoch": 2.176137555042986, - "grad_norm": 4.757468103740484, - "learning_rate": 3.7018418139102553e-06, - "loss": 0.1388, + "epoch": 1.4654052527534596, + "grad_norm": 3.0923054480654466, + "learning_rate": 1.085873253680512e-05, + "loss": 0.5296, "step": 10378 }, { - "epoch": 2.1763472426085135, - "grad_norm": 4.038885202611624, - "learning_rate": 3.7000835153887915e-06, - "loss": 0.1559, + "epoch": 1.4655464558034454, + "grad_norm": 3.3423014111151343, + "learning_rate": 1.0857213705098947e-05, + "loss": 0.5797, "step": 10379 }, { - "epoch": 2.1765569301740406, - "grad_norm": 4.803492447664633, - "learning_rate": 3.698325539748978e-06, - "loss": 0.1415, + "epoch": 1.4656876588534313, + "grad_norm": 4.0309226084984005, + "learning_rate": 1.0855694853471499e-05, + "loss": 0.5816, "step": 10380 }, { - "epoch": 2.176766617739568, - "grad_norm": 5.506047016950488, - "learning_rate": 3.69656788708091e-06, - "loss": 0.191, + "epoch": 1.4658288619034172, + "grad_norm": 3.38876769052743, + "learning_rate": 1.085417598195807e-05, + "loss": 0.6294, "step": 10381 }, { - "epoch": 2.1769763053050952, - "grad_norm": 4.214636155001907, - "learning_rate": 3.6948105574746674e-06, - "loss": 0.1758, + "epoch": 1.465970064953403, + "grad_norm": 3.46832708229239, + "learning_rate": 1.0852657090593961e-05, + "loss": 0.6476, "step": 10382 }, { - "epoch": 2.1771859928706228, - "grad_norm": 4.113497776597873, - "learning_rate": 3.693053551020319e-06, - "loss": 0.1314, + "epoch": 1.466111268003389, + "grad_norm": 4.39712011402426, + "learning_rate": 1.0851138179414471e-05, + "loss": 0.7632, "step": 10383 }, { - "epoch": 2.1773956804361503, - "grad_norm": 5.597044272255223, - "learning_rate": 3.69129686780792e-06, - "loss": 0.228, + "epoch": 1.4662524710533749, + "grad_norm": 3.6646474929537862, + "learning_rate": 1.0849619248454893e-05, + "loss": 0.5928, "step": 10384 }, { - "epoch": 2.1776053680016774, - "grad_norm": 4.7517241319113985, - "learning_rate": 3.689540507927497e-06, - "loss": 0.1742, + "epoch": 1.4663936741033607, + "grad_norm": 3.6290054584983404, + "learning_rate": 1.0848100297750535e-05, + "loss": 0.6257, "step": 10385 }, { - "epoch": 2.177815055567205, - "grad_norm": 3.7885796503509157, - "learning_rate": 3.6877844714690646e-06, - "loss": 0.1297, + "epoch": 1.4665348771533466, + "grad_norm": 3.7683273180857673, + "learning_rate": 1.0846581327336692e-05, + "loss": 0.6369, "step": 10386 }, { - "epoch": 2.1780247431327324, - "grad_norm": 4.771173721776659, - "learning_rate": 3.6860287585226294e-06, - "loss": 0.1831, + "epoch": 1.4666760802033325, + "grad_norm": 3.7830890593924082, + "learning_rate": 1.084506233724866e-05, + "loss": 0.5587, "step": 10387 }, { - "epoch": 2.1782344306982595, - "grad_norm": 4.434410180109572, - "learning_rate": 3.6842733691781686e-06, - "loss": 0.1447, + "epoch": 1.4668172832533184, + "grad_norm": 3.133506776080068, + "learning_rate": 1.0843543327521748e-05, + "loss": 0.4483, "step": 10388 }, { - "epoch": 2.178444118263787, - "grad_norm": 3.9918866551573475, - "learning_rate": 3.682518303525654e-06, - "loss": 0.1208, + "epoch": 1.4669584863033043, + "grad_norm": 3.4443633395274724, + "learning_rate": 1.0842024298191254e-05, + "loss": 0.5697, "step": 10389 }, { - "epoch": 2.178653805829314, - "grad_norm": 5.57443055857864, - "learning_rate": 3.680763561655034e-06, - "loss": 0.2128, + "epoch": 1.4670996893532902, + "grad_norm": 3.323986688336242, + "learning_rate": 1.0840505249292477e-05, + "loss": 0.5318, "step": 10390 }, { - "epoch": 2.1788634933948416, - "grad_norm": 3.9806744170939776, - "learning_rate": 3.679009143656238e-06, - "loss": 0.1491, + "epoch": 1.467240892403276, + "grad_norm": 3.511022936738072, + "learning_rate": 1.0838986180860722e-05, + "loss": 0.5561, "step": 10391 }, { - "epoch": 2.179073180960369, - "grad_norm": 3.8824567439935964, - "learning_rate": 3.6772550496191906e-06, - "loss": 0.1511, + "epoch": 1.467382095453262, + "grad_norm": 3.2610959828646617, + "learning_rate": 1.083746709293129e-05, + "loss": 0.5336, "step": 10392 }, { - "epoch": 2.1792828685258963, - "grad_norm": 4.97616985103004, - "learning_rate": 3.6755012796337885e-06, - "loss": 0.2004, + "epoch": 1.4675232985032478, + "grad_norm": 3.501655613180448, + "learning_rate": 1.0835947985539483e-05, + "loss": 0.6355, "step": 10393 }, { - "epoch": 2.179492556091424, - "grad_norm": 5.217769227177019, - "learning_rate": 3.67374783378991e-06, - "loss": 0.1823, + "epoch": 1.4676645015532335, + "grad_norm": 2.7594802884987515, + "learning_rate": 1.0834428858720608e-05, + "loss": 0.42, "step": 10394 }, { - "epoch": 2.1797022436569513, - "grad_norm": 4.620773382319504, - "learning_rate": 3.671994712177429e-06, - "loss": 0.1647, + "epoch": 1.4678057046032194, + "grad_norm": 4.308681262876709, + "learning_rate": 1.0832909712509969e-05, + "loss": 0.6767, "step": 10395 }, { - "epoch": 2.1799119312224784, - "grad_norm": 4.562600439559105, - "learning_rate": 3.6702419148861966e-06, - "loss": 0.173, + "epoch": 1.4679469076532052, + "grad_norm": 4.1245905150373865, + "learning_rate": 1.083139054694286e-05, + "loss": 0.7199, "step": 10396 }, { - "epoch": 2.180121618788006, - "grad_norm": 3.4777209003527654, - "learning_rate": 3.668489442006046e-06, - "loss": 0.1294, + "epoch": 1.4680881107031911, + "grad_norm": 3.166668807628487, + "learning_rate": 1.0829871362054601e-05, + "loss": 0.6161, "step": 10397 }, { - "epoch": 2.1803313063535334, - "grad_norm": 4.433330056566309, - "learning_rate": 3.6667372936267888e-06, - "loss": 0.1412, + "epoch": 1.468229313753177, + "grad_norm": 4.394860796622902, + "learning_rate": 1.0828352157880489e-05, + "loss": 0.7449, "step": 10398 }, { - "epoch": 2.1805409939190605, - "grad_norm": 4.073083087577789, - "learning_rate": 3.6649854698382325e-06, - "loss": 0.1654, + "epoch": 1.4683705168031629, + "grad_norm": 3.5970071894644957, + "learning_rate": 1.0826832934455828e-05, + "loss": 0.527, "step": 10399 }, { - "epoch": 2.180750681484588, - "grad_norm": 4.66692724539925, - "learning_rate": 3.6632339707301557e-06, - "loss": 0.1535, + "epoch": 1.4685117198531488, + "grad_norm": 3.5337563681169595, + "learning_rate": 1.0825313691815928e-05, + "loss": 0.5587, "step": 10400 }, { - "epoch": 2.180960369050115, - "grad_norm": 4.700690783896078, - "learning_rate": 3.6614827963923315e-06, - "loss": 0.1802, + "epoch": 1.4686529229031346, + "grad_norm": 3.6965098036515394, + "learning_rate": 1.0823794429996094e-05, + "loss": 0.5124, "step": 10401 }, { - "epoch": 2.1811700566156427, - "grad_norm": 4.1870644202961085, - "learning_rate": 3.6597319469145075e-06, - "loss": 0.1553, + "epoch": 1.4687941259531205, + "grad_norm": 3.8904744486095564, + "learning_rate": 1.0822275149031635e-05, + "loss": 0.5851, "step": 10402 }, { - "epoch": 2.18137974418117, - "grad_norm": 3.196730751605853, - "learning_rate": 3.6579814223864153e-06, - "loss": 0.1029, + "epoch": 1.4689353290031064, + "grad_norm": 3.6872827611950143, + "learning_rate": 1.0820755848957855e-05, + "loss": 0.5208, "step": 10403 }, { - "epoch": 2.1815894317466973, - "grad_norm": 4.148012508562863, - "learning_rate": 3.656231222897774e-06, - "loss": 0.1324, + "epoch": 1.4690765320530923, + "grad_norm": 4.33146482257058, + "learning_rate": 1.0819236529810062e-05, + "loss": 0.7343, "step": 10404 }, { - "epoch": 2.181799119312225, - "grad_norm": 5.71753239469698, - "learning_rate": 3.654481348538289e-06, - "loss": 0.1787, + "epoch": 1.4692177351030782, + "grad_norm": 3.5183057383562026, + "learning_rate": 1.0817717191623569e-05, + "loss": 0.6118, "step": 10405 }, { - "epoch": 2.1820088068777523, - "grad_norm": 5.034997349278202, - "learning_rate": 3.6527317993976397e-06, - "loss": 0.1643, + "epoch": 1.469358938153064, + "grad_norm": 3.445767871033962, + "learning_rate": 1.081619783443368e-05, + "loss": 0.595, "step": 10406 }, { - "epoch": 2.1822184944432794, - "grad_norm": 4.509004937635389, - "learning_rate": 3.6509825755654926e-06, - "loss": 0.1372, + "epoch": 1.46950014120305, + "grad_norm": 3.835817735510652, + "learning_rate": 1.0814678458275705e-05, + "loss": 0.6636, "step": 10407 }, { - "epoch": 2.182428182008807, - "grad_norm": 4.805080597776622, - "learning_rate": 3.649233677131503e-06, - "loss": 0.1609, + "epoch": 1.4696413442530358, + "grad_norm": 3.093060476196578, + "learning_rate": 1.0813159063184958e-05, + "loss": 0.495, "step": 10408 }, { - "epoch": 2.182637869574334, - "grad_norm": 3.7318194085011305, - "learning_rate": 3.647485104185302e-06, - "loss": 0.1396, + "epoch": 1.4697825473030217, + "grad_norm": 3.852423528651246, + "learning_rate": 1.081163964919674e-05, + "loss": 0.5267, "step": 10409 }, { - "epoch": 2.1828475571398616, - "grad_norm": 4.289761972737748, - "learning_rate": 3.645736856816504e-06, - "loss": 0.1703, + "epoch": 1.4699237503530076, + "grad_norm": 3.302644399578199, + "learning_rate": 1.0810120216346368e-05, + "loss": 0.4996, "step": 10410 }, { - "epoch": 2.183057244705389, - "grad_norm": 3.474863217855607, - "learning_rate": 3.6439889351147174e-06, - "loss": 0.092, + "epoch": 1.4700649534029935, + "grad_norm": 3.3701716090886054, + "learning_rate": 1.0808600764669158e-05, + "loss": 0.5387, "step": 10411 }, { - "epoch": 2.183266932270916, - "grad_norm": 4.602450605675988, - "learning_rate": 3.6422413391695165e-06, - "loss": 0.1704, + "epoch": 1.4702061564529794, + "grad_norm": 3.036843921020384, + "learning_rate": 1.0807081294200413e-05, + "loss": 0.5244, "step": 10412 }, { - "epoch": 2.1834766198364437, - "grad_norm": 4.936071297251656, - "learning_rate": 3.640494069070479e-06, - "loss": 0.1557, + "epoch": 1.4703473595029652, + "grad_norm": 3.4710049001811765, + "learning_rate": 1.0805561804975443e-05, + "loss": 0.5501, "step": 10413 }, { - "epoch": 2.1836863074019712, - "grad_norm": 3.226485422913305, - "learning_rate": 3.6387471249071505e-06, - "loss": 0.1169, + "epoch": 1.4704885625529511, + "grad_norm": 3.5341725858566346, + "learning_rate": 1.0804042297029567e-05, + "loss": 0.6908, "step": 10414 }, { - "epoch": 2.1838959949674983, - "grad_norm": 4.978268739947345, - "learning_rate": 3.6370005067690606e-06, - "loss": 0.1645, + "epoch": 1.470629765602937, + "grad_norm": 3.4580664701748196, + "learning_rate": 1.0802522770398096e-05, + "loss": 0.4999, "step": 10415 }, { - "epoch": 2.184105682533026, - "grad_norm": 3.9998159338279216, - "learning_rate": 3.6352542147457302e-06, - "loss": 0.1241, + "epoch": 1.470770968652923, + "grad_norm": 3.6156021207366114, + "learning_rate": 1.0801003225116341e-05, + "loss": 0.7137, "step": 10416 }, { - "epoch": 2.1843153700985534, - "grad_norm": 4.0094225157654195, - "learning_rate": 3.633508248926665e-06, - "loss": 0.1462, + "epoch": 1.4709121717029088, + "grad_norm": 3.620213872791416, + "learning_rate": 1.0799483661219618e-05, + "loss": 0.7032, "step": 10417 }, { - "epoch": 2.1845250576640804, - "grad_norm": 4.04747342666115, - "learning_rate": 3.6317626094013436e-06, - "loss": 0.13, + "epoch": 1.4710533747528947, + "grad_norm": 4.898069680793037, + "learning_rate": 1.0797964078743241e-05, + "loss": 0.6199, "step": 10418 }, { - "epoch": 2.184734745229608, - "grad_norm": 3.787128988596868, - "learning_rate": 3.6300172962592307e-06, - "loss": 0.114, + "epoch": 1.4711945778028805, + "grad_norm": 4.200809871991145, + "learning_rate": 1.0796444477722522e-05, + "loss": 0.6432, "step": 10419 }, { - "epoch": 2.184944432795135, - "grad_norm": 4.966833053551608, - "learning_rate": 3.628272309589783e-06, - "loss": 0.1617, + "epoch": 1.4713357808528664, + "grad_norm": 4.293409899271141, + "learning_rate": 1.0794924858192779e-05, + "loss": 0.6598, "step": 10420 }, { - "epoch": 2.1851541203606626, - "grad_norm": 4.400481545456757, - "learning_rate": 3.6265276494824265e-06, - "loss": 0.1531, + "epoch": 1.4714769839028523, + "grad_norm": 3.5658779674426793, + "learning_rate": 1.0793405220189321e-05, + "loss": 0.5144, "step": 10421 }, { - "epoch": 2.18536380792619, - "grad_norm": 3.709960444847731, - "learning_rate": 3.6247833160265867e-06, - "loss": 0.1316, + "epoch": 1.4716181869528382, + "grad_norm": 3.884556252460006, + "learning_rate": 1.0791885563747472e-05, + "loss": 0.6591, "step": 10422 }, { - "epoch": 2.185573495491717, - "grad_norm": 4.313501832960126, - "learning_rate": 3.6230393093116578e-06, - "loss": 0.1478, + "epoch": 1.471759390002824, + "grad_norm": 3.6834960834569905, + "learning_rate": 1.0790365888902548e-05, + "loss": 0.6348, "step": 10423 }, { - "epoch": 2.1857831830572447, - "grad_norm": 3.596884656255381, - "learning_rate": 3.621295629427022e-06, - "loss": 0.1414, + "epoch": 1.47190059305281, + "grad_norm": 4.256157278418224, + "learning_rate": 1.0788846195689856e-05, + "loss": 0.7412, "step": 10424 }, { - "epoch": 2.1859928706227723, - "grad_norm": 4.74164049459647, - "learning_rate": 3.619552276462048e-06, - "loss": 0.169, + "epoch": 1.4720417961027958, + "grad_norm": 3.162951852510564, + "learning_rate": 1.078732648414472e-05, + "loss": 0.4989, "step": 10425 }, { - "epoch": 2.1862025581882993, - "grad_norm": 4.803127498930869, - "learning_rate": 3.61780925050609e-06, - "loss": 0.1552, + "epoch": 1.4721829991527817, + "grad_norm": 3.5988432162041213, + "learning_rate": 1.078580675430246e-05, + "loss": 0.5783, "step": 10426 }, { - "epoch": 2.186412245753827, - "grad_norm": 5.133960379966977, - "learning_rate": 3.616066551648478e-06, - "loss": 0.1366, + "epoch": 1.4723242022027676, + "grad_norm": 3.688778344833739, + "learning_rate": 1.0784287006198386e-05, + "loss": 0.5468, "step": 10427 }, { - "epoch": 2.186621933319354, - "grad_norm": 4.637919501072803, - "learning_rate": 3.6143241799785234e-06, - "loss": 0.1734, + "epoch": 1.4724654052527535, + "grad_norm": 3.418627873227131, + "learning_rate": 1.0782767239867824e-05, + "loss": 0.5452, "step": 10428 }, { - "epoch": 2.1868316208848815, - "grad_norm": 4.177126832482457, - "learning_rate": 3.6125821355855293e-06, - "loss": 0.1318, + "epoch": 1.4726066083027394, + "grad_norm": 3.9318323875691275, + "learning_rate": 1.078124745534609e-05, + "loss": 0.5106, "step": 10429 }, { - "epoch": 2.187041308450409, - "grad_norm": 4.603232760229832, - "learning_rate": 3.6108404185587877e-06, - "loss": 0.163, + "epoch": 1.4727478113527253, + "grad_norm": 2.9535645132165262, + "learning_rate": 1.0779727652668496e-05, + "loss": 0.4616, "step": 10430 }, { - "epoch": 2.187250996015936, - "grad_norm": 4.582153423055365, - "learning_rate": 3.609099028987547e-06, - "loss": 0.194, + "epoch": 1.4728890144027111, + "grad_norm": 3.4650808317254467, + "learning_rate": 1.0778207831870375e-05, + "loss": 0.6312, "step": 10431 }, { - "epoch": 2.1874606835814636, - "grad_norm": 5.33181598546689, - "learning_rate": 3.607357966961067e-06, - "loss": 0.1913, + "epoch": 1.473030217452697, + "grad_norm": 3.391749201837755, + "learning_rate": 1.0776687992987038e-05, + "loss": 0.5626, "step": 10432 }, { - "epoch": 2.187670371146991, - "grad_norm": 4.005593598400876, - "learning_rate": 3.6056172325685824e-06, - "loss": 0.1377, + "epoch": 1.473171420502683, + "grad_norm": 3.2243127522452775, + "learning_rate": 1.0775168136053809e-05, + "loss": 0.4784, "step": 10433 }, { - "epoch": 2.1878800587125182, - "grad_norm": 4.264745274089219, - "learning_rate": 3.603876825899304e-06, - "loss": 0.1398, + "epoch": 1.4733126235526688, + "grad_norm": 4.005976290688714, + "learning_rate": 1.0773648261106005e-05, + "loss": 0.6841, "step": 10434 }, { - "epoch": 2.1880897462780458, - "grad_norm": 3.4417428758340374, - "learning_rate": 3.6021367470424294e-06, - "loss": 0.1297, + "epoch": 1.4734538266026547, + "grad_norm": 3.960551467831488, + "learning_rate": 1.0772128368178949e-05, + "loss": 0.6163, "step": 10435 }, { - "epoch": 2.1882994338435733, - "grad_norm": 4.998102547897121, - "learning_rate": 3.6003969960871453e-06, - "loss": 0.1977, + "epoch": 1.4735950296526406, + "grad_norm": 3.5547132592901702, + "learning_rate": 1.0770608457307965e-05, + "loss": 0.6315, "step": 10436 }, { - "epoch": 2.1885091214091004, - "grad_norm": 4.288684259171469, - "learning_rate": 3.5986575731226124e-06, - "loss": 0.136, + "epoch": 1.4737362327026264, + "grad_norm": 4.727722578441118, + "learning_rate": 1.0769088528528373e-05, + "loss": 0.7355, "step": 10437 }, { - "epoch": 2.188718808974628, - "grad_norm": 4.2312655452545895, - "learning_rate": 3.596918478237985e-06, - "loss": 0.1736, + "epoch": 1.4738774357526123, + "grad_norm": 3.9931113841292625, + "learning_rate": 1.0767568581875494e-05, + "loss": 0.5316, "step": 10438 }, { - "epoch": 2.188928496540155, - "grad_norm": 3.9962452693592545, - "learning_rate": 3.5951797115223895e-06, - "loss": 0.1334, + "epoch": 1.4740186388025982, + "grad_norm": 3.3682292153760587, + "learning_rate": 1.0766048617384654e-05, + "loss": 0.5396, "step": 10439 }, { - "epoch": 2.1891381841056825, - "grad_norm": 4.682006963641091, - "learning_rate": 3.59344127306494e-06, - "loss": 0.2043, + "epoch": 1.474159841852584, + "grad_norm": 3.8446960816553046, + "learning_rate": 1.0764528635091179e-05, + "loss": 0.6224, "step": 10440 }, { - "epoch": 2.18934787167121, - "grad_norm": 4.4338111249637695, - "learning_rate": 3.591703162954737e-06, - "loss": 0.1643, + "epoch": 1.47430104490257, + "grad_norm": 3.913708299313398, + "learning_rate": 1.076300863503038e-05, + "loss": 0.6832, "step": 10441 }, { - "epoch": 2.189557559236737, - "grad_norm": 3.6987520607547832, - "learning_rate": 3.5899653812808656e-06, - "loss": 0.1392, + "epoch": 1.4744422479525559, + "grad_norm": 3.1458983115580392, + "learning_rate": 1.0761488617237597e-05, + "loss": 0.4528, "step": 10442 }, { - "epoch": 2.1897672468022646, - "grad_norm": 3.858218074185271, - "learning_rate": 3.588227928132385e-06, - "loss": 0.1162, + "epoch": 1.4745834510025417, + "grad_norm": 4.977736567548205, + "learning_rate": 1.0759968581748143e-05, + "loss": 0.919, "step": 10443 }, { - "epoch": 2.189976934367792, - "grad_norm": 4.567917550348727, - "learning_rate": 3.5864908035983416e-06, - "loss": 0.1598, + "epoch": 1.4747246540525276, + "grad_norm": 3.882070019188596, + "learning_rate": 1.075844852859735e-05, + "loss": 0.5102, "step": 10444 }, { - "epoch": 2.1901866219333193, - "grad_norm": 4.224606129728385, - "learning_rate": 3.5847540077677702e-06, - "loss": 0.1675, + "epoch": 1.4748658571025133, + "grad_norm": 3.212998954048273, + "learning_rate": 1.075692845782054e-05, + "loss": 0.6291, "step": 10445 }, { - "epoch": 2.190396309498847, - "grad_norm": 4.072874673174045, - "learning_rate": 3.58301754072968e-06, - "loss": 0.1584, + "epoch": 1.4750070601524992, + "grad_norm": 4.20949430170499, + "learning_rate": 1.075540836945304e-05, + "loss": 0.6801, "step": 10446 }, { - "epoch": 2.190605997064374, - "grad_norm": 4.152993738299188, - "learning_rate": 3.5812814025730724e-06, - "loss": 0.1491, + "epoch": 1.475148263202485, + "grad_norm": 3.0995461694264277, + "learning_rate": 1.0753888263530174e-05, + "loss": 0.5119, "step": 10447 }, { - "epoch": 2.1908156846299014, - "grad_norm": 3.3215017015813215, - "learning_rate": 3.579545593386927e-06, - "loss": 0.1087, + "epoch": 1.475289466252471, + "grad_norm": 3.837965917437043, + "learning_rate": 1.0752368140087272e-05, + "loss": 0.6213, "step": 10448 }, { - "epoch": 2.191025372195429, - "grad_norm": 4.906089861345951, - "learning_rate": 3.5778101132602026e-06, - "loss": 0.174, + "epoch": 1.4754306693024568, + "grad_norm": 3.8250854350871415, + "learning_rate": 1.0750847999159662e-05, + "loss": 0.6223, "step": 10449 }, { - "epoch": 2.191235059760956, - "grad_norm": 3.5663943431525964, - "learning_rate": 3.576074962281848e-06, - "loss": 0.148, + "epoch": 1.4755718723524427, + "grad_norm": 4.290464494944606, + "learning_rate": 1.0749327840782663e-05, + "loss": 0.6823, "step": 10450 }, { - "epoch": 2.1914447473264835, - "grad_norm": 4.099145239644733, - "learning_rate": 3.5743401405407998e-06, - "loss": 0.1213, + "epoch": 1.4757130754024286, + "grad_norm": 3.973253194572791, + "learning_rate": 1.0747807664991613e-05, + "loss": 0.6312, "step": 10451 }, { - "epoch": 2.191654434892011, - "grad_norm": 3.9316863565679934, - "learning_rate": 3.5726056481259572e-06, - "loss": 0.1528, + "epoch": 1.4758542784524145, + "grad_norm": 3.1632984312404133, + "learning_rate": 1.0746287471821833e-05, + "loss": 0.5967, "step": 10452 }, { - "epoch": 2.191864122457538, - "grad_norm": 3.4746753669339827, - "learning_rate": 3.5708714851262238e-06, - "loss": 0.1167, + "epoch": 1.4759954815024003, + "grad_norm": 3.1598379459742887, + "learning_rate": 1.0744767261308655e-05, + "loss": 0.481, "step": 10453 }, { - "epoch": 2.1920738100230657, - "grad_norm": 5.261791192110429, - "learning_rate": 3.569137651630481e-06, - "loss": 0.1855, + "epoch": 1.4761366845523862, + "grad_norm": 3.6438554689759153, + "learning_rate": 1.074324703348741e-05, + "loss": 0.5884, "step": 10454 }, { - "epoch": 2.192283497588593, - "grad_norm": 4.6948533865131195, - "learning_rate": 3.5674041477275867e-06, - "loss": 0.1934, + "epoch": 1.476277887602372, + "grad_norm": 3.5519157104017025, + "learning_rate": 1.0741726788393422e-05, + "loss": 0.5907, "step": 10455 }, { - "epoch": 2.1924931851541203, - "grad_norm": 4.354187519282696, - "learning_rate": 3.565670973506383e-06, - "loss": 0.175, + "epoch": 1.476419090652358, + "grad_norm": 4.303627433455919, + "learning_rate": 1.0740206526062022e-05, + "loss": 0.8525, "step": 10456 }, { - "epoch": 2.192702872719648, - "grad_norm": 5.609629806891594, - "learning_rate": 3.563938129055705e-06, - "loss": 0.1993, + "epoch": 1.4765602937023439, + "grad_norm": 3.2900217342551197, + "learning_rate": 1.0738686246528549e-05, + "loss": 0.5198, "step": 10457 }, { - "epoch": 2.192912560285175, - "grad_norm": 3.5434957148393367, - "learning_rate": 3.562205614464358e-06, - "loss": 0.1307, + "epoch": 1.4767014967523298, + "grad_norm": 3.7588471465778124, + "learning_rate": 1.073716594982832e-05, + "loss": 0.6257, "step": 10458 }, { - "epoch": 2.1931222478507024, - "grad_norm": 5.3123534010547, - "learning_rate": 3.560473429821142e-06, - "loss": 0.1777, + "epoch": 1.4768426998023156, + "grad_norm": 4.907918311461345, + "learning_rate": 1.0735645635996676e-05, + "loss": 0.7455, "step": 10459 }, { - "epoch": 2.19333193541623, - "grad_norm": 5.020795273170776, - "learning_rate": 3.5587415752148326e-06, - "loss": 0.1552, + "epoch": 1.4769839028523015, + "grad_norm": 3.619490274864713, + "learning_rate": 1.0734125305068943e-05, + "loss": 0.5059, "step": 10460 }, { - "epoch": 2.193541622981757, - "grad_norm": 3.920912824297932, - "learning_rate": 3.5570100507341843e-06, - "loss": 0.1249, + "epoch": 1.4771251059022874, + "grad_norm": 3.96297611982448, + "learning_rate": 1.0732604957080458e-05, + "loss": 0.5879, "step": 10461 }, { - "epoch": 2.1937513105472846, - "grad_norm": 3.69291304408508, - "learning_rate": 3.5552788564679464e-06, - "loss": 0.1523, + "epoch": 1.4772663089522733, + "grad_norm": 4.242860233005695, + "learning_rate": 1.0731084592066548e-05, + "loss": 0.7557, "step": 10462 }, { - "epoch": 2.193960998112812, - "grad_norm": 4.081209041070821, - "learning_rate": 3.5535479925048477e-06, - "loss": 0.1214, + "epoch": 1.4774075120022592, + "grad_norm": 4.081686665614728, + "learning_rate": 1.072956421006255e-05, + "loss": 0.627, "step": 10463 }, { - "epoch": 2.194170685678339, - "grad_norm": 3.7475962413922206, - "learning_rate": 3.5518174589335964e-06, - "loss": 0.1356, + "epoch": 1.477548715052245, + "grad_norm": 4.083272162312083, + "learning_rate": 1.072804381110379e-05, + "loss": 0.6534, "step": 10464 }, { - "epoch": 2.1943803732438667, - "grad_norm": 4.085818118016188, - "learning_rate": 3.55008725584288e-06, - "loss": 0.1325, + "epoch": 1.477689918102231, + "grad_norm": 3.709892848076743, + "learning_rate": 1.072652339522561e-05, + "loss": 0.5189, "step": 10465 }, { - "epoch": 2.194590060809394, - "grad_norm": 4.152270145068927, - "learning_rate": 3.5483573833213815e-06, - "loss": 0.135, + "epoch": 1.4778311211522168, + "grad_norm": 3.428386960961124, + "learning_rate": 1.072500296246334e-05, + "loss": 0.6283, "step": 10466 }, { - "epoch": 2.1947997483749213, - "grad_norm": 3.658197170513417, - "learning_rate": 3.546627841457755e-06, - "loss": 0.1187, + "epoch": 1.4779723242022027, + "grad_norm": 3.720846284707587, + "learning_rate": 1.0723482512852312e-05, + "loss": 0.5575, "step": 10467 }, { - "epoch": 2.195009435940449, - "grad_norm": 4.107237872048693, - "learning_rate": 3.544898630340646e-06, - "loss": 0.1411, + "epoch": 1.4781135272521886, + "grad_norm": 3.307112108843093, + "learning_rate": 1.0721962046427866e-05, + "loss": 0.5369, "step": 10468 }, { - "epoch": 2.195219123505976, - "grad_norm": 5.768380735941217, - "learning_rate": 3.5431697500586795e-06, - "loss": 0.1677, + "epoch": 1.4782547303021745, + "grad_norm": 3.6539954849903045, + "learning_rate": 1.0720441563225333e-05, + "loss": 0.575, "step": 10469 }, { - "epoch": 2.1954288110715034, - "grad_norm": 4.2366587294187665, - "learning_rate": 3.5414412007004573e-06, - "loss": 0.1625, + "epoch": 1.4783959333521604, + "grad_norm": 3.0968390958822902, + "learning_rate": 1.0718921063280048e-05, + "loss": 0.4234, "step": 10470 }, { - "epoch": 2.195638498637031, - "grad_norm": 4.4287366883140145, - "learning_rate": 3.5397129823545795e-06, - "loss": 0.1659, + "epoch": 1.4785371364021462, + "grad_norm": 3.8720564908569424, + "learning_rate": 1.0717400546627347e-05, + "loss": 0.741, "step": 10471 }, { - "epoch": 2.195848186202558, - "grad_norm": 3.8956753743029675, - "learning_rate": 3.5379850951096163e-06, - "loss": 0.1204, + "epoch": 1.4786783394521321, + "grad_norm": 3.8014641982390227, + "learning_rate": 1.0715880013302568e-05, + "loss": 0.668, "step": 10472 }, { - "epoch": 2.1960578737680856, - "grad_norm": 3.4117243215979696, - "learning_rate": 3.536257539054121e-06, - "loss": 0.1294, + "epoch": 1.478819542502118, + "grad_norm": 3.2517846199681104, + "learning_rate": 1.0714359463341047e-05, + "loss": 0.5572, "step": 10473 }, { - "epoch": 2.196267561333613, - "grad_norm": 3.7019824450155827, - "learning_rate": 3.5345303142766387e-06, - "loss": 0.1248, + "epoch": 1.4789607455521039, + "grad_norm": 3.3244556731199797, + "learning_rate": 1.0712838896778124e-05, + "loss": 0.5669, "step": 10474 }, { - "epoch": 2.19647724889914, - "grad_norm": 3.9654650854328795, - "learning_rate": 3.5328034208656935e-06, - "loss": 0.1352, + "epoch": 1.4791019486020898, + "grad_norm": 3.420177805326652, + "learning_rate": 1.0711318313649125e-05, + "loss": 0.5046, "step": 10475 }, { - "epoch": 2.1966869364646677, - "grad_norm": 3.6986469903974233, - "learning_rate": 3.531076858909791e-06, - "loss": 0.1339, + "epoch": 1.4792431516520756, + "grad_norm": 2.9662333024529124, + "learning_rate": 1.0709797713989403e-05, + "loss": 0.5026, "step": 10476 }, { - "epoch": 2.196896624030195, - "grad_norm": 4.972096715665888, - "learning_rate": 3.5293506284974155e-06, - "loss": 0.162, + "epoch": 1.4793843547020615, + "grad_norm": 3.4940641445958645, + "learning_rate": 1.0708277097834285e-05, + "loss": 0.5016, "step": 10477 }, { - "epoch": 2.1971063115957223, - "grad_norm": 5.667536979690002, - "learning_rate": 3.5276247297170453e-06, - "loss": 0.1481, + "epoch": 1.4795255577520474, + "grad_norm": 4.280897091987144, + "learning_rate": 1.0706756465219114e-05, + "loss": 0.6082, "step": 10478 }, { - "epoch": 2.19731599916125, - "grad_norm": 3.84387988238395, - "learning_rate": 3.5258991626571317e-06, - "loss": 0.1137, + "epoch": 1.4796667608020333, + "grad_norm": 3.3523169701562265, + "learning_rate": 1.070523581617923e-05, + "loss": 0.5188, "step": 10479 }, { - "epoch": 2.197525686726777, - "grad_norm": 3.760782493334706, - "learning_rate": 3.524173927406117e-06, - "loss": 0.1674, + "epoch": 1.4798079638520192, + "grad_norm": 3.3310197952152523, + "learning_rate": 1.0703715150749967e-05, + "loss": 0.5789, "step": 10480 }, { - "epoch": 2.1977353742923045, - "grad_norm": 5.048411022274463, - "learning_rate": 3.522449024052421e-06, - "loss": 0.1527, + "epoch": 1.479949166902005, + "grad_norm": 3.990554209894295, + "learning_rate": 1.0702194468966667e-05, + "loss": 0.5855, "step": 10481 }, { - "epoch": 2.197945061857832, - "grad_norm": 6.228036933870606, - "learning_rate": 3.520724452684443e-06, - "loss": 0.184, + "epoch": 1.480090369951991, + "grad_norm": 3.67082440346671, + "learning_rate": 1.0700673770864673e-05, + "loss": 0.5599, "step": 10482 }, { - "epoch": 2.198154749423359, - "grad_norm": 3.9885158448806446, - "learning_rate": 3.5190002133905753e-06, - "loss": 0.1721, + "epoch": 1.4802315730019768, + "grad_norm": 3.396742924243031, + "learning_rate": 1.0699153056479326e-05, + "loss": 0.5582, "step": 10483 }, { - "epoch": 2.1983644369888866, - "grad_norm": 4.66226562534614, - "learning_rate": 3.517276306259191e-06, - "loss": 0.1672, + "epoch": 1.4803727760519627, + "grad_norm": 2.912900201529329, + "learning_rate": 1.069763232584596e-05, + "loss": 0.4813, "step": 10484 }, { - "epoch": 2.1985741245544137, - "grad_norm": 4.835441321052882, - "learning_rate": 3.51555273137864e-06, - "loss": 0.1445, + "epoch": 1.4805139791019486, + "grad_norm": 2.9239851619140294, + "learning_rate": 1.069611157899992e-05, + "loss": 0.4996, "step": 10485 }, { - "epoch": 2.1987838121199412, - "grad_norm": 4.4400048666185805, - "learning_rate": 3.513829488837254e-06, - "loss": 0.1234, + "epoch": 1.4806551821519345, + "grad_norm": 3.389065766287909, + "learning_rate": 1.0694590815976549e-05, + "loss": 0.5679, "step": 10486 }, { - "epoch": 2.1989934996854688, - "grad_norm": 4.70895698180235, - "learning_rate": 3.5121065787233566e-06, - "loss": 0.1962, + "epoch": 1.4807963852019204, + "grad_norm": 2.881672226789623, + "learning_rate": 1.0693070036811187e-05, + "loss": 0.4959, "step": 10487 }, { - "epoch": 2.199203187250996, - "grad_norm": 4.813402709251195, - "learning_rate": 3.5103840011252556e-06, - "loss": 0.1565, + "epoch": 1.4809375882519062, + "grad_norm": 4.609545738960422, + "learning_rate": 1.0691549241539177e-05, + "loss": 0.6994, "step": 10488 }, { - "epoch": 2.1994128748165234, - "grad_norm": 3.775074632537291, - "learning_rate": 3.508661756131224e-06, - "loss": 0.1229, + "epoch": 1.4810787913018921, + "grad_norm": 2.410051903462418, + "learning_rate": 1.069002843019586e-05, + "loss": 0.4109, "step": 10489 }, { - "epoch": 2.199622562382051, - "grad_norm": 5.436575849762496, - "learning_rate": 3.5069398438295367e-06, - "loss": 0.173, + "epoch": 1.481219994351878, + "grad_norm": 3.5991031829862843, + "learning_rate": 1.0688507602816581e-05, + "loss": 0.6699, "step": 10490 }, { - "epoch": 2.199832249947578, - "grad_norm": 5.725163862744351, - "learning_rate": 3.5052182643084476e-06, - "loss": 0.1578, + "epoch": 1.481361197401864, + "grad_norm": 3.1026943708245254, + "learning_rate": 1.0686986759436684e-05, + "loss": 0.4499, "step": 10491 }, { - "epoch": 2.2000419375131055, - "grad_norm": 3.6001541904034267, - "learning_rate": 3.5034970176561868e-06, - "loss": 0.1392, + "epoch": 1.4815024004518498, + "grad_norm": 3.3735279823470092, + "learning_rate": 1.068546590009151e-05, + "loss": 0.5906, "step": 10492 }, { - "epoch": 2.200251625078633, - "grad_norm": 4.308541146283223, - "learning_rate": 3.5017761039609677e-06, - "loss": 0.1353, + "epoch": 1.4816436035018357, + "grad_norm": 3.7321381186528106, + "learning_rate": 1.0683945024816403e-05, + "loss": 0.6402, "step": 10493 }, { - "epoch": 2.20046131264416, - "grad_norm": 4.4183231738813715, - "learning_rate": 3.500055523310997e-06, - "loss": 0.1411, + "epoch": 1.4817848065518215, + "grad_norm": 3.164104625445981, + "learning_rate": 1.0682424133646712e-05, + "loss": 0.5077, "step": 10494 }, { - "epoch": 2.2006710002096876, - "grad_norm": 4.93957161864954, - "learning_rate": 3.498335275794451e-06, - "loss": 0.1704, + "epoch": 1.4819260096018074, + "grad_norm": 3.6875769512235554, + "learning_rate": 1.0680903226617776e-05, + "loss": 0.5717, "step": 10495 }, { - "epoch": 2.2008806877752147, - "grad_norm": 4.817211243280632, - "learning_rate": 3.4966153614995024e-06, - "loss": 0.1933, + "epoch": 1.4820672126517933, + "grad_norm": 3.6813699426432644, + "learning_rate": 1.0679382303764945e-05, + "loss": 0.5197, "step": 10496 }, { - "epoch": 2.2010903753407423, - "grad_norm": 6.067319472415323, - "learning_rate": 3.4948957805142957e-06, - "loss": 0.2036, + "epoch": 1.4822084157017792, + "grad_norm": 3.9876994534708494, + "learning_rate": 1.0677861365123564e-05, + "loss": 0.5978, "step": 10497 }, { - "epoch": 2.20130006290627, - "grad_norm": 4.360478969711781, - "learning_rate": 3.4931765329269605e-06, - "loss": 0.1713, + "epoch": 1.482349618751765, + "grad_norm": 3.010261162378477, + "learning_rate": 1.0676340410728976e-05, + "loss": 0.4811, "step": 10498 }, { - "epoch": 2.201509750471797, - "grad_norm": 3.5080431833645584, - "learning_rate": 3.491457618825612e-06, - "loss": 0.1345, + "epoch": 1.482490821801751, + "grad_norm": 4.906393482702666, + "learning_rate": 1.0674819440616526e-05, + "loss": 0.8318, "step": 10499 }, { - "epoch": 2.2017194380373244, - "grad_norm": 4.339862703699154, - "learning_rate": 3.4897390382983544e-06, - "loss": 0.1132, + "epoch": 1.4826320248517368, + "grad_norm": 4.267711930911907, + "learning_rate": 1.0673298454821567e-05, + "loss": 0.7232, "step": 10500 }, { - "epoch": 2.201929125602852, - "grad_norm": 3.009003361056837, - "learning_rate": 3.4880207914332608e-06, - "loss": 0.1429, + "epoch": 1.4827732279017227, + "grad_norm": 3.5744974265662965, + "learning_rate": 1.0671777453379442e-05, + "loss": 0.5965, "step": 10501 }, { - "epoch": 2.202138813168379, - "grad_norm": 4.898990018155972, - "learning_rate": 3.486302878318393e-06, - "loss": 0.1325, + "epoch": 1.4829144309517086, + "grad_norm": 3.9434576297083326, + "learning_rate": 1.0670256436325499e-05, + "loss": 0.608, "step": 10502 }, { - "epoch": 2.2023485007339065, - "grad_norm": 4.820679789728473, - "learning_rate": 3.484585299041804e-06, - "loss": 0.2099, + "epoch": 1.4830556340016945, + "grad_norm": 4.469037055151412, + "learning_rate": 1.0668735403695087e-05, + "loss": 0.6079, "step": 10503 }, { - "epoch": 2.2025581882994336, - "grad_norm": 5.059341046650569, - "learning_rate": 3.482868053691515e-06, - "loss": 0.1806, + "epoch": 1.4831968370516804, + "grad_norm": 2.8797954872366858, + "learning_rate": 1.0667214355523552e-05, + "loss": 0.4407, "step": 10504 }, { - "epoch": 2.202767875864961, - "grad_norm": 5.234688695094377, - "learning_rate": 3.4811511423555457e-06, - "loss": 0.203, + "epoch": 1.4833380401016663, + "grad_norm": 3.6042185703915712, + "learning_rate": 1.0665693291846245e-05, + "loss": 0.4911, "step": 10505 }, { - "epoch": 2.2029775634304887, - "grad_norm": 4.168740797287372, - "learning_rate": 3.4794345651218864e-06, - "loss": 0.1594, + "epoch": 1.4834792431516521, + "grad_norm": 3.213877548022614, + "learning_rate": 1.0664172212698512e-05, + "loss": 0.5408, "step": 10506 }, { - "epoch": 2.2031872509960158, - "grad_norm": 4.482010581068789, - "learning_rate": 3.4777183220785106e-06, - "loss": 0.1553, + "epoch": 1.483620446201638, + "grad_norm": 3.8103328697729224, + "learning_rate": 1.0662651118115702e-05, + "loss": 0.613, "step": 10507 }, { - "epoch": 2.2033969385615433, - "grad_norm": 3.528291660976985, - "learning_rate": 3.476002413313384e-06, - "loss": 0.1626, + "epoch": 1.483761649251624, + "grad_norm": 4.974678034431828, + "learning_rate": 1.0661130008133169e-05, + "loss": 0.7566, "step": 10508 }, { - "epoch": 2.203606626127071, - "grad_norm": 4.711893691774915, - "learning_rate": 3.4742868389144557e-06, - "loss": 0.1297, + "epoch": 1.4839028523016098, + "grad_norm": 3.9442237345619007, + "learning_rate": 1.065960888278626e-05, + "loss": 0.6382, "step": 10509 }, { - "epoch": 2.203816313692598, - "grad_norm": 4.735618789096599, - "learning_rate": 3.4725715989696385e-06, - "loss": 0.1687, + "epoch": 1.4840440553515957, + "grad_norm": 3.692718608417279, + "learning_rate": 1.0658087742110322e-05, + "loss": 0.6561, "step": 10510 }, { - "epoch": 2.2040260012581254, - "grad_norm": 3.0256162683806016, - "learning_rate": 3.4708566935668484e-06, - "loss": 0.087, + "epoch": 1.4841852584015816, + "grad_norm": 3.609423864713247, + "learning_rate": 1.065656658614071e-05, + "loss": 0.5949, "step": 10511 }, { - "epoch": 2.204235688823653, - "grad_norm": 4.038067414726757, - "learning_rate": 3.4691421227939804e-06, - "loss": 0.1382, + "epoch": 1.4843264614515674, + "grad_norm": 3.0310862451438916, + "learning_rate": 1.0655045414912777e-05, + "loss": 0.5409, "step": 10512 }, { - "epoch": 2.20444537638918, - "grad_norm": 2.663010938894181, - "learning_rate": 3.4674278867389043e-06, - "loss": 0.1009, + "epoch": 1.4844676645015533, + "grad_norm": 4.711655444936806, + "learning_rate": 1.0653524228461872e-05, + "loss": 0.6625, "step": 10513 }, { - "epoch": 2.2046550639547076, - "grad_norm": 3.522530704946384, - "learning_rate": 3.4657139854894774e-06, - "loss": 0.0998, + "epoch": 1.4846088675515392, + "grad_norm": 3.563533486167081, + "learning_rate": 1.0652003026823344e-05, + "loss": 0.5819, "step": 10514 }, { - "epoch": 2.2048647515202346, - "grad_norm": 4.30843232419637, - "learning_rate": 3.464000419133545e-06, - "loss": 0.1799, + "epoch": 1.484750070601525, + "grad_norm": 3.984902110079651, + "learning_rate": 1.0650481810032546e-05, + "loss": 0.6191, "step": 10515 }, { - "epoch": 2.205074439085762, - "grad_norm": 4.666999334763687, - "learning_rate": 3.462287187758924e-06, - "loss": 0.1556, + "epoch": 1.484891273651511, + "grad_norm": 3.981829053893209, + "learning_rate": 1.0648960578124831e-05, + "loss": 0.6829, "step": 10516 }, { - "epoch": 2.2052841266512897, - "grad_norm": 4.320642539329774, - "learning_rate": 3.460574291453427e-06, - "loss": 0.1501, + "epoch": 1.4850324767014969, + "grad_norm": 4.071376510192055, + "learning_rate": 1.0647439331135558e-05, + "loss": 0.6931, "step": 10517 }, { - "epoch": 2.205493814216817, - "grad_norm": 3.197882142366623, - "learning_rate": 3.458861730304839e-06, - "loss": 0.103, + "epoch": 1.4851736797514827, + "grad_norm": 3.5497722139733314, + "learning_rate": 1.064591806910007e-05, + "loss": 0.6207, "step": 10518 }, { - "epoch": 2.2057035017823443, - "grad_norm": 3.697010250738273, - "learning_rate": 3.4571495044009294e-06, - "loss": 0.0923, + "epoch": 1.4853148828014686, + "grad_norm": 3.362185798810174, + "learning_rate": 1.0644396792053726e-05, + "loss": 0.5832, "step": 10519 }, { - "epoch": 2.205913189347872, - "grad_norm": 3.679497328301629, - "learning_rate": 3.4554376138294553e-06, - "loss": 0.1173, + "epoch": 1.4854560858514545, + "grad_norm": 3.2524972429992722, + "learning_rate": 1.0642875500031878e-05, + "loss": 0.5468, "step": 10520 }, { - "epoch": 2.206122876913399, - "grad_norm": 6.508857305302051, - "learning_rate": 3.4537260586781587e-06, - "loss": 0.2256, + "epoch": 1.4855972889014404, + "grad_norm": 4.178699384834525, + "learning_rate": 1.0641354193069882e-05, + "loss": 0.6027, "step": 10521 }, { - "epoch": 2.2063325644789265, - "grad_norm": 3.863817772189926, - "learning_rate": 3.452014839034754e-06, - "loss": 0.1424, + "epoch": 1.4857384919514263, + "grad_norm": 3.1613355700054173, + "learning_rate": 1.0639832871203094e-05, + "loss": 0.5737, "step": 10522 }, { - "epoch": 2.2065422520444535, - "grad_norm": 3.972804207892561, - "learning_rate": 3.4503039549869433e-06, - "loss": 0.1384, + "epoch": 1.4858796950014121, + "grad_norm": 3.224470206750618, + "learning_rate": 1.0638311534466863e-05, + "loss": 0.4917, "step": 10523 }, { - "epoch": 2.206751939609981, - "grad_norm": 4.614506396101835, - "learning_rate": 3.448593406622419e-06, - "loss": 0.1612, + "epoch": 1.486020898051398, + "grad_norm": 3.4384606481890554, + "learning_rate": 1.0636790182896545e-05, + "loss": 0.5415, "step": 10524 }, { - "epoch": 2.2069616271755086, - "grad_norm": 3.161308821349727, - "learning_rate": 3.4468831940288404e-06, - "loss": 0.116, + "epoch": 1.486162101101384, + "grad_norm": 4.142255246534022, + "learning_rate": 1.0635268816527505e-05, + "loss": 0.5992, "step": 10525 }, { - "epoch": 2.2071713147410357, - "grad_norm": 3.1160466555212025, - "learning_rate": 3.4451733172938685e-06, - "loss": 0.1487, + "epoch": 1.4863033041513698, + "grad_norm": 3.152311809667922, + "learning_rate": 1.063374743539509e-05, + "loss": 0.4517, "step": 10526 }, { - "epoch": 2.207381002306563, - "grad_norm": 5.038137294236161, - "learning_rate": 3.443463776505133e-06, - "loss": 0.1304, + "epoch": 1.4864445072013557, + "grad_norm": 4.249334141775024, + "learning_rate": 1.0632226039534654e-05, + "loss": 0.6566, "step": 10527 }, { - "epoch": 2.2075906898720907, - "grad_norm": 4.611214281399228, - "learning_rate": 3.4417545717502466e-06, - "loss": 0.1523, + "epoch": 1.4865857102513416, + "grad_norm": 3.300371622912966, + "learning_rate": 1.0630704628981561e-05, + "loss": 0.4933, "step": 10528 }, { - "epoch": 2.207800377437618, - "grad_norm": 4.177045506310439, - "learning_rate": 3.4400457031168134e-06, - "loss": 0.1525, + "epoch": 1.4867269133013274, + "grad_norm": 3.5280982672157193, + "learning_rate": 1.0629183203771167e-05, + "loss": 0.6282, "step": 10529 }, { - "epoch": 2.2080100650031453, - "grad_norm": 5.7185468719057955, - "learning_rate": 3.4383371706924216e-06, - "loss": 0.1971, + "epoch": 1.486868116351313, + "grad_norm": 3.9558345275493707, + "learning_rate": 1.0627661763938824e-05, + "loss": 0.6994, "step": 10530 }, { - "epoch": 2.208219752568673, - "grad_norm": 4.612471219896324, - "learning_rate": 3.436628974564625e-06, - "loss": 0.1731, + "epoch": 1.487009319401299, + "grad_norm": 3.7516673550226156, + "learning_rate": 1.0626140309519892e-05, + "loss": 0.6037, "step": 10531 }, { - "epoch": 2.2084294401342, - "grad_norm": 4.713898468585529, - "learning_rate": 3.434921114820976e-06, - "loss": 0.1857, + "epoch": 1.4871505224512849, + "grad_norm": 3.4690088687965113, + "learning_rate": 1.0624618840549732e-05, + "loss": 0.5341, "step": 10532 }, { - "epoch": 2.2086391276997275, - "grad_norm": 4.558450999390657, - "learning_rate": 3.4332135915490106e-06, - "loss": 0.1291, + "epoch": 1.4872917255012708, + "grad_norm": 3.541605509417699, + "learning_rate": 1.0623097357063696e-05, + "loss": 0.5196, "step": 10533 }, { - "epoch": 2.208848815265255, - "grad_norm": 4.1623740809072025, - "learning_rate": 3.431506404836238e-06, - "loss": 0.1525, + "epoch": 1.4874329285512566, + "grad_norm": 2.7597660674418774, + "learning_rate": 1.0621575859097153e-05, + "loss": 0.4097, "step": 10534 }, { - "epoch": 2.209058502830782, - "grad_norm": 4.698715251956241, - "learning_rate": 3.4297995547701504e-06, - "loss": 0.166, + "epoch": 1.4875741316012425, + "grad_norm": 3.469382531640144, + "learning_rate": 1.0620054346685448e-05, + "loss": 0.5662, "step": 10535 }, { - "epoch": 2.2092681903963096, - "grad_norm": 3.3048108691664315, - "learning_rate": 3.4280930414382353e-06, - "loss": 0.12, + "epoch": 1.4877153346512284, + "grad_norm": 3.739270298136029, + "learning_rate": 1.0618532819863953e-05, + "loss": 0.704, "step": 10536 }, { - "epoch": 2.2094778779618367, - "grad_norm": 4.77923003217397, - "learning_rate": 3.4263868649279463e-06, - "loss": 0.1591, + "epoch": 1.4878565377012143, + "grad_norm": 3.858222073387154, + "learning_rate": 1.061701127866802e-05, + "loss": 0.6634, "step": 10537 }, { - "epoch": 2.2096875655273642, - "grad_norm": 3.681016843730481, - "learning_rate": 3.424681025326735e-06, - "loss": 0.1479, + "epoch": 1.4879977407512002, + "grad_norm": 3.8498646993296055, + "learning_rate": 1.0615489723133015e-05, + "loss": 0.6146, "step": 10538 }, { - "epoch": 2.2098972530928918, - "grad_norm": 5.848102685205805, - "learning_rate": 3.422975522722026e-06, - "loss": 0.1527, + "epoch": 1.488138943801186, + "grad_norm": 3.526163611950601, + "learning_rate": 1.0613968153294291e-05, + "loss": 0.6239, "step": 10539 }, { - "epoch": 2.210106940658419, - "grad_norm": 4.496715792620476, - "learning_rate": 3.421270357201225e-06, - "loss": 0.1321, + "epoch": 1.488280146851172, + "grad_norm": 3.53467241320696, + "learning_rate": 1.0612446569187214e-05, + "loss": 0.5695, "step": 10540 }, { - "epoch": 2.2103166282239464, - "grad_norm": 3.9315476034637364, - "learning_rate": 3.4195655288517295e-06, - "loss": 0.1293, + "epoch": 1.4884213499011578, + "grad_norm": 3.097588675485939, + "learning_rate": 1.061092497084714e-05, + "loss": 0.469, "step": 10541 }, { - "epoch": 2.2105263157894735, - "grad_norm": 6.1926348285561055, - "learning_rate": 3.4178610377609155e-06, - "loss": 0.1196, + "epoch": 1.4885625529511437, + "grad_norm": 2.934146884734023, + "learning_rate": 1.060940335830944e-05, + "loss": 0.3585, "step": 10542 }, { - "epoch": 2.210736003355001, - "grad_norm": 4.3186473056001375, - "learning_rate": 3.416156884016141e-06, - "loss": 0.1275, + "epoch": 1.4887037560011296, + "grad_norm": 3.501140792415015, + "learning_rate": 1.0607881731609464e-05, + "loss": 0.7052, "step": 10543 }, { - "epoch": 2.2109456909205285, - "grad_norm": 4.200387211734999, - "learning_rate": 3.4144530677047418e-06, - "loss": 0.1897, + "epoch": 1.4888449590511155, + "grad_norm": 3.521163004322675, + "learning_rate": 1.0606360090782578e-05, + "loss": 0.613, "step": 10544 }, { - "epoch": 2.2111553784860556, - "grad_norm": 4.444594369753747, - "learning_rate": 3.412749588914047e-06, - "loss": 0.1418, + "epoch": 1.4889861621011014, + "grad_norm": 3.584719252702429, + "learning_rate": 1.0604838435864148e-05, + "loss": 0.6691, "step": 10545 }, { - "epoch": 2.211365066051583, - "grad_norm": 6.035932093197452, - "learning_rate": 3.411046447731359e-06, - "loss": 0.1671, + "epoch": 1.4891273651510872, + "grad_norm": 3.5341102046622868, + "learning_rate": 1.0603316766889537e-05, + "loss": 0.4873, "step": 10546 }, { - "epoch": 2.2115747536171106, - "grad_norm": 4.597523340963421, - "learning_rate": 3.4093436442439697e-06, - "loss": 0.1614, + "epoch": 1.4892685682010731, + "grad_norm": 3.4630980682588586, + "learning_rate": 1.0601795083894099e-05, + "loss": 0.6669, "step": 10547 }, { - "epoch": 2.2117844411826377, - "grad_norm": 3.913425189278523, - "learning_rate": 3.407641178539148e-06, - "loss": 0.1421, + "epoch": 1.489409771251059, + "grad_norm": 3.870437461386357, + "learning_rate": 1.0600273386913207e-05, + "loss": 0.5526, "step": 10548 }, { - "epoch": 2.2119941287481653, - "grad_norm": 4.0555145765201805, - "learning_rate": 3.4059390507041523e-06, - "loss": 0.1326, + "epoch": 1.4895509743010449, + "grad_norm": 2.8343538965472956, + "learning_rate": 1.059875167598222e-05, + "loss": 0.4418, "step": 10549 }, { - "epoch": 2.212203816313693, - "grad_norm": 3.251623869958489, - "learning_rate": 3.4042372608262175e-06, - "loss": 0.1155, + "epoch": 1.4896921773510308, + "grad_norm": 4.138167662168074, + "learning_rate": 1.0597229951136498e-05, + "loss": 0.6783, "step": 10550 }, { - "epoch": 2.21241350387922, - "grad_norm": 4.3015527008423415, - "learning_rate": 3.40253580899256e-06, - "loss": 0.1508, + "epoch": 1.4898333804010166, + "grad_norm": 4.382081062055031, + "learning_rate": 1.0595708212411417e-05, + "loss": 0.7501, "step": 10551 }, { - "epoch": 2.2126231914447474, - "grad_norm": 4.599544889560929, - "learning_rate": 3.400834695290387e-06, - "loss": 0.1498, + "epoch": 1.4899745834510025, + "grad_norm": 2.7831376007897624, + "learning_rate": 1.0594186459842333e-05, + "loss": 0.4601, "step": 10552 }, { - "epoch": 2.212832879010275, - "grad_norm": 3.98689611312043, - "learning_rate": 3.3991339198068795e-06, - "loss": 0.1213, + "epoch": 1.4901157865009884, + "grad_norm": 3.090918659312495, + "learning_rate": 1.0592664693464608e-05, + "loss": 0.4574, "step": 10553 }, { - "epoch": 2.213042566575802, - "grad_norm": 3.44408713423122, - "learning_rate": 3.39743348262921e-06, - "loss": 0.1105, + "epoch": 1.4902569895509743, + "grad_norm": 3.9146949039389036, + "learning_rate": 1.0591142913313615e-05, + "loss": 0.6319, "step": 10554 }, { - "epoch": 2.2132522541413295, - "grad_norm": 4.2247241265191855, - "learning_rate": 3.395733383844526e-06, - "loss": 0.1448, + "epoch": 1.4903981926009602, + "grad_norm": 3.112110161441283, + "learning_rate": 1.0589621119424714e-05, + "loss": 0.4477, "step": 10555 }, { - "epoch": 2.2134619417068566, - "grad_norm": 4.779136395069259, - "learning_rate": 3.3940336235399574e-06, - "loss": 0.1761, + "epoch": 1.490539395650946, + "grad_norm": 3.3978813275955124, + "learning_rate": 1.0588099311833275e-05, + "loss": 0.5037, "step": 10556 }, { - "epoch": 2.213671629272384, - "grad_norm": 5.4207493341137685, - "learning_rate": 3.392334201802623e-06, - "loss": 0.2026, + "epoch": 1.490680598700932, + "grad_norm": 3.773390049071862, + "learning_rate": 1.0586577490574661e-05, + "loss": 0.6361, "step": 10557 }, { - "epoch": 2.2138813168379117, - "grad_norm": 4.301414001442894, - "learning_rate": 3.390635118719625e-06, - "loss": 0.1576, + "epoch": 1.4908218017509178, + "grad_norm": 3.06915715140273, + "learning_rate": 1.058505565568424e-05, + "loss": 0.5276, "step": 10558 }, { - "epoch": 2.2140910044034388, - "grad_norm": 3.338157722780891, - "learning_rate": 3.38893637437804e-06, - "loss": 0.1338, + "epoch": 1.4909630048009037, + "grad_norm": 4.021270209815351, + "learning_rate": 1.0583533807197377e-05, + "loss": 0.5874, "step": 10559 }, { - "epoch": 2.2143006919689663, - "grad_norm": 5.022263332220146, - "learning_rate": 3.387237968864929e-06, - "loss": 0.1937, + "epoch": 1.4911042078508896, + "grad_norm": 2.768638649273078, + "learning_rate": 1.058201194514944e-05, + "loss": 0.4758, "step": 10560 }, { - "epoch": 2.214510379534494, - "grad_norm": 3.1090608944638554, - "learning_rate": 3.385539902267344e-06, - "loss": 0.1124, + "epoch": 1.4912454109008755, + "grad_norm": 3.5384790708016354, + "learning_rate": 1.0580490069575795e-05, + "loss": 0.5344, "step": 10561 }, { - "epoch": 2.214720067100021, - "grad_norm": 5.215638916598176, - "learning_rate": 3.3838421746723083e-06, - "loss": 0.1619, + "epoch": 1.4913866139508614, + "grad_norm": 3.0696366877638446, + "learning_rate": 1.0578968180511815e-05, + "loss": 0.4356, "step": 10562 }, { - "epoch": 2.2149297546655484, - "grad_norm": 4.31591027115965, - "learning_rate": 3.38214478616684e-06, - "loss": 0.1137, + "epoch": 1.4915278170008472, + "grad_norm": 3.6301349671201004, + "learning_rate": 1.0577446277992866e-05, + "loss": 0.5945, "step": 10563 }, { - "epoch": 2.2151394422310755, - "grad_norm": 4.076780061126983, - "learning_rate": 3.3804477368379297e-06, - "loss": 0.1315, + "epoch": 1.4916690200508331, + "grad_norm": 2.794563585015319, + "learning_rate": 1.057592436205431e-05, + "loss": 0.476, "step": 10564 }, { - "epoch": 2.215349129796603, - "grad_norm": 3.4126418736210766, - "learning_rate": 3.3787510267725497e-06, - "loss": 0.1012, + "epoch": 1.491810223100819, + "grad_norm": 3.0788590557984254, + "learning_rate": 1.0574402432731523e-05, + "loss": 0.5144, "step": 10565 }, { - "epoch": 2.2155588173621306, - "grad_norm": 3.8985707238994496, - "learning_rate": 3.3770546560576644e-06, - "loss": 0.135, + "epoch": 1.491951426150805, + "grad_norm": 3.3632599451410687, + "learning_rate": 1.0572880490059874e-05, + "loss": 0.6204, "step": 10566 }, { - "epoch": 2.2157685049276576, - "grad_norm": 3.7325222353963996, - "learning_rate": 3.3753586247802204e-06, - "loss": 0.1133, + "epoch": 1.4920926292007908, + "grad_norm": 3.1286485872944927, + "learning_rate": 1.0571358534074724e-05, + "loss": 0.4443, "step": 10567 }, { - "epoch": 2.215978192493185, - "grad_norm": 3.578163677739487, - "learning_rate": 3.373662933027131e-06, - "loss": 0.1234, + "epoch": 1.4922338322507767, + "grad_norm": 3.8319576452970083, + "learning_rate": 1.0569836564811456e-05, + "loss": 0.5307, "step": 10568 }, { - "epoch": 2.2161878800587127, - "grad_norm": 4.0290058944375104, - "learning_rate": 3.3719675808853093e-06, - "loss": 0.1889, + "epoch": 1.4923750353007625, + "grad_norm": 3.4182815908947695, + "learning_rate": 1.0568314582305427e-05, + "loss": 0.5941, "step": 10569 }, { - "epoch": 2.21639756762424, - "grad_norm": 4.057417792827492, - "learning_rate": 3.3702725684416482e-06, - "loss": 0.1107, + "epoch": 1.4925162383507484, + "grad_norm": 3.333577685718195, + "learning_rate": 1.0566792586592012e-05, + "loss": 0.5171, "step": 10570 }, { - "epoch": 2.2166072551897673, - "grad_norm": 4.947652640492737, - "learning_rate": 3.368577895783016e-06, - "loss": 0.1604, + "epoch": 1.4926574414007343, + "grad_norm": 3.5826923222589113, + "learning_rate": 1.0565270577706584e-05, + "loss": 0.5951, "step": 10571 }, { - "epoch": 2.216816942755295, - "grad_norm": 3.4559309237423026, - "learning_rate": 3.3668835629962647e-06, - "loss": 0.1413, + "epoch": 1.4927986444507202, + "grad_norm": 3.606696533119227, + "learning_rate": 1.0563748555684511e-05, + "loss": 0.5473, "step": 10572 }, { - "epoch": 2.217026630320822, - "grad_norm": 3.5726464751194538, - "learning_rate": 3.365189570168239e-06, - "loss": 0.1414, + "epoch": 1.492939847500706, + "grad_norm": 4.272267248304502, + "learning_rate": 1.0562226520561165e-05, + "loss": 0.6211, "step": 10573 }, { - "epoch": 2.2172363178863495, - "grad_norm": 4.581596600241903, - "learning_rate": 3.3634959173857528e-06, - "loss": 0.1562, + "epoch": 1.493081050550692, + "grad_norm": 3.924272104515598, + "learning_rate": 1.0560704472371919e-05, + "loss": 0.6349, "step": 10574 }, { - "epoch": 2.2174460054518765, - "grad_norm": 3.0616292761040875, - "learning_rate": 3.3618026047356132e-06, - "loss": 0.0923, + "epoch": 1.4932222536006778, + "grad_norm": 4.814539056416804, + "learning_rate": 1.0559182411152142e-05, + "loss": 0.6481, "step": 10575 }, { - "epoch": 2.217655693017404, - "grad_norm": 4.443749971723422, - "learning_rate": 3.3601096323046045e-06, - "loss": 0.1367, + "epoch": 1.4933634566506637, + "grad_norm": 3.471217405949264, + "learning_rate": 1.0557660336937207e-05, + "loss": 0.5457, "step": 10576 }, { - "epoch": 2.2178653805829316, - "grad_norm": 3.7921328324508163, - "learning_rate": 3.3584170001794902e-06, - "loss": 0.1488, + "epoch": 1.4935046597006496, + "grad_norm": 3.3181901563755503, + "learning_rate": 1.0556138249762489e-05, + "loss": 0.558, "step": 10577 }, { - "epoch": 2.2180750681484587, - "grad_norm": 4.186439901959596, - "learning_rate": 3.356724708447023e-06, - "loss": 0.1617, + "epoch": 1.4936458627506355, + "grad_norm": 3.495875702061927, + "learning_rate": 1.0554616149663355e-05, + "loss": 0.5614, "step": 10578 }, { - "epoch": 2.218284755713986, - "grad_norm": 4.32495318969111, - "learning_rate": 3.355032757193941e-06, - "loss": 0.1233, + "epoch": 1.4937870658006214, + "grad_norm": 3.79344435268433, + "learning_rate": 1.0553094036675182e-05, + "loss": 0.6417, "step": 10579 }, { - "epoch": 2.2184944432795137, - "grad_norm": 3.4817229891839694, - "learning_rate": 3.3533411465069544e-06, - "loss": 0.1395, + "epoch": 1.4939282688506073, + "grad_norm": 3.5029799787392366, + "learning_rate": 1.0551571910833344e-05, + "loss": 0.5227, "step": 10580 }, { - "epoch": 2.218704130845041, - "grad_norm": 3.4824599062329344, - "learning_rate": 3.351649876472759e-06, - "loss": 0.1291, + "epoch": 1.494069471900593, + "grad_norm": 3.2333756950404147, + "learning_rate": 1.0550049772173212e-05, + "loss": 0.6126, "step": 10581 }, { - "epoch": 2.2189138184105683, - "grad_norm": 5.23394493837928, - "learning_rate": 3.3499589471780425e-06, - "loss": 0.1862, + "epoch": 1.4942106749505788, + "grad_norm": 3.186463549849212, + "learning_rate": 1.054852762073016e-05, + "loss": 0.548, "step": 10582 }, { - "epoch": 2.2191235059760954, - "grad_norm": 4.931904266411021, - "learning_rate": 3.34826835870946e-06, - "loss": 0.1384, + "epoch": 1.4943518780005647, + "grad_norm": 3.961973801370825, + "learning_rate": 1.0547005456539565e-05, + "loss": 0.632, "step": 10583 }, { - "epoch": 2.219333193541623, - "grad_norm": 4.8460089066395255, - "learning_rate": 3.3465781111536634e-06, - "loss": 0.1339, + "epoch": 1.4944930810505506, + "grad_norm": 3.4646340030379768, + "learning_rate": 1.0545483279636799e-05, + "loss": 0.5701, "step": 10584 }, { - "epoch": 2.2195428811071505, - "grad_norm": 5.723602856536666, - "learning_rate": 3.3448882045972797e-06, - "loss": 0.1603, + "epoch": 1.4946342841005364, + "grad_norm": 3.1896558800902497, + "learning_rate": 1.0543961090057237e-05, + "loss": 0.5411, "step": 10585 }, { - "epoch": 2.2197525686726776, - "grad_norm": 4.319926770022346, - "learning_rate": 3.3431986391269146e-06, - "loss": 0.1416, + "epoch": 1.4947754871505223, + "grad_norm": 3.1777557775516816, + "learning_rate": 1.0542438887836252e-05, + "loss": 0.4788, "step": 10586 }, { - "epoch": 2.219962256238205, - "grad_norm": 4.739797232011968, - "learning_rate": 3.3415094148291637e-06, - "loss": 0.1243, + "epoch": 1.4949166902005082, + "grad_norm": 3.3341202155699583, + "learning_rate": 1.0540916673009223e-05, + "loss": 0.517, "step": 10587 }, { - "epoch": 2.2201719438037326, - "grad_norm": 4.957718617779369, - "learning_rate": 3.339820531790612e-06, - "loss": 0.1232, + "epoch": 1.495057893250494, + "grad_norm": 3.65968268419147, + "learning_rate": 1.0539394445611524e-05, + "loss": 0.5962, "step": 10588 }, { - "epoch": 2.2203816313692597, - "grad_norm": 4.106930493258497, - "learning_rate": 3.3381319900978027e-06, - "loss": 0.1512, + "epoch": 1.49519909630048, + "grad_norm": 3.833366955029374, + "learning_rate": 1.0537872205678534e-05, + "loss": 0.5776, "step": 10589 }, { - "epoch": 2.2205913189347872, - "grad_norm": 4.673163328662491, - "learning_rate": 3.3364437898372826e-06, - "loss": 0.1529, + "epoch": 1.4953402993504659, + "grad_norm": 3.6204114151800395, + "learning_rate": 1.0536349953245622e-05, + "loss": 0.6622, "step": 10590 }, { - "epoch": 2.2208010065003148, - "grad_norm": 3.558039461232673, - "learning_rate": 3.334755931095579e-06, - "loss": 0.1312, + "epoch": 1.4954815024004517, + "grad_norm": 3.2003246043875304, + "learning_rate": 1.053482768834817e-05, + "loss": 0.5535, "step": 10591 }, { - "epoch": 2.221010694065842, - "grad_norm": 3.597685987223306, - "learning_rate": 3.333068413959195e-06, - "loss": 0.1353, + "epoch": 1.4956227054504376, + "grad_norm": 3.5617696632607303, + "learning_rate": 1.0533305411021555e-05, + "loss": 0.5234, "step": 10592 }, { - "epoch": 2.2212203816313694, - "grad_norm": 3.7332485875114365, - "learning_rate": 3.331381238514614e-06, - "loss": 0.1203, + "epoch": 1.4957639085004235, + "grad_norm": 3.2962863330580103, + "learning_rate": 1.053178312130115e-05, + "loss": 0.5383, "step": 10593 }, { - "epoch": 2.2214300691968965, - "grad_norm": 2.9904384411037235, - "learning_rate": 3.329694404848315e-06, - "loss": 0.1238, + "epoch": 1.4959051115504094, + "grad_norm": 3.6471945286242864, + "learning_rate": 1.0530260819222337e-05, + "loss": 0.5807, "step": 10594 }, { - "epoch": 2.221639756762424, - "grad_norm": 4.711471606219611, - "learning_rate": 3.3280079130467426e-06, - "loss": 0.1755, + "epoch": 1.4960463146003953, + "grad_norm": 4.782929583496178, + "learning_rate": 1.052873850482049e-05, + "loss": 0.8106, "step": 10595 }, { - "epoch": 2.2218494443279515, - "grad_norm": 3.8994048355169157, - "learning_rate": 3.3263217631963398e-06, - "loss": 0.1324, + "epoch": 1.4961875176503812, + "grad_norm": 3.2727013839818557, + "learning_rate": 1.0527216178130988e-05, + "loss": 0.5404, "step": 10596 }, { - "epoch": 2.2220591318934786, - "grad_norm": 3.235770309683167, - "learning_rate": 3.3246359553835217e-06, - "loss": 0.1164, + "epoch": 1.496328720700367, + "grad_norm": 3.069427279307464, + "learning_rate": 1.0525693839189215e-05, + "loss": 0.5051, "step": 10597 }, { - "epoch": 2.222268819459006, - "grad_norm": 4.662696884062969, - "learning_rate": 3.3229504896946852e-06, - "loss": 0.1802, + "epoch": 1.496469923750353, + "grad_norm": 4.166701709181684, + "learning_rate": 1.0524171488030537e-05, + "loss": 0.7346, "step": 10598 }, { - "epoch": 2.2224785070245336, - "grad_norm": 5.909160384507709, - "learning_rate": 3.3212653662162177e-06, - "loss": 0.2086, + "epoch": 1.4966111268003388, + "grad_norm": 3.5052762460286657, + "learning_rate": 1.0522649124690343e-05, + "loss": 0.6641, "step": 10599 }, { - "epoch": 2.2226881945900607, - "grad_norm": 4.3328417253334015, - "learning_rate": 3.3195805850344862e-06, - "loss": 0.1693, + "epoch": 1.4967523298503247, + "grad_norm": 3.707020070244611, + "learning_rate": 1.0521126749204009e-05, + "loss": 0.6915, "step": 10600 }, { - "epoch": 2.2228978821555883, - "grad_norm": 3.4605092190390185, - "learning_rate": 3.317896146235837e-06, - "loss": 0.1323, + "epoch": 1.4968935329003106, + "grad_norm": 2.9297796798064337, + "learning_rate": 1.0519604361606916e-05, + "loss": 0.4861, "step": 10601 }, { - "epoch": 2.2231075697211153, - "grad_norm": 5.048474557143465, - "learning_rate": 3.3162120499065964e-06, - "loss": 0.1673, + "epoch": 1.4970347359502965, + "grad_norm": 4.386256048133974, + "learning_rate": 1.051808196193444e-05, + "loss": 0.7643, "step": 10602 }, { - "epoch": 2.223317257286643, - "grad_norm": 3.977703827457185, - "learning_rate": 3.3145282961330836e-06, - "loss": 0.1661, + "epoch": 1.4971759390002823, + "grad_norm": 4.059441242684399, + "learning_rate": 1.0516559550221965e-05, + "loss": 0.6182, "step": 10603 }, { - "epoch": 2.2235269448521704, - "grad_norm": 4.570787231943582, - "learning_rate": 3.312844885001588e-06, - "loss": 0.1128, + "epoch": 1.4973171420502682, + "grad_norm": 3.2807785678543113, + "learning_rate": 1.0515037126504865e-05, + "loss": 0.5311, "step": 10604 }, { - "epoch": 2.2237366324176975, - "grad_norm": 3.4518611494357483, - "learning_rate": 3.3111618165983938e-06, - "loss": 0.1073, + "epoch": 1.497458345100254, + "grad_norm": 3.3128275430459504, + "learning_rate": 1.0513514690818529e-05, + "loss": 0.5556, "step": 10605 }, { - "epoch": 2.223946319983225, - "grad_norm": 4.767025805207989, - "learning_rate": 3.3094790910097573e-06, - "loss": 0.1815, + "epoch": 1.49759954815024, + "grad_norm": 3.1836646885535798, + "learning_rate": 1.0511992243198335e-05, + "loss": 0.5238, "step": 10606 }, { - "epoch": 2.2241560075487525, - "grad_norm": 4.5679070930550765, - "learning_rate": 3.3077967083219185e-06, - "loss": 0.1179, + "epoch": 1.4977407512002259, + "grad_norm": 3.1745483141096114, + "learning_rate": 1.0510469783679656e-05, + "loss": 0.6052, "step": 10607 }, { - "epoch": 2.2243656951142796, - "grad_norm": 4.9873069294838, - "learning_rate": 3.3061146686211054e-06, - "loss": 0.1342, + "epoch": 1.4978819542502118, + "grad_norm": 3.6747624398198466, + "learning_rate": 1.0508947312297884e-05, + "loss": 0.6404, "step": 10608 }, { - "epoch": 2.224575382679807, - "grad_norm": 3.4742804014925115, - "learning_rate": 3.3044329719935285e-06, - "loss": 0.1173, + "epoch": 1.4980231573001976, + "grad_norm": 3.645298779581951, + "learning_rate": 1.0507424829088394e-05, + "loss": 0.5757, "step": 10609 }, { - "epoch": 2.2247850702453347, - "grad_norm": 4.359742076413879, - "learning_rate": 3.302751618525375e-06, - "loss": 0.1705, + "epoch": 1.4981643603501835, + "grad_norm": 3.8629791611481843, + "learning_rate": 1.050590233408657e-05, + "loss": 0.6609, "step": 10610 }, { - "epoch": 2.2249947578108618, - "grad_norm": 4.19551151130385, - "learning_rate": 3.301070608302813e-06, - "loss": 0.1388, + "epoch": 1.4983055634001694, + "grad_norm": 3.441691936240074, + "learning_rate": 1.0504379827327798e-05, + "loss": 0.6546, "step": 10611 }, { - "epoch": 2.2252044453763893, - "grad_norm": 3.995780906942006, - "learning_rate": 3.2993899414120044e-06, - "loss": 0.1428, + "epoch": 1.4984467664501553, + "grad_norm": 3.6738136341037984, + "learning_rate": 1.0502857308847453e-05, + "loss": 0.4493, "step": 10612 }, { - "epoch": 2.2254141329419164, - "grad_norm": 4.6427190042705035, - "learning_rate": 3.2977096179390832e-06, - "loss": 0.1374, + "epoch": 1.4985879695001412, + "grad_norm": 3.5991797639020806, + "learning_rate": 1.050133477868092e-05, + "loss": 0.5777, "step": 10613 }, { - "epoch": 2.225623820507444, - "grad_norm": 4.706071616326585, - "learning_rate": 3.2960296379701662e-06, - "loss": 0.1438, + "epoch": 1.498729172550127, + "grad_norm": 3.0655042448320757, + "learning_rate": 1.0499812236863589e-05, + "loss": 0.5081, "step": 10614 }, { - "epoch": 2.2258335080729714, - "grad_norm": 3.0932886108939606, - "learning_rate": 3.2943500015913564e-06, - "loss": 0.1142, + "epoch": 1.498870375600113, + "grad_norm": 4.561699371880219, + "learning_rate": 1.0498289683430831e-05, + "loss": 0.5682, "step": 10615 }, { - "epoch": 2.2260431956384985, - "grad_norm": 4.460160031520024, - "learning_rate": 3.2926707088887433e-06, - "loss": 0.1673, + "epoch": 1.4990115786500988, + "grad_norm": 3.3813446997734298, + "learning_rate": 1.049676711841804e-05, + "loss": 0.5261, "step": 10616 }, { - "epoch": 2.226252883204026, - "grad_norm": 3.8306100276362125, - "learning_rate": 3.29099175994839e-06, - "loss": 0.136, + "epoch": 1.4991527817000847, + "grad_norm": 3.3080989600267374, + "learning_rate": 1.0495244541860596e-05, + "loss": 0.5053, "step": 10617 }, { - "epoch": 2.2264625707695536, - "grad_norm": 4.628461826191929, - "learning_rate": 3.2893131548563406e-06, - "loss": 0.1868, + "epoch": 1.4992939847500706, + "grad_norm": 2.665709672655359, + "learning_rate": 1.0493721953793881e-05, + "loss": 0.418, "step": 10618 }, { - "epoch": 2.2266722583350806, - "grad_norm": 4.758077852478928, - "learning_rate": 3.2876348936986357e-06, - "loss": 0.1503, + "epoch": 1.4994351878000565, + "grad_norm": 3.827623321147945, + "learning_rate": 1.0492199354253283e-05, + "loss": 0.6225, "step": 10619 }, { - "epoch": 2.226881945900608, - "grad_norm": 3.6317435033830887, - "learning_rate": 3.2859569765612798e-06, - "loss": 0.1525, + "epoch": 1.4995763908500424, + "grad_norm": 3.7877647057735744, + "learning_rate": 1.0490676743274181e-05, + "loss": 0.6504, "step": 10620 }, { - "epoch": 2.2270916334661353, - "grad_norm": 4.278099658006646, - "learning_rate": 3.284279403530278e-06, - "loss": 0.1495, + "epoch": 1.4997175939000282, + "grad_norm": 3.230238534325567, + "learning_rate": 1.0489154120891965e-05, + "loss": 0.5249, "step": 10621 }, { - "epoch": 2.227301321031663, - "grad_norm": 4.312772193620942, - "learning_rate": 3.2826021746916047e-06, - "loss": 0.1593, + "epoch": 1.4998587969500141, + "grad_norm": 3.392045831094418, + "learning_rate": 1.0487631487142018e-05, + "loss": 0.5686, "step": 10622 }, { - "epoch": 2.2275110085971903, - "grad_norm": 4.6337431699371505, - "learning_rate": 3.280925290131217e-06, - "loss": 0.1677, + "epoch": 1.5, + "grad_norm": 2.857943237891044, + "learning_rate": 1.048610884205973e-05, + "loss": 0.4656, "step": 10623 }, { - "epoch": 2.2277206961627174, - "grad_norm": 3.6948679268208875, - "learning_rate": 3.279248749935062e-06, - "loss": 0.1423, + "epoch": 1.5001412030499859, + "grad_norm": 4.6928547385105635, + "learning_rate": 1.0484586185680477e-05, + "loss": 0.7959, "step": 10624 }, { - "epoch": 2.227930383728245, - "grad_norm": 4.177971836722947, - "learning_rate": 3.277572554189069e-06, - "loss": 0.1435, + "epoch": 1.5002824060999718, + "grad_norm": 3.423783846338795, + "learning_rate": 1.0483063518039653e-05, + "loss": 0.5076, "step": 10625 }, { - "epoch": 2.2281400712937725, - "grad_norm": 4.301396948760756, - "learning_rate": 3.2758967029791412e-06, - "loss": 0.1412, + "epoch": 1.5004236091499576, + "grad_norm": 3.7261336177651576, + "learning_rate": 1.0481540839172641e-05, + "loss": 0.5955, "step": 10626 }, { - "epoch": 2.2283497588592995, - "grad_norm": 3.4752779585961453, - "learning_rate": 3.2742211963911676e-06, - "loss": 0.1206, + "epoch": 1.5005648121999435, + "grad_norm": 3.9315293077384283, + "learning_rate": 1.0480018149114828e-05, + "loss": 0.6742, "step": 10627 }, { - "epoch": 2.228559446424827, - "grad_norm": 5.306285696757676, - "learning_rate": 3.272546034511025e-06, - "loss": 0.1862, + "epoch": 1.5007060152499294, + "grad_norm": 3.9276241539481656, + "learning_rate": 1.04784954479016e-05, + "loss": 0.6066, "step": 10628 }, { - "epoch": 2.2287691339903546, - "grad_norm": 4.743314596531981, - "learning_rate": 3.270871217424567e-06, - "loss": 0.1337, + "epoch": 1.5008472182999153, + "grad_norm": 3.9673050975905384, + "learning_rate": 1.0476972735568348e-05, + "loss": 0.6024, "step": 10629 }, { - "epoch": 2.2289788215558817, - "grad_norm": 3.4118717446034785, - "learning_rate": 3.2691967452176264e-06, - "loss": 0.1165, + "epoch": 1.5009884213499012, + "grad_norm": 3.8060648418372462, + "learning_rate": 1.0475450012150447e-05, + "loss": 0.6438, "step": 10630 }, { - "epoch": 2.229188509121409, - "grad_norm": 4.1969270153552145, - "learning_rate": 3.26752261797603e-06, - "loss": 0.1285, + "epoch": 1.501129624399887, + "grad_norm": 3.7506308333558116, + "learning_rate": 1.0473927277683303e-05, + "loss": 0.6007, "step": 10631 }, { - "epoch": 2.2293981966869363, - "grad_norm": 5.0494130550202465, - "learning_rate": 3.2658488357855734e-06, - "loss": 0.1964, + "epoch": 1.501270827449873, + "grad_norm": 3.3662130918335054, + "learning_rate": 1.0472404532202289e-05, + "loss": 0.6333, "step": 10632 }, { - "epoch": 2.229607884252464, - "grad_norm": 4.371426353209292, - "learning_rate": 3.264175398732047e-06, - "loss": 0.1615, + "epoch": 1.5014120304998588, + "grad_norm": 3.2741674350262664, + "learning_rate": 1.0470881775742797e-05, + "loss": 0.6102, "step": 10633 }, { - "epoch": 2.2298175718179913, - "grad_norm": 6.1462048238815745, - "learning_rate": 3.2625023069012128e-06, - "loss": 0.1795, + "epoch": 1.5015532335498447, + "grad_norm": 4.414556425691292, + "learning_rate": 1.0469359008340216e-05, + "loss": 0.5617, "step": 10634 }, { - "epoch": 2.2300272593835184, - "grad_norm": 4.283301703261908, - "learning_rate": 3.2608295603788197e-06, - "loss": 0.1465, + "epoch": 1.5016944365998306, + "grad_norm": 3.475918923481075, + "learning_rate": 1.0467836230029935e-05, + "loss": 0.6183, "step": 10635 }, { - "epoch": 2.230236946949046, - "grad_norm": 3.879640179953613, - "learning_rate": 3.2591571592505987e-06, - "loss": 0.121, + "epoch": 1.5018356396498165, + "grad_norm": 2.85522448221672, + "learning_rate": 1.0466313440847343e-05, + "loss": 0.4082, "step": 10636 }, { - "epoch": 2.2304466345145735, - "grad_norm": 4.589213017893935, - "learning_rate": 3.2574851036022692e-06, - "loss": 0.1549, + "epoch": 1.5019768426998024, + "grad_norm": 3.1537581448977745, + "learning_rate": 1.0464790640827827e-05, + "loss": 0.5418, "step": 10637 }, { - "epoch": 2.2306563220801006, - "grad_norm": 3.506850279898489, - "learning_rate": 3.2558133935195213e-06, - "loss": 0.1075, + "epoch": 1.5021180457497882, + "grad_norm": 3.072649167395925, + "learning_rate": 1.0463267830006779e-05, + "loss": 0.5372, "step": 10638 }, { - "epoch": 2.230866009645628, - "grad_norm": 4.528415173616114, - "learning_rate": 3.254142029088031e-06, - "loss": 0.1488, + "epoch": 1.5022592487997741, + "grad_norm": 3.441337696152202, + "learning_rate": 1.0461745008419582e-05, + "loss": 0.6349, "step": 10639 }, { - "epoch": 2.231075697211155, - "grad_norm": 4.022609724380287, - "learning_rate": 3.2524710103934653e-06, - "loss": 0.1401, + "epoch": 1.50240045184976, + "grad_norm": 3.7221162697217087, + "learning_rate": 1.0460222176101635e-05, + "loss": 0.5879, "step": 10640 }, { - "epoch": 2.2312853847766827, - "grad_norm": 6.533516023039647, - "learning_rate": 3.2508003375214603e-06, - "loss": 0.2036, + "epoch": 1.502541654899746, + "grad_norm": 3.4432384111094416, + "learning_rate": 1.045869933308832e-05, + "loss": 0.4924, "step": 10641 }, { - "epoch": 2.2314950723422102, - "grad_norm": 5.131158955943327, - "learning_rate": 3.2491300105576473e-06, - "loss": 0.1578, + "epoch": 1.5026828579497318, + "grad_norm": 2.974841552635477, + "learning_rate": 1.0457176479415034e-05, + "loss": 0.4738, "step": 10642 }, { - "epoch": 2.2317047599077373, - "grad_norm": 5.013241872507247, - "learning_rate": 3.2474600295876304e-06, - "loss": 0.1583, + "epoch": 1.5028240609997177, + "grad_norm": 3.1311169313746827, + "learning_rate": 1.0455653615117163e-05, + "loss": 0.4812, "step": 10643 }, { - "epoch": 2.231914447473265, - "grad_norm": 5.7839247309075725, - "learning_rate": 3.2457903946969938e-06, - "loss": 0.1961, + "epoch": 1.5029652640497035, + "grad_norm": 3.3526803486148715, + "learning_rate": 1.0454130740230098e-05, + "loss": 0.541, "step": 10644 }, { - "epoch": 2.2321241350387924, - "grad_norm": 4.152706619445699, - "learning_rate": 3.2441211059713153e-06, - "loss": 0.1544, + "epoch": 1.5031064670996894, + "grad_norm": 4.206735411906454, + "learning_rate": 1.0452607854789231e-05, + "loss": 0.5845, "step": 10645 }, { - "epoch": 2.2323338226043195, - "grad_norm": 5.0144134919604015, - "learning_rate": 3.2424521634961535e-06, - "loss": 0.1394, + "epoch": 1.5032476701496753, + "grad_norm": 4.176821995496634, + "learning_rate": 1.0451084958829953e-05, + "loss": 0.5541, "step": 10646 }, { - "epoch": 2.232543510169847, - "grad_norm": 4.2678485023080945, - "learning_rate": 3.240783567357033e-06, - "loss": 0.1653, + "epoch": 1.5033888731996612, + "grad_norm": 2.9943205452987294, + "learning_rate": 1.0449562052387655e-05, + "loss": 0.4235, "step": 10647 }, { - "epoch": 2.2327531977353745, - "grad_norm": 4.379278766162334, - "learning_rate": 3.2391153176394773e-06, - "loss": 0.1685, + "epoch": 1.503530076249647, + "grad_norm": 4.077517394534269, + "learning_rate": 1.0448039135497732e-05, + "loss": 0.6591, "step": 10648 }, { - "epoch": 2.2329628853009016, - "grad_norm": 4.962994492594998, - "learning_rate": 3.2374474144289912e-06, - "loss": 0.1755, + "epoch": 1.503671279299633, + "grad_norm": 4.988747574571694, + "learning_rate": 1.044651620819557e-05, + "loss": 0.7839, "step": 10649 }, { - "epoch": 2.233172572866429, - "grad_norm": 3.531747805190829, - "learning_rate": 3.2357798578110545e-06, - "loss": 0.0953, + "epoch": 1.5038124823496188, + "grad_norm": 3.2462356871546465, + "learning_rate": 1.0444993270516562e-05, + "loss": 0.5089, "step": 10650 }, { - "epoch": 2.233382260431956, - "grad_norm": 3.689540673813028, - "learning_rate": 3.2341126478711284e-06, - "loss": 0.1353, + "epoch": 1.5039536853996047, + "grad_norm": 3.7188883245116844, + "learning_rate": 1.0443470322496106e-05, + "loss": 0.5665, "step": 10651 }, { - "epoch": 2.2335919479974837, - "grad_norm": 4.279537920929235, - "learning_rate": 3.2324457846946668e-06, - "loss": 0.1679, + "epoch": 1.5040948884495906, + "grad_norm": 3.4114700290368454, + "learning_rate": 1.044194736416959e-05, + "loss": 0.5231, "step": 10652 }, { - "epoch": 2.2338016355630113, - "grad_norm": 4.114438663815979, - "learning_rate": 3.2307792683670937e-06, - "loss": 0.1606, + "epoch": 1.5042360914995765, + "grad_norm": 3.701160540621693, + "learning_rate": 1.0440424395572408e-05, + "loss": 0.5086, "step": 10653 }, { - "epoch": 2.2340113231285383, - "grad_norm": 4.056885208687246, - "learning_rate": 3.229113098973826e-06, - "loss": 0.1572, + "epoch": 1.5043772945495624, + "grad_norm": 3.474808920520696, + "learning_rate": 1.0438901416739955e-05, + "loss": 0.5573, "step": 10654 }, { - "epoch": 2.234221010694066, - "grad_norm": 4.116459934672251, - "learning_rate": 3.2274472766002573e-06, - "loss": 0.1246, + "epoch": 1.5045184975995483, + "grad_norm": 3.6405132407606033, + "learning_rate": 1.0437378427707622e-05, + "loss": 0.5435, "step": 10655 }, { - "epoch": 2.2344306982595934, - "grad_norm": 4.718193127325923, - "learning_rate": 3.225781801331758e-06, - "loss": 0.1309, + "epoch": 1.5046597006495341, + "grad_norm": 3.9930381232853565, + "learning_rate": 1.04358554285108e-05, + "loss": 0.6188, "step": 10656 }, { - "epoch": 2.2346403858251205, - "grad_norm": 3.687084362985051, - "learning_rate": 3.2241166732536898e-06, - "loss": 0.1391, + "epoch": 1.50480090369952, + "grad_norm": 3.308892594305775, + "learning_rate": 1.0434332419184891e-05, + "loss": 0.506, "step": 10657 }, { - "epoch": 2.234850073390648, - "grad_norm": 6.079697287451632, - "learning_rate": 3.2224518924513983e-06, - "loss": 0.1869, + "epoch": 1.504942106749506, + "grad_norm": 3.5007520349967893, + "learning_rate": 1.0432809399765281e-05, + "loss": 0.5797, "step": 10658 }, { - "epoch": 2.235059760956175, - "grad_norm": 3.749754526130742, - "learning_rate": 3.220787459010203e-06, - "loss": 0.1584, + "epoch": 1.5050833097994918, + "grad_norm": 3.4569411015149925, + "learning_rate": 1.0431286370287368e-05, + "loss": 0.5283, "step": 10659 }, { - "epoch": 2.2352694485217026, - "grad_norm": 3.726670840675818, - "learning_rate": 3.2191233730154035e-06, - "loss": 0.1347, + "epoch": 1.5052245128494777, + "grad_norm": 3.7894744553181847, + "learning_rate": 1.0429763330786546e-05, + "loss": 0.5729, "step": 10660 }, { - "epoch": 2.23547913608723, - "grad_norm": 3.8463145648327997, - "learning_rate": 3.217459634552297e-06, - "loss": 0.1429, + "epoch": 1.5053657158994636, + "grad_norm": 3.064652749060904, + "learning_rate": 1.042824028129821e-05, + "loss": 0.4497, "step": 10661 }, { - "epoch": 2.2356888236527572, - "grad_norm": 3.6735594308711645, - "learning_rate": 3.215796243706143e-06, - "loss": 0.133, + "epoch": 1.5055069189494494, + "grad_norm": 4.126195711916415, + "learning_rate": 1.0426717221857756e-05, + "loss": 0.6448, "step": 10662 }, { - "epoch": 2.2358985112182848, - "grad_norm": 5.159506808179413, - "learning_rate": 3.214133200562203e-06, - "loss": 0.1444, + "epoch": 1.5056481219994353, + "grad_norm": 3.454300648568431, + "learning_rate": 1.0425194152500578e-05, + "loss": 0.4972, "step": 10663 }, { - "epoch": 2.2361081987838123, - "grad_norm": 4.251251132308287, - "learning_rate": 3.2124705052057047e-06, - "loss": 0.1237, + "epoch": 1.5057893250494212, + "grad_norm": 3.5344410392032835, + "learning_rate": 1.0423671073262067e-05, + "loss": 0.6495, "step": 10664 }, { - "epoch": 2.2363178863493394, - "grad_norm": 4.128294289381434, - "learning_rate": 3.210808157721863e-06, - "loss": 0.1304, + "epoch": 1.505930528099407, + "grad_norm": 3.1513981312978427, + "learning_rate": 1.042214798417763e-05, + "loss": 0.5719, "step": 10665 }, { - "epoch": 2.236527573914867, - "grad_norm": 3.9921345576603606, - "learning_rate": 3.209146158195877e-06, - "loss": 0.1681, + "epoch": 1.506071731149393, + "grad_norm": 4.478435747627044, + "learning_rate": 1.0420624885282653e-05, + "loss": 0.6894, "step": 10666 }, { - "epoch": 2.2367372614803944, - "grad_norm": 3.380505274633623, - "learning_rate": 3.2074845067129367e-06, - "loss": 0.1082, + "epoch": 1.5062129341993789, + "grad_norm": 3.505095033365735, + "learning_rate": 1.0419101776612533e-05, + "loss": 0.4854, "step": 10667 }, { - "epoch": 2.2369469490459215, - "grad_norm": 5.5600830677914574, - "learning_rate": 3.2058232033581893e-06, - "loss": 0.1861, + "epoch": 1.5063541372493647, + "grad_norm": 3.8911004716751876, + "learning_rate": 1.0417578658202672e-05, + "loss": 0.6173, "step": 10668 }, { - "epoch": 2.237156636611449, - "grad_norm": 3.9523289763903477, - "learning_rate": 3.2041622482167867e-06, - "loss": 0.1567, + "epoch": 1.5064953402993506, + "grad_norm": 4.137518356080205, + "learning_rate": 1.0416055530088462e-05, + "loss": 0.6059, "step": 10669 }, { - "epoch": 2.237366324176976, - "grad_norm": 3.765372948501272, - "learning_rate": 3.202501641373855e-06, - "loss": 0.1644, + "epoch": 1.5066365433493365, + "grad_norm": 3.663486816786475, + "learning_rate": 1.0414532392305301e-05, + "loss": 0.6695, "step": 10670 }, { - "epoch": 2.2375760117425036, - "grad_norm": 4.350471809551431, - "learning_rate": 3.2008413829145113e-06, - "loss": 0.1074, + "epoch": 1.5067777463993224, + "grad_norm": 3.7672204183016205, + "learning_rate": 1.0413009244888589e-05, + "loss": 0.685, "step": 10671 }, { - "epoch": 2.237785699308031, - "grad_norm": 4.640865153442866, - "learning_rate": 3.1991814729238325e-06, - "loss": 0.1933, + "epoch": 1.5069189494493083, + "grad_norm": 2.6557741288718515, + "learning_rate": 1.0411486087873717e-05, + "loss": 0.4203, "step": 10672 }, { - "epoch": 2.2379953868735583, - "grad_norm": 3.9901887373618057, - "learning_rate": 3.1975219114868994e-06, - "loss": 0.1542, + "epoch": 1.5070601524992941, + "grad_norm": 3.7998675235143735, + "learning_rate": 1.0409962921296086e-05, + "loss": 0.4974, "step": 10673 }, { - "epoch": 2.238205074439086, - "grad_norm": 3.4396244096590936, - "learning_rate": 3.195862698688769e-06, - "loss": 0.131, + "epoch": 1.50720135554928, + "grad_norm": 3.547406605501481, + "learning_rate": 1.0408439745191096e-05, + "loss": 0.5546, "step": 10674 }, { - "epoch": 2.2384147620046133, - "grad_norm": 3.939392748844276, - "learning_rate": 3.1942038346144766e-06, - "loss": 0.167, + "epoch": 1.5073425585992657, + "grad_norm": 3.7321021220746395, + "learning_rate": 1.040691655959414e-05, + "loss": 0.6021, "step": 10675 }, { - "epoch": 2.2386244495701404, - "grad_norm": 4.83238479485785, - "learning_rate": 3.1925453193490387e-06, - "loss": 0.1666, + "epoch": 1.5074837616492516, + "grad_norm": 2.842238427103037, + "learning_rate": 1.0405393364540618e-05, + "loss": 0.4843, "step": 10676 }, { - "epoch": 2.238834137135668, - "grad_norm": 4.3705087586477855, - "learning_rate": 3.1908871529774623e-06, - "loss": 0.1569, + "epoch": 1.5076249646992375, + "grad_norm": 3.934729011705652, + "learning_rate": 1.0403870160065934e-05, + "loss": 0.5095, "step": 10677 }, { - "epoch": 2.239043824701195, - "grad_norm": 3.4111072186848514, - "learning_rate": 3.189229335584727e-06, - "loss": 0.1538, + "epoch": 1.5077661677492233, + "grad_norm": 3.758741606697827, + "learning_rate": 1.040234694620548e-05, + "loss": 0.6459, "step": 10678 }, { - "epoch": 2.2392535122667225, - "grad_norm": 3.298364960160477, - "learning_rate": 3.187571867255803e-06, - "loss": 0.1087, + "epoch": 1.5079073707992092, + "grad_norm": 3.095702712973342, + "learning_rate": 1.0400823722994657e-05, + "loss": 0.4836, "step": 10679 }, { - "epoch": 2.23946319983225, - "grad_norm": 4.038776106475902, - "learning_rate": 3.185914748075638e-06, - "loss": 0.1476, + "epoch": 1.508048573849195, + "grad_norm": 3.4551705566821176, + "learning_rate": 1.0399300490468862e-05, + "loss": 0.543, "step": 10680 }, { - "epoch": 2.239672887397777, - "grad_norm": 3.9246584661438995, - "learning_rate": 3.184257978129156e-06, - "loss": 0.1407, + "epoch": 1.508189776899181, + "grad_norm": 2.9819411028647838, + "learning_rate": 1.0397777248663497e-05, + "loss": 0.4668, "step": 10681 }, { - "epoch": 2.2398825749633047, - "grad_norm": 3.7005412097884887, - "learning_rate": 3.182601557501275e-06, - "loss": 0.0989, + "epoch": 1.5083309799491669, + "grad_norm": 3.7547037972937685, + "learning_rate": 1.0396253997613964e-05, + "loss": 0.5888, "step": 10682 }, { - "epoch": 2.240092262528832, - "grad_norm": 4.557648386050514, - "learning_rate": 3.1809454862768908e-06, - "loss": 0.1641, + "epoch": 1.5084721829991528, + "grad_norm": 2.965033880619661, + "learning_rate": 1.0394730737355655e-05, + "loss": 0.4888, "step": 10683 }, { - "epoch": 2.2403019500943593, - "grad_norm": 3.5916031962341264, - "learning_rate": 3.1792897645408783e-06, - "loss": 0.1467, + "epoch": 1.5086133860491386, + "grad_norm": 3.455883602796061, + "learning_rate": 1.0393207467923973e-05, + "loss": 0.616, "step": 10684 }, { - "epoch": 2.240511637659887, - "grad_norm": 3.225484030173698, - "learning_rate": 3.1776343923780915e-06, - "loss": 0.1346, + "epoch": 1.5087545890991245, + "grad_norm": 4.266089654990721, + "learning_rate": 1.039168418935432e-05, + "loss": 0.7192, "step": 10685 }, { - "epoch": 2.2407213252254143, - "grad_norm": 3.618816861698131, - "learning_rate": 3.1759793698733776e-06, - "loss": 0.119, + "epoch": 1.5088957921491104, + "grad_norm": 3.8291436483811156, + "learning_rate": 1.03901609016821e-05, + "loss": 0.5379, "step": 10686 }, { - "epoch": 2.2409310127909414, - "grad_norm": 4.201344782604311, - "learning_rate": 3.1743246971115538e-06, - "loss": 0.1371, + "epoch": 1.5090369951990963, + "grad_norm": 4.250646272389876, + "learning_rate": 1.0388637604942707e-05, + "loss": 0.6041, "step": 10687 }, { - "epoch": 2.241140700356469, - "grad_norm": 4.535607229484524, - "learning_rate": 3.172670374177431e-06, - "loss": 0.1626, + "epoch": 1.5091781982490822, + "grad_norm": 3.3577096684394387, + "learning_rate": 1.0387114299171541e-05, + "loss": 0.539, "step": 10688 }, { - "epoch": 2.241350387921996, - "grad_norm": 4.2331814957837315, - "learning_rate": 3.171016401155793e-06, - "loss": 0.1611, + "epoch": 1.509319401299068, + "grad_norm": 3.39162793233433, + "learning_rate": 1.0385590984404009e-05, + "loss": 0.4743, "step": 10689 }, { - "epoch": 2.2415600754875236, - "grad_norm": 3.5263875962958005, - "learning_rate": 3.169362778131406e-06, - "loss": 0.1291, + "epoch": 1.509460604349054, + "grad_norm": 3.740006335034745, + "learning_rate": 1.0384067660675508e-05, + "loss": 0.6054, "step": 10690 }, { - "epoch": 2.241769763053051, - "grad_norm": 4.516105193828841, - "learning_rate": 3.167709505189027e-06, - "loss": 0.1552, + "epoch": 1.5096018073990398, + "grad_norm": 4.470842556156996, + "learning_rate": 1.038254432802144e-05, + "loss": 0.8593, "step": 10691 }, { - "epoch": 2.241979450618578, - "grad_norm": 3.6479127601514203, - "learning_rate": 3.166056582413385e-06, - "loss": 0.1224, + "epoch": 1.5097430104490257, + "grad_norm": 4.002830596222397, + "learning_rate": 1.0381020986477209e-05, + "loss": 0.6929, "step": 10692 }, { - "epoch": 2.2421891381841057, - "grad_norm": 4.652470713467641, - "learning_rate": 3.1644040098891938e-06, - "loss": 0.1397, + "epoch": 1.5098842134990116, + "grad_norm": 3.9615154186449586, + "learning_rate": 1.037949763607821e-05, + "loss": 0.5939, "step": 10693 }, { - "epoch": 2.2423988257496332, - "grad_norm": 4.04336450339858, - "learning_rate": 3.1627517877011516e-06, - "loss": 0.1552, + "epoch": 1.5100254165489975, + "grad_norm": 3.3118922781390547, + "learning_rate": 1.0377974276859853e-05, + "loss": 0.5474, "step": 10694 }, { - "epoch": 2.2426085133151603, - "grad_norm": 4.304574538521102, - "learning_rate": 3.1610999159339427e-06, - "loss": 0.143, + "epoch": 1.5101666195989834, + "grad_norm": 4.122721246407766, + "learning_rate": 1.0376450908857538e-05, + "loss": 0.7326, "step": 10695 }, { - "epoch": 2.242818200880688, - "grad_norm": 4.5750593121446155, - "learning_rate": 3.1594483946722254e-06, - "loss": 0.1919, + "epoch": 1.5103078226489692, + "grad_norm": 4.663151265065283, + "learning_rate": 1.0374927532106667e-05, + "loss": 0.5862, "step": 10696 }, { - "epoch": 2.243027888446215, - "grad_norm": 3.8149046175335286, - "learning_rate": 3.1577972240006384e-06, - "loss": 0.1427, + "epoch": 1.5104490256989551, + "grad_norm": 2.929039467369949, + "learning_rate": 1.0373404146642639e-05, + "loss": 0.4794, "step": 10697 }, { - "epoch": 2.2432375760117425, - "grad_norm": 3.4402778063016735, - "learning_rate": 3.1561464040038137e-06, - "loss": 0.1197, + "epoch": 1.510590228748941, + "grad_norm": 3.4532104361936646, + "learning_rate": 1.0371880752500862e-05, + "loss": 0.5628, "step": 10698 }, { - "epoch": 2.24344726357727, - "grad_norm": 5.4428125420926445, - "learning_rate": 3.154495934766353e-06, - "loss": 0.1883, + "epoch": 1.5107314317989269, + "grad_norm": 2.9490265843775254, + "learning_rate": 1.0370357349716738e-05, + "loss": 0.5863, "step": 10699 }, { - "epoch": 2.243656951142797, - "grad_norm": 4.041442735823655, - "learning_rate": 3.1528458163728515e-06, - "loss": 0.1231, + "epoch": 1.5108726348489128, + "grad_norm": 4.242329889155228, + "learning_rate": 1.0368833938325667e-05, + "loss": 0.808, "step": 10700 }, { - "epoch": 2.2438666387083246, - "grad_norm": 4.598540097774539, - "learning_rate": 3.1511960489078775e-06, - "loss": 0.1979, + "epoch": 1.5110138378988986, + "grad_norm": 3.3805513941509315, + "learning_rate": 1.0367310518363051e-05, + "loss": 0.4878, "step": 10701 }, { - "epoch": 2.244076326273852, - "grad_norm": 3.6264224777376746, - "learning_rate": 3.149546632455982e-06, - "loss": 0.1357, + "epoch": 1.5111550409488845, + "grad_norm": 3.186667493806593, + "learning_rate": 1.0365787089864303e-05, + "loss": 0.5358, "step": 10702 }, { - "epoch": 2.244286013839379, - "grad_norm": 4.407547328787699, - "learning_rate": 3.147897567101703e-06, - "loss": 0.1559, + "epoch": 1.5112962439988704, + "grad_norm": 3.2722161197868527, + "learning_rate": 1.036426365286482e-05, + "loss": 0.5306, "step": 10703 }, { - "epoch": 2.2444957014049067, - "grad_norm": 3.905412825519575, - "learning_rate": 3.146248852929561e-06, - "loss": 0.1309, + "epoch": 1.5114374470488563, + "grad_norm": 3.7593243171977115, + "learning_rate": 1.0362740207400006e-05, + "loss": 0.6014, "step": 10704 }, { - "epoch": 2.2447053889704343, - "grad_norm": 4.681106883892964, - "learning_rate": 3.1446004900240534e-06, - "loss": 0.112, + "epoch": 1.5115786500988422, + "grad_norm": 2.8273461660395585, + "learning_rate": 1.0361216753505267e-05, + "loss": 0.4599, "step": 10705 }, { - "epoch": 2.2449150765359613, - "grad_norm": 2.833973808978711, - "learning_rate": 3.1429524784696565e-06, - "loss": 0.1246, + "epoch": 1.511719853148828, + "grad_norm": 3.1387034016425415, + "learning_rate": 1.0359693291216007e-05, + "loss": 0.5433, "step": 10706 }, { - "epoch": 2.245124764101489, - "grad_norm": 3.9934521939892926, - "learning_rate": 3.141304818350841e-06, - "loss": 0.1485, + "epoch": 1.511861056198814, + "grad_norm": 2.914923835904362, + "learning_rate": 1.035816982056763e-05, + "loss": 0.4455, "step": 10707 }, { - "epoch": 2.245334451667016, - "grad_norm": 4.741158715735433, - "learning_rate": 3.1396575097520498e-06, - "loss": 0.1588, + "epoch": 1.5120022592487998, + "grad_norm": 3.3109363589013046, + "learning_rate": 1.0356646341595539e-05, + "loss": 0.5261, "step": 10708 }, { - "epoch": 2.2455441392325435, - "grad_norm": 3.5752646485221615, - "learning_rate": 3.1380105527577074e-06, - "loss": 0.1109, + "epoch": 1.5121434622987855, + "grad_norm": 3.0423422362868537, + "learning_rate": 1.0355122854335144e-05, + "loss": 0.5373, "step": 10709 }, { - "epoch": 2.245753826798071, - "grad_norm": 5.028553363506122, - "learning_rate": 3.136363947452228e-06, - "loss": 0.1605, + "epoch": 1.5122846653487714, + "grad_norm": 3.6766215321641496, + "learning_rate": 1.0353599358821845e-05, + "loss": 0.6584, "step": 10710 }, { - "epoch": 2.245963514363598, - "grad_norm": 4.104847486031501, - "learning_rate": 3.1347176939199984e-06, - "loss": 0.1464, + "epoch": 1.5124258683987573, + "grad_norm": 3.216887677273866, + "learning_rate": 1.0352075855091048e-05, + "loss": 0.5338, "step": 10711 }, { - "epoch": 2.2461732019291256, - "grad_norm": 3.374350047140469, - "learning_rate": 3.1330717922453967e-06, - "loss": 0.1121, + "epoch": 1.5125670714487431, + "grad_norm": 4.278486606249884, + "learning_rate": 1.0350552343178164e-05, + "loss": 0.5288, "step": 10712 }, { - "epoch": 2.246382889494653, - "grad_norm": 3.392144764759944, - "learning_rate": 3.131426242512775e-06, - "loss": 0.1469, + "epoch": 1.512708274498729, + "grad_norm": 4.0464516178499865, + "learning_rate": 1.0349028823118593e-05, + "loss": 0.646, "step": 10713 }, { - "epoch": 2.2465925770601802, - "grad_norm": 3.2207502618443646, - "learning_rate": 3.1297810448064693e-06, - "loss": 0.113, + "epoch": 1.512849477548715, + "grad_norm": 3.783110691465586, + "learning_rate": 1.0347505294947744e-05, + "loss": 0.6808, "step": 10714 }, { - "epoch": 2.2468022646257078, - "grad_norm": 5.405880838001209, - "learning_rate": 3.1281361992108005e-06, - "loss": 0.2209, + "epoch": 1.5129906805987008, + "grad_norm": 3.3582895211502892, + "learning_rate": 1.0345981758701023e-05, + "loss": 0.4965, "step": 10715 }, { - "epoch": 2.247011952191235, - "grad_norm": 4.680217965403063, - "learning_rate": 3.126491705810073e-06, - "loss": 0.168, + "epoch": 1.5131318836486867, + "grad_norm": 3.470690174407434, + "learning_rate": 1.0344458214413833e-05, + "loss": 0.552, "step": 10716 }, { - "epoch": 2.2472216397567624, - "grad_norm": 4.6834422657508, - "learning_rate": 3.1248475646885678e-06, - "loss": 0.1571, + "epoch": 1.5132730866986726, + "grad_norm": 4.1388034077192355, + "learning_rate": 1.0342934662121584e-05, + "loss": 0.7951, "step": 10717 }, { - "epoch": 2.24743132732229, - "grad_norm": 5.046100292557407, - "learning_rate": 3.1232037759305456e-06, - "loss": 0.2107, + "epoch": 1.5134142897486584, + "grad_norm": 3.5167443944072168, + "learning_rate": 1.034141110185968e-05, + "loss": 0.5158, "step": 10718 }, { - "epoch": 2.247641014887817, - "grad_norm": 4.864552236797289, - "learning_rate": 3.1215603396202608e-06, - "loss": 0.1462, + "epoch": 1.5135554927986443, + "grad_norm": 3.386596360497544, + "learning_rate": 1.033988753366353e-05, + "loss": 0.5771, "step": 10719 }, { - "epoch": 2.2478507024533445, - "grad_norm": 7.729762780229888, - "learning_rate": 3.1199172558419342e-06, - "loss": 0.1795, + "epoch": 1.5136966958486302, + "grad_norm": 4.08674948354086, + "learning_rate": 1.0338363957568544e-05, + "loss": 0.6265, "step": 10720 }, { - "epoch": 2.248060390018872, - "grad_norm": 5.103076269261817, - "learning_rate": 3.1182745246797852e-06, - "loss": 0.1758, + "epoch": 1.513837898898616, + "grad_norm": 4.21603526027606, + "learning_rate": 1.033684037361012e-05, + "loss": 0.5889, "step": 10721 }, { - "epoch": 2.248270077584399, - "grad_norm": 3.3815289499040335, - "learning_rate": 3.116632146218003e-06, - "loss": 0.1219, + "epoch": 1.513979101948602, + "grad_norm": 2.7706948592911713, + "learning_rate": 1.0335316781823675e-05, + "loss": 0.4133, "step": 10722 }, { - "epoch": 2.2484797651499266, - "grad_norm": 4.730575898742255, - "learning_rate": 3.1149901205407572e-06, - "loss": 0.159, + "epoch": 1.5141203049985879, + "grad_norm": 4.343415082959932, + "learning_rate": 1.0333793182244612e-05, + "loss": 0.683, "step": 10723 }, { - "epoch": 2.248689452715454, - "grad_norm": 3.56039129046489, - "learning_rate": 3.113348447732211e-06, - "loss": 0.1468, + "epoch": 1.5142615080485737, + "grad_norm": 3.720052817946793, + "learning_rate": 1.033226957490834e-05, + "loss": 0.6025, "step": 10724 }, { - "epoch": 2.2488991402809813, - "grad_norm": 3.4961659137641083, - "learning_rate": 3.1117071278765066e-06, - "loss": 0.1414, + "epoch": 1.5144027110985596, + "grad_norm": 3.146998267936162, + "learning_rate": 1.0330745959850266e-05, + "loss": 0.5246, "step": 10725 }, { - "epoch": 2.249108827846509, - "grad_norm": 5.067809440016506, - "learning_rate": 3.110066161057752e-06, - "loss": 0.1593, + "epoch": 1.5145439141485455, + "grad_norm": 4.085388540576302, + "learning_rate": 1.03292223371058e-05, + "loss": 0.7456, "step": 10726 }, { - "epoch": 2.249318515412036, - "grad_norm": 4.653335047595459, - "learning_rate": 3.108425547360058e-06, - "loss": 0.1496, + "epoch": 1.5146851171985314, + "grad_norm": 3.5864847519340537, + "learning_rate": 1.0327698706710346e-05, + "loss": 0.5825, "step": 10727 }, { - "epoch": 2.2495282029775634, - "grad_norm": 5.7321469455472, - "learning_rate": 3.10678528686751e-06, - "loss": 0.1499, + "epoch": 1.5148263202485173, + "grad_norm": 3.1974208137908753, + "learning_rate": 1.0326175068699316e-05, + "loss": 0.6628, "step": 10728 }, { - "epoch": 2.249737890543091, - "grad_norm": 4.89149136960824, - "learning_rate": 3.1051453796641718e-06, - "loss": 0.1487, + "epoch": 1.5149675232985031, + "grad_norm": 4.202713992135985, + "learning_rate": 1.0324651423108123e-05, + "loss": 0.7062, "step": 10729 }, { - "epoch": 2.249947578108618, - "grad_norm": 5.173151876393134, - "learning_rate": 3.1035058258340876e-06, - "loss": 0.1947, + "epoch": 1.515108726348489, + "grad_norm": 3.0383304276233245, + "learning_rate": 1.0323127769972165e-05, + "loss": 0.537, "step": 10730 }, { - "epoch": 2.2501572656741455, - "grad_norm": 4.387382183707479, - "learning_rate": 3.1018666254612927e-06, - "loss": 0.1841, + "epoch": 1.515249929398475, + "grad_norm": 3.081759142630451, + "learning_rate": 1.032160410932686e-05, + "loss": 0.5517, "step": 10731 }, { - "epoch": 2.250366953239673, - "grad_norm": 4.4664996662933385, - "learning_rate": 3.1002277786297996e-06, - "loss": 0.1711, + "epoch": 1.5153911324484608, + "grad_norm": 4.180940621073941, + "learning_rate": 1.0320080441207616e-05, + "loss": 0.661, "step": 10732 }, { - "epoch": 2.2505766408052, - "grad_norm": 4.198477045803032, - "learning_rate": 3.0985892854235998e-06, - "loss": 0.145, + "epoch": 1.5155323354984467, + "grad_norm": 4.248146844752272, + "learning_rate": 1.0318556765649838e-05, + "loss": 0.639, "step": 10733 }, { - "epoch": 2.2507863283707277, - "grad_norm": 5.16231479788112, - "learning_rate": 3.0969511459266666e-06, - "loss": 0.1623, + "epoch": 1.5156735385484326, + "grad_norm": 3.4639169158200938, + "learning_rate": 1.031703308268894e-05, + "loss": 0.5262, "step": 10734 }, { - "epoch": 2.2509960159362548, - "grad_norm": 4.920457625285519, - "learning_rate": 3.0953133602229624e-06, - "loss": 0.1661, + "epoch": 1.5158147415984184, + "grad_norm": 3.4236455513861674, + "learning_rate": 1.031550939236033e-05, + "loss": 0.6572, "step": 10735 }, { - "epoch": 2.2512057035017823, - "grad_norm": 4.352948071565112, - "learning_rate": 3.0936759283964204e-06, - "loss": 0.118, + "epoch": 1.5159559446484043, + "grad_norm": 3.549236424332188, + "learning_rate": 1.0313985694699415e-05, + "loss": 0.6291, "step": 10736 }, { - "epoch": 2.25141539106731, - "grad_norm": 3.661842826933915, - "learning_rate": 3.0920388505309697e-06, - "loss": 0.143, + "epoch": 1.5160971476983902, + "grad_norm": 3.5870318232796, + "learning_rate": 1.0312461989741614e-05, + "loss": 0.5646, "step": 10737 }, { - "epoch": 2.251625078632837, - "grad_norm": 3.448088734755034, - "learning_rate": 3.090402126710509e-06, - "loss": 0.122, + "epoch": 1.516238350748376, + "grad_norm": 4.202163561871317, + "learning_rate": 1.0310938277522326e-05, + "loss": 0.8006, "step": 10738 }, { - "epoch": 2.2518347661983644, - "grad_norm": 5.049641111539313, - "learning_rate": 3.08876575701892e-06, - "loss": 0.1613, + "epoch": 1.516379553798362, + "grad_norm": 3.4356758639756726, + "learning_rate": 1.030941455807697e-05, + "loss": 0.5235, "step": 10739 }, { - "epoch": 2.252044453763892, - "grad_norm": 5.158595952181279, - "learning_rate": 3.0871297415400727e-06, - "loss": 0.183, + "epoch": 1.5165207568483479, + "grad_norm": 4.061376312047332, + "learning_rate": 1.030789083144095e-05, + "loss": 0.5215, "step": 10740 }, { - "epoch": 2.252254141329419, - "grad_norm": 4.959679163330671, - "learning_rate": 3.0854940803578194e-06, - "loss": 0.1631, + "epoch": 1.5166619598983337, + "grad_norm": 3.8486771212211486, + "learning_rate": 1.0306367097649683e-05, + "loss": 0.5685, "step": 10741 }, { - "epoch": 2.2524638288949466, - "grad_norm": 3.9956075802820417, - "learning_rate": 3.0838587735559866e-06, - "loss": 0.1463, + "epoch": 1.5168031629483196, + "grad_norm": 3.2496066980241127, + "learning_rate": 1.0304843356738576e-05, + "loss": 0.5051, "step": 10742 }, { - "epoch": 2.252673516460474, - "grad_norm": 4.504463682186625, - "learning_rate": 3.082223821218384e-06, - "loss": 0.1783, + "epoch": 1.5169443659983055, + "grad_norm": 4.02896534899286, + "learning_rate": 1.030331960874304e-05, + "loss": 0.5971, "step": 10743 }, { - "epoch": 2.252883204026001, - "grad_norm": 4.877590025377896, - "learning_rate": 3.0805892234288127e-06, - "loss": 0.1578, + "epoch": 1.5170855690482914, + "grad_norm": 3.473025024389618, + "learning_rate": 1.0301795853698487e-05, + "loss": 0.5011, "step": 10744 }, { - "epoch": 2.2530928915915287, - "grad_norm": 4.2558157596042605, - "learning_rate": 3.0789549802710406e-06, - "loss": 0.1532, + "epoch": 1.5172267720982773, + "grad_norm": 3.75897097477085, + "learning_rate": 1.0300272091640332e-05, + "loss": 0.5927, "step": 10745 }, { - "epoch": 2.2533025791570562, - "grad_norm": 3.861508836868627, - "learning_rate": 3.0773210918288333e-06, - "loss": 0.1202, + "epoch": 1.5173679751482632, + "grad_norm": 3.3111728572930907, + "learning_rate": 1.0298748322603982e-05, + "loss": 0.5156, "step": 10746 }, { - "epoch": 2.2535122667225833, - "grad_norm": 4.816384688056086, - "learning_rate": 3.0756875581859267e-06, - "loss": 0.1544, + "epoch": 1.517509178198249, + "grad_norm": 4.55302032380926, + "learning_rate": 1.0297224546624846e-05, + "loss": 0.7441, "step": 10747 }, { - "epoch": 2.253721954288111, - "grad_norm": 4.612391747471403, - "learning_rate": 3.0740543794260403e-06, - "loss": 0.1665, + "epoch": 1.517650381248235, + "grad_norm": 3.03316958153463, + "learning_rate": 1.0295700763738345e-05, + "loss": 0.4633, "step": 10748 }, { - "epoch": 2.253931641853638, - "grad_norm": 3.787564757170909, - "learning_rate": 3.0724215556328784e-06, - "loss": 0.128, + "epoch": 1.5177915842982208, + "grad_norm": 3.371939652476338, + "learning_rate": 1.0294176973979884e-05, + "loss": 0.5287, "step": 10749 }, { - "epoch": 2.2541413294191655, - "grad_norm": 5.353320700650199, - "learning_rate": 3.0707890868901337e-06, - "loss": 0.1346, + "epoch": 1.5179327873482067, + "grad_norm": 3.3982940062672817, + "learning_rate": 1.0292653177384878e-05, + "loss": 0.5868, "step": 10750 }, { - "epoch": 2.254351016984693, - "grad_norm": 4.311020272899277, - "learning_rate": 3.0691569732814596e-06, - "loss": 0.1513, + "epoch": 1.5180739903981926, + "grad_norm": 3.389805755980886, + "learning_rate": 1.0291129373988737e-05, + "loss": 0.5738, "step": 10751 }, { - "epoch": 2.25456070455022, - "grad_norm": 4.165035291851166, - "learning_rate": 3.067525214890512e-06, - "loss": 0.1436, + "epoch": 1.5182151934481785, + "grad_norm": 3.70266849757621, + "learning_rate": 1.0289605563826876e-05, + "loss": 0.5883, "step": 10752 }, { - "epoch": 2.2547703921157476, - "grad_norm": 5.877515509216618, - "learning_rate": 3.065893811800924e-06, - "loss": 0.1681, + "epoch": 1.5183563964981643, + "grad_norm": 3.5792515947089765, + "learning_rate": 1.0288081746934705e-05, + "loss": 0.5138, "step": 10753 }, { - "epoch": 2.2549800796812747, - "grad_norm": 3.8060323708536203, - "learning_rate": 3.064262764096304e-06, - "loss": 0.1377, + "epoch": 1.5184975995481502, + "grad_norm": 3.7331261556830593, + "learning_rate": 1.0286557923347642e-05, + "loss": 0.5252, "step": 10754 }, { - "epoch": 2.255189767246802, - "grad_norm": 4.768631616737858, - "learning_rate": 3.0626320718602442e-06, - "loss": 0.1518, + "epoch": 1.518638802598136, + "grad_norm": 4.0708941975279, + "learning_rate": 1.0285034093101093e-05, + "loss": 0.5433, "step": 10755 }, { - "epoch": 2.2553994548123297, - "grad_norm": 4.540114404197204, - "learning_rate": 3.0610017351763265e-06, - "loss": 0.1389, + "epoch": 1.518780005648122, + "grad_norm": 3.66547379127143, + "learning_rate": 1.0283510256230478e-05, + "loss": 0.5746, "step": 10756 }, { - "epoch": 2.255609142377857, - "grad_norm": 5.28016184884163, - "learning_rate": 3.059371754128102e-06, - "loss": 0.1758, + "epoch": 1.5189212086981079, + "grad_norm": 3.0468055115602373, + "learning_rate": 1.0281986412771206e-05, + "loss": 0.5533, "step": 10757 }, { - "epoch": 2.2558188299433843, - "grad_norm": 4.282707146935706, - "learning_rate": 3.057742128799115e-06, - "loss": 0.1199, + "epoch": 1.5190624117480938, + "grad_norm": 4.123068740239987, + "learning_rate": 1.028046256275869e-05, + "loss": 0.7104, "step": 10758 }, { - "epoch": 2.256028517508912, - "grad_norm": 3.502794799874295, - "learning_rate": 3.056112859272886e-06, - "loss": 0.1322, + "epoch": 1.5192036147980796, + "grad_norm": 4.064910694859377, + "learning_rate": 1.0278938706228348e-05, + "loss": 0.6793, "step": 10759 }, { - "epoch": 2.256238205074439, - "grad_norm": 3.8887861468310607, - "learning_rate": 3.054483945632912e-06, - "loss": 0.1267, + "epoch": 1.5193448178480655, + "grad_norm": 2.654264781471614, + "learning_rate": 1.027741484321559e-05, + "loss": 0.398, "step": 10760 }, { - "epoch": 2.2564478926399665, - "grad_norm": 4.286909632059399, - "learning_rate": 3.052855387962683e-06, - "loss": 0.1456, + "epoch": 1.5194860208980514, + "grad_norm": 3.565654790322971, + "learning_rate": 1.0275890973755827e-05, + "loss": 0.5405, "step": 10761 }, { - "epoch": 2.256657580205494, - "grad_norm": 4.405800026844709, - "learning_rate": 3.0512271863456676e-06, - "loss": 0.1522, + "epoch": 1.5196272239480373, + "grad_norm": 3.1586589193095245, + "learning_rate": 1.0274367097884483e-05, + "loss": 0.5212, "step": 10762 }, { - "epoch": 2.256867267771021, - "grad_norm": 4.02597406666974, - "learning_rate": 3.049599340865311e-06, - "loss": 0.1418, + "epoch": 1.5197684269980232, + "grad_norm": 3.4330366667742185, + "learning_rate": 1.0272843215636964e-05, + "loss": 0.4573, "step": 10763 }, { - "epoch": 2.2570769553365486, - "grad_norm": 4.082163738537335, - "learning_rate": 3.0479718516050406e-06, - "loss": 0.1658, + "epoch": 1.519909630048009, + "grad_norm": 3.286642599153669, + "learning_rate": 1.0271319327048684e-05, + "loss": 0.4986, "step": 10764 }, { - "epoch": 2.257286642902076, - "grad_norm": 4.852521962642193, - "learning_rate": 3.046344718648272e-06, - "loss": 0.1808, + "epoch": 1.520050833097995, + "grad_norm": 3.3746420725995456, + "learning_rate": 1.026979543215506e-05, + "loss": 0.5442, "step": 10765 }, { - "epoch": 2.2574963304676032, - "grad_norm": 3.609655604986204, - "learning_rate": 3.0447179420783945e-06, - "loss": 0.1043, + "epoch": 1.5201920361479808, + "grad_norm": 2.8701017966810083, + "learning_rate": 1.0268271530991509e-05, + "loss": 0.5146, "step": 10766 }, { - "epoch": 2.2577060180331308, - "grad_norm": 3.376504162623587, - "learning_rate": 3.043091521978789e-06, - "loss": 0.1037, + "epoch": 1.5203332391979667, + "grad_norm": 3.054861134058723, + "learning_rate": 1.0266747623593445e-05, + "loss": 0.5298, "step": 10767 }, { - "epoch": 2.257915705598658, - "grad_norm": 4.18927843690016, - "learning_rate": 3.0414654584328096e-06, - "loss": 0.1472, + "epoch": 1.5204744422479526, + "grad_norm": 3.7143729809583363, + "learning_rate": 1.026522370999628e-05, + "loss": 0.6361, "step": 10768 }, { - "epoch": 2.2581253931641854, - "grad_norm": 4.142521652010309, - "learning_rate": 3.0398397515237908e-06, - "loss": 0.1471, + "epoch": 1.5206156452979385, + "grad_norm": 3.050900645590824, + "learning_rate": 1.0263699790235428e-05, + "loss": 0.4242, "step": 10769 }, { - "epoch": 2.258335080729713, - "grad_norm": 3.760157250159119, - "learning_rate": 3.03821440133506e-06, - "loss": 0.0864, + "epoch": 1.5207568483479244, + "grad_norm": 3.941076467403238, + "learning_rate": 1.0262175864346307e-05, + "loss": 0.6051, "step": 10770 }, { - "epoch": 2.25854476829524, - "grad_norm": 3.5359133254850246, - "learning_rate": 3.0365894079499158e-06, - "loss": 0.0938, + "epoch": 1.5208980513979102, + "grad_norm": 4.799702962143171, + "learning_rate": 1.0260651932364336e-05, + "loss": 0.7998, "step": 10771 }, { - "epoch": 2.2587544558607675, - "grad_norm": 4.259580070305667, - "learning_rate": 3.0349647714516396e-06, - "loss": 0.1704, + "epoch": 1.5210392544478961, + "grad_norm": 4.7578413410473175, + "learning_rate": 1.0259127994324923e-05, + "loss": 0.9242, "step": 10772 }, { - "epoch": 2.2589641434262946, - "grad_norm": 3.770184737223212, - "learning_rate": 3.033340491923498e-06, - "loss": 0.1345, + "epoch": 1.521180457497882, + "grad_norm": 3.3859820689312192, + "learning_rate": 1.025760405026349e-05, + "loss": 0.5839, "step": 10773 }, { - "epoch": 2.259173830991822, - "grad_norm": 3.8208175270030473, - "learning_rate": 3.0317165694487415e-06, - "loss": 0.1216, + "epoch": 1.5213216605478679, + "grad_norm": 3.092771110797661, + "learning_rate": 1.0256080100215448e-05, + "loss": 0.5667, "step": 10774 }, { - "epoch": 2.2593835185573496, - "grad_norm": 3.561508016419555, - "learning_rate": 3.030093004110598e-06, - "loss": 0.124, + "epoch": 1.5214628635978538, + "grad_norm": 3.781753774865506, + "learning_rate": 1.0254556144216217e-05, + "loss": 0.5657, "step": 10775 }, { - "epoch": 2.2595932061228767, - "grad_norm": 3.9073214899595934, - "learning_rate": 3.028469795992274e-06, - "loss": 0.1443, + "epoch": 1.5216040666478396, + "grad_norm": 3.5143576569437984, + "learning_rate": 1.025303218230121e-05, + "loss": 0.626, "step": 10776 }, { - "epoch": 2.2598028936884043, - "grad_norm": 3.5160419215571017, - "learning_rate": 3.0268469451769654e-06, - "loss": 0.123, + "epoch": 1.5217452696978255, + "grad_norm": 3.7430402918285535, + "learning_rate": 1.0251508214505846e-05, + "loss": 0.5763, "step": 10777 }, { - "epoch": 2.260012581253932, - "grad_norm": 5.5583007611969855, - "learning_rate": 3.0252244517478447e-06, - "loss": 0.1534, + "epoch": 1.5218864727478114, + "grad_norm": 3.860088208280659, + "learning_rate": 1.0249984240865534e-05, + "loss": 0.5832, "step": 10778 }, { - "epoch": 2.260222268819459, - "grad_norm": 3.2243622002638257, - "learning_rate": 3.023602315788069e-06, - "loss": 0.1185, + "epoch": 1.5220276757977973, + "grad_norm": 3.722200154794035, + "learning_rate": 1.0248460261415702e-05, + "loss": 0.5673, "step": 10779 }, { - "epoch": 2.2604319563849864, - "grad_norm": 3.5558776321170824, - "learning_rate": 3.0219805373807752e-06, - "loss": 0.1256, + "epoch": 1.5221688788477832, + "grad_norm": 3.025008856647848, + "learning_rate": 1.024693627619176e-05, + "loss": 0.5136, "step": 10780 }, { - "epoch": 2.260641643950514, - "grad_norm": 5.852776329887172, - "learning_rate": 3.0203591166090796e-06, - "loss": 0.1665, + "epoch": 1.522310081897769, + "grad_norm": 3.5513118389981124, + "learning_rate": 1.0245412285229124e-05, + "loss": 0.5398, "step": 10781 }, { - "epoch": 2.260851331516041, - "grad_norm": 3.9080746374356083, - "learning_rate": 3.0187380535560838e-06, - "loss": 0.1147, + "epoch": 1.522451284947755, + "grad_norm": 3.5335742702715565, + "learning_rate": 1.0243888288563213e-05, + "loss": 0.6397, "step": 10782 }, { - "epoch": 2.2610610190815685, - "grad_norm": 3.7383703280491725, - "learning_rate": 3.0171173483048744e-06, - "loss": 0.1369, + "epoch": 1.5225924879977408, + "grad_norm": 3.7466056213979533, + "learning_rate": 1.0242364286229445e-05, + "loss": 0.6529, "step": 10783 }, { - "epoch": 2.261270706647096, - "grad_norm": 4.2241462794518645, - "learning_rate": 3.015497000938512e-06, - "loss": 0.1618, + "epoch": 1.5227336910477267, + "grad_norm": 3.529590249552236, + "learning_rate": 1.0240840278263233e-05, + "loss": 0.6124, "step": 10784 }, { - "epoch": 2.261480394212623, - "grad_norm": 4.386034082502578, - "learning_rate": 3.0138770115400396e-06, - "loss": 0.1556, + "epoch": 1.5228748940977126, + "grad_norm": 4.947686941480129, + "learning_rate": 1.0239316264699999e-05, + "loss": 0.6988, "step": 10785 }, { - "epoch": 2.2616900817781507, - "grad_norm": 4.389248930771413, - "learning_rate": 3.0122573801924903e-06, - "loss": 0.1648, + "epoch": 1.5230160971476985, + "grad_norm": 3.410002209791899, + "learning_rate": 1.0237792245575158e-05, + "loss": 0.5477, "step": 10786 }, { - "epoch": 2.2618997693436778, - "grad_norm": 4.762619257881491, - "learning_rate": 3.0106381069788694e-06, - "loss": 0.1467, + "epoch": 1.5231573001976844, + "grad_norm": 3.60029846246348, + "learning_rate": 1.0236268220924126e-05, + "loss": 0.6168, "step": 10787 }, { - "epoch": 2.2621094569092053, - "grad_norm": 6.351787661682319, - "learning_rate": 3.009019191982164e-06, - "loss": 0.1403, + "epoch": 1.5232985032476702, + "grad_norm": 3.1893475343815605, + "learning_rate": 1.0234744190782326e-05, + "loss": 0.5347, "step": 10788 }, { - "epoch": 2.262319144474733, - "grad_norm": 5.135216116777504, - "learning_rate": 3.0074006352853557e-06, - "loss": 0.1674, + "epoch": 1.5234397062976561, + "grad_norm": 3.338189965567737, + "learning_rate": 1.023322015518517e-05, + "loss": 0.6376, "step": 10789 }, { - "epoch": 2.26252883204026, - "grad_norm": 4.132108517975747, - "learning_rate": 3.0057824369713883e-06, - "loss": 0.1427, + "epoch": 1.523580909347642, + "grad_norm": 3.5114590987758914, + "learning_rate": 1.0231696114168077e-05, + "loss": 0.6213, "step": 10790 }, { - "epoch": 2.2627385196057874, - "grad_norm": 3.653071605095956, - "learning_rate": 3.004164597123205e-06, - "loss": 0.1273, + "epoch": 1.523722112397628, + "grad_norm": 3.2079116904648983, + "learning_rate": 1.0230172067766469e-05, + "loss": 0.5426, "step": 10791 }, { - "epoch": 2.2629482071713145, - "grad_norm": 4.958798621005177, - "learning_rate": 3.0025471158237162e-06, - "loss": 0.1599, + "epoch": 1.5238633154476138, + "grad_norm": 2.614659926126286, + "learning_rate": 1.022864801601576e-05, + "loss": 0.4739, "step": 10792 }, { - "epoch": 2.263157894736842, - "grad_norm": 3.711438651627813, - "learning_rate": 3.000929993155829e-06, - "loss": 0.1395, + "epoch": 1.5240045184975997, + "grad_norm": 3.0033341586244533, + "learning_rate": 1.0227123958951372e-05, + "loss": 0.5594, "step": 10793 }, { - "epoch": 2.2633675823023696, - "grad_norm": 4.956851699239931, - "learning_rate": 2.999313229202414e-06, - "loss": 0.1815, + "epoch": 1.5241457215475855, + "grad_norm": 3.735375929696478, + "learning_rate": 1.022559989660872e-05, + "loss": 0.5841, "step": 10794 }, { - "epoch": 2.2635772698678966, - "grad_norm": 3.4254172346776164, - "learning_rate": 2.997696824046342e-06, - "loss": 0.1106, + "epoch": 1.5242869245975714, + "grad_norm": 3.670593706331668, + "learning_rate": 1.0224075829023225e-05, + "loss": 0.6564, "step": 10795 }, { - "epoch": 2.263786957433424, - "grad_norm": 4.063648494813445, - "learning_rate": 2.9960807777704527e-06, - "loss": 0.1296, + "epoch": 1.5244281276475573, + "grad_norm": 4.339592549836884, + "learning_rate": 1.0222551756230304e-05, + "loss": 0.6286, "step": 10796 }, { - "epoch": 2.2639966449989517, - "grad_norm": 3.6339727701038536, - "learning_rate": 2.9944650904575677e-06, - "loss": 0.1331, + "epoch": 1.5245693306975432, + "grad_norm": 3.7122236862730857, + "learning_rate": 1.0221027678265374e-05, + "loss": 0.5415, "step": 10797 }, { - "epoch": 2.264206332564479, - "grad_norm": 4.242863508473164, - "learning_rate": 2.992849762190497e-06, - "loss": 0.1249, + "epoch": 1.524710533747529, + "grad_norm": 3.006824213688265, + "learning_rate": 1.0219503595163857e-05, + "loss": 0.5853, "step": 10798 }, { - "epoch": 2.2644160201300063, - "grad_norm": 4.119778938292382, - "learning_rate": 2.9912347930520334e-06, - "loss": 0.1026, + "epoch": 1.524851736797515, + "grad_norm": 2.7327168360891796, + "learning_rate": 1.0217979506961171e-05, + "loss": 0.437, "step": 10799 }, { - "epoch": 2.264625707695534, - "grad_norm": 4.784281318046052, - "learning_rate": 2.9896201831249426e-06, - "loss": 0.177, + "epoch": 1.5249929398475008, + "grad_norm": 3.410055426051843, + "learning_rate": 1.0216455413692738e-05, + "loss": 0.5988, "step": 10800 }, { - "epoch": 2.264835395261061, - "grad_norm": 3.519635691003411, - "learning_rate": 2.988005932491973e-06, - "loss": 0.1316, + "epoch": 1.5251341428974867, + "grad_norm": 3.7942416400220322, + "learning_rate": 1.0214931315393972e-05, + "loss": 0.6408, "step": 10801 }, { - "epoch": 2.2650450828265885, - "grad_norm": 3.903676600011058, - "learning_rate": 2.9863920412358637e-06, - "loss": 0.1623, + "epoch": 1.5252753459474726, + "grad_norm": 4.179932471649775, + "learning_rate": 1.0213407212100296e-05, + "loss": 0.6958, "step": 10802 }, { - "epoch": 2.265254770392116, - "grad_norm": 3.934792121922775, - "learning_rate": 2.9847785094393243e-06, - "loss": 0.1668, + "epoch": 1.5254165489974585, + "grad_norm": 3.6348708571818844, + "learning_rate": 1.0211883103847132e-05, + "loss": 0.6412, "step": 10803 }, { - "epoch": 2.265464457957643, - "grad_norm": 3.5384464209255544, - "learning_rate": 2.983165337185057e-06, - "loss": 0.116, + "epoch": 1.5255577520474444, + "grad_norm": 4.302144685172392, + "learning_rate": 1.0210358990669889e-05, + "loss": 0.5559, "step": 10804 }, { - "epoch": 2.2656741455231706, - "grad_norm": 3.220997227853989, - "learning_rate": 2.9815525245557355e-06, - "loss": 0.1003, + "epoch": 1.5256989550974303, + "grad_norm": 2.9180947112451485, + "learning_rate": 1.0208834872604e-05, + "loss": 0.5373, "step": 10805 }, { - "epoch": 2.2658838330886977, - "grad_norm": 4.010711161500659, - "learning_rate": 2.9799400716340167e-06, - "loss": 0.1486, + "epoch": 1.5258401581474161, + "grad_norm": 4.376543422439997, + "learning_rate": 1.0207310749684877e-05, + "loss": 0.6667, "step": 10806 }, { - "epoch": 2.266093520654225, - "grad_norm": 3.724318805681504, - "learning_rate": 2.9783279785025455e-06, - "loss": 0.1271, + "epoch": 1.525981361197402, + "grad_norm": 3.279232572524097, + "learning_rate": 1.020578662194794e-05, + "loss": 0.5411, "step": 10807 }, { - "epoch": 2.2663032082197527, - "grad_norm": 5.9147519830319855, - "learning_rate": 2.976716245243949e-06, - "loss": 0.1656, + "epoch": 1.526122564247388, + "grad_norm": 4.640963051513512, + "learning_rate": 1.0204262489428611e-05, + "loss": 0.7202, "step": 10808 }, { - "epoch": 2.26651289578528, - "grad_norm": 3.625110983113078, - "learning_rate": 2.9751048719408216e-06, - "loss": 0.1165, + "epoch": 1.5262637672973738, + "grad_norm": 3.7365046219987947, + "learning_rate": 1.0202738352162312e-05, + "loss": 0.6673, "step": 10809 }, { - "epoch": 2.2667225833508073, - "grad_norm": 4.343066883595352, - "learning_rate": 2.9734938586757522e-06, - "loss": 0.1723, + "epoch": 1.5264049703473597, + "grad_norm": 3.529370069058791, + "learning_rate": 1.020121421018446e-05, + "loss": 0.5743, "step": 10810 }, { - "epoch": 2.2669322709163344, - "grad_norm": 4.519290255556346, - "learning_rate": 2.971883205531313e-06, - "loss": 0.1806, + "epoch": 1.5265461733973453, + "grad_norm": 3.4899936980636697, + "learning_rate": 1.0199690063530476e-05, + "loss": 0.6175, "step": 10811 }, { - "epoch": 2.267141958481862, - "grad_norm": 3.992865376113029, - "learning_rate": 2.9702729125900497e-06, - "loss": 0.1434, + "epoch": 1.5266873764473312, + "grad_norm": 3.0779141600897613, + "learning_rate": 1.0198165912235784e-05, + "loss": 0.4678, "step": 10812 }, { - "epoch": 2.2673516460473895, - "grad_norm": 5.190167415771511, - "learning_rate": 2.968662979934488e-06, - "loss": 0.1837, + "epoch": 1.526828579497317, + "grad_norm": 3.4354560135490133, + "learning_rate": 1.0196641756335799e-05, + "loss": 0.5682, "step": 10813 }, { - "epoch": 2.2675613336129166, - "grad_norm": 3.680736822779035, - "learning_rate": 2.967053407647148e-06, - "loss": 0.1349, + "epoch": 1.526969782547303, + "grad_norm": 4.260733881178502, + "learning_rate": 1.0195117595865947e-05, + "loss": 0.707, "step": 10814 }, { - "epoch": 2.267771021178444, - "grad_norm": 4.575894058145139, - "learning_rate": 2.965444195810515e-06, - "loss": 0.1569, + "epoch": 1.5271109855972889, + "grad_norm": 2.877082196128858, + "learning_rate": 1.0193593430861641e-05, + "loss": 0.4357, "step": 10815 }, { - "epoch": 2.2679807087439716, - "grad_norm": 3.6588259245477124, - "learning_rate": 2.963835344507071e-06, - "loss": 0.1338, + "epoch": 1.5272521886472747, + "grad_norm": 3.389076909288225, + "learning_rate": 1.0192069261358313e-05, + "loss": 0.5568, "step": 10816 }, { - "epoch": 2.2681903963094987, - "grad_norm": 4.454585144055542, - "learning_rate": 2.96222685381927e-06, - "loss": 0.1416, + "epoch": 1.5273933916972606, + "grad_norm": 3.4653763484809947, + "learning_rate": 1.0190545087391374e-05, + "loss": 0.6226, "step": 10817 }, { - "epoch": 2.2684000838750262, - "grad_norm": 4.9222067865871235, - "learning_rate": 2.9606187238295446e-06, - "loss": 0.1704, + "epoch": 1.5275345947472465, + "grad_norm": 4.420364930901948, + "learning_rate": 1.0189020908996255e-05, + "loss": 0.6211, "step": 10818 }, { - "epoch": 2.2686097714405538, - "grad_norm": 5.878330757799028, - "learning_rate": 2.9590109546203196e-06, - "loss": 0.1969, + "epoch": 1.5276757977972324, + "grad_norm": 3.60835856013331, + "learning_rate": 1.0187496726208367e-05, + "loss": 0.5551, "step": 10819 }, { - "epoch": 2.268819459006081, - "grad_norm": 4.374341316323395, - "learning_rate": 2.957403546273998e-06, - "loss": 0.1374, + "epoch": 1.5278170008472183, + "grad_norm": 3.8806064405786644, + "learning_rate": 1.0185972539063139e-05, + "loss": 0.6031, "step": 10820 }, { - "epoch": 2.2690291465716084, - "grad_norm": 3.616805945137954, - "learning_rate": 2.9557964988729606e-06, - "loss": 0.1512, + "epoch": 1.5279582038972042, + "grad_norm": 3.772379972771627, + "learning_rate": 1.0184448347595986e-05, + "loss": 0.5701, "step": 10821 }, { - "epoch": 2.269238834137136, - "grad_norm": 4.158588774981077, - "learning_rate": 2.954189812499565e-06, - "loss": 0.132, + "epoch": 1.52809940694719, + "grad_norm": 3.3146146513027634, + "learning_rate": 1.0182924151842337e-05, + "loss": 0.4833, "step": 10822 }, { - "epoch": 2.269448521702663, - "grad_norm": 3.88399857024793, - "learning_rate": 2.952583487236167e-06, - "loss": 0.1399, + "epoch": 1.528240609997176, + "grad_norm": 3.1263709259459835, + "learning_rate": 1.0181399951837607e-05, + "loss": 0.6202, "step": 10823 }, { - "epoch": 2.2696582092681905, - "grad_norm": 4.035728487294192, - "learning_rate": 2.950977523165084e-06, - "loss": 0.1456, + "epoch": 1.5283818130471618, + "grad_norm": 2.985224087053555, + "learning_rate": 1.0179875747617221e-05, + "loss": 0.5199, "step": 10824 }, { - "epoch": 2.2698678968337176, - "grad_norm": 4.303443582495658, - "learning_rate": 2.949371920368632e-06, - "loss": 0.1035, + "epoch": 1.5285230160971477, + "grad_norm": 3.56091592489957, + "learning_rate": 1.01783515392166e-05, + "loss": 0.5115, "step": 10825 }, { - "epoch": 2.270077584399245, - "grad_norm": 5.761793592742784, - "learning_rate": 2.947766678929098e-06, - "loss": 0.2102, + "epoch": 1.5286642191471336, + "grad_norm": 3.628655433667966, + "learning_rate": 1.0176827326671168e-05, + "loss": 0.6372, "step": 10826 }, { - "epoch": 2.2702872719647726, - "grad_norm": 3.7182545241688922, - "learning_rate": 2.9461617989287504e-06, - "loss": 0.13, + "epoch": 1.5288054221971195, + "grad_norm": 3.6780830269367666, + "learning_rate": 1.0175303110016343e-05, + "loss": 0.6047, "step": 10827 }, { - "epoch": 2.2704969595302997, - "grad_norm": 4.19171275587322, - "learning_rate": 2.9445572804498446e-06, - "loss": 0.1367, + "epoch": 1.5289466252471053, + "grad_norm": 3.872800740182754, + "learning_rate": 1.017377888928755e-05, + "loss": 0.5073, "step": 10828 }, { - "epoch": 2.2707066470958273, - "grad_norm": 3.848244528166398, - "learning_rate": 2.942953123574621e-06, - "loss": 0.1292, + "epoch": 1.5290878282970912, + "grad_norm": 4.571592073931463, + "learning_rate": 1.0172254664520212e-05, + "loss": 0.7177, "step": 10829 }, { - "epoch": 2.2709163346613543, - "grad_norm": 4.2612739434881615, - "learning_rate": 2.9413493283852847e-06, - "loss": 0.1423, + "epoch": 1.529229031347077, + "grad_norm": 3.123048871079358, + "learning_rate": 1.017073043574975e-05, + "loss": 0.5308, "step": 10830 }, { - "epoch": 2.271126022226882, - "grad_norm": 3.998077519648316, - "learning_rate": 2.939745894964037e-06, - "loss": 0.1322, + "epoch": 1.529370234397063, + "grad_norm": 3.4301184161878258, + "learning_rate": 1.0169206203011585e-05, + "loss": 0.5928, "step": 10831 }, { - "epoch": 2.2713357097924094, - "grad_norm": 3.9673085609332546, - "learning_rate": 2.938142823393061e-06, - "loss": 0.1237, + "epoch": 1.5295114374470489, + "grad_norm": 3.3770221408773113, + "learning_rate": 1.016768196634114e-05, + "loss": 0.5661, "step": 10832 }, { - "epoch": 2.2715453973579365, - "grad_norm": 4.70776790576569, - "learning_rate": 2.9365401137545123e-06, - "loss": 0.1692, + "epoch": 1.5296526404970348, + "grad_norm": 3.564144779157971, + "learning_rate": 1.016615772577384e-05, + "loss": 0.5623, "step": 10833 }, { - "epoch": 2.271755084923464, - "grad_norm": 4.793834441964729, - "learning_rate": 2.9349377661305325e-06, - "loss": 0.1757, + "epoch": 1.5297938435470206, + "grad_norm": 3.9949911416556816, + "learning_rate": 1.0164633481345108e-05, + "loss": 0.5961, "step": 10834 }, { - "epoch": 2.2719647724889915, - "grad_norm": 4.66836977197758, - "learning_rate": 2.933335780603248e-06, - "loss": 0.187, + "epoch": 1.5299350465970065, + "grad_norm": 3.4903135639271707, + "learning_rate": 1.0163109233090362e-05, + "loss": 0.6087, "step": 10835 }, { - "epoch": 2.2721744600545186, - "grad_norm": 4.285000965839606, - "learning_rate": 2.9317341572547574e-06, - "loss": 0.1619, + "epoch": 1.5300762496469924, + "grad_norm": 3.7903213727824485, + "learning_rate": 1.0161584981045029e-05, + "loss": 0.6402, "step": 10836 }, { - "epoch": 2.272384147620046, - "grad_norm": 5.882703803553985, - "learning_rate": 2.9301328961671548e-06, - "loss": 0.1673, + "epoch": 1.5302174526969783, + "grad_norm": 3.7117570409018743, + "learning_rate": 1.0160060725244531e-05, + "loss": 0.6447, "step": 10837 }, { - "epoch": 2.2725938351855737, - "grad_norm": 4.07008667667668, - "learning_rate": 2.9285319974225033e-06, - "loss": 0.1508, + "epoch": 1.5303586557469642, + "grad_norm": 3.8528055911004713, + "learning_rate": 1.0158536465724291e-05, + "loss": 0.5457, "step": 10838 }, { - "epoch": 2.2728035227511008, - "grad_norm": 3.184363690106782, - "learning_rate": 2.926931461102849e-06, - "loss": 0.1246, + "epoch": 1.53049985879695, + "grad_norm": 3.838165064205097, + "learning_rate": 1.0157012202519732e-05, + "loss": 0.6316, "step": 10839 }, { - "epoch": 2.2730132103166283, - "grad_norm": 4.259076507754931, - "learning_rate": 2.925331287290224e-06, - "loss": 0.1315, + "epoch": 1.530641061846936, + "grad_norm": 3.570666249953665, + "learning_rate": 1.0155487935666277e-05, + "loss": 0.519, "step": 10840 }, { - "epoch": 2.273222897882156, - "grad_norm": 4.39751364669792, - "learning_rate": 2.9237314760666436e-06, - "loss": 0.1525, + "epoch": 1.5307822648969218, + "grad_norm": 3.1061534079062816, + "learning_rate": 1.0153963665199346e-05, + "loss": 0.5304, "step": 10841 }, { - "epoch": 2.273432585447683, - "grad_norm": 4.845012199337564, - "learning_rate": 2.9221320275140996e-06, - "loss": 0.1809, + "epoch": 1.5309234679469077, + "grad_norm": 4.150516022070862, + "learning_rate": 1.0152439391154372e-05, + "loss": 0.7266, "step": 10842 }, { - "epoch": 2.2736422730132104, - "grad_norm": 3.873528490021121, - "learning_rate": 2.920532941714561e-06, - "loss": 0.133, + "epoch": 1.5310646709968936, + "grad_norm": 4.389242780359409, + "learning_rate": 1.015091511356677e-05, + "loss": 0.5468, "step": 10843 }, { - "epoch": 2.2738519605787375, - "grad_norm": 4.602904997656741, - "learning_rate": 2.9189342187499913e-06, - "loss": 0.19, + "epoch": 1.5312058740468795, + "grad_norm": 3.628326860197344, + "learning_rate": 1.0149390832471965e-05, + "loss": 0.6314, "step": 10844 }, { - "epoch": 2.274061648144265, - "grad_norm": 4.656508198162215, - "learning_rate": 2.9173358587023215e-06, - "loss": 0.1515, + "epoch": 1.5313470770968651, + "grad_norm": 3.3240980300636638, + "learning_rate": 1.0147866547905383e-05, + "loss": 0.6569, "step": 10845 }, { - "epoch": 2.2742713357097926, - "grad_norm": 3.511222136721952, - "learning_rate": 2.915737861653476e-06, - "loss": 0.1267, + "epoch": 1.531488280146851, + "grad_norm": 3.2715836993123477, + "learning_rate": 1.0146342259902446e-05, + "loss": 0.5062, "step": 10846 }, { - "epoch": 2.2744810232753196, - "grad_norm": 4.887492108860774, - "learning_rate": 2.9141402276853527e-06, - "loss": 0.1108, + "epoch": 1.531629483196837, + "grad_norm": 4.925787943714355, + "learning_rate": 1.0144817968498578e-05, + "loss": 0.6173, "step": 10847 }, { - "epoch": 2.274690710840847, - "grad_norm": 4.107942201418428, - "learning_rate": 2.912542956879828e-06, - "loss": 0.1612, + "epoch": 1.5317706862468228, + "grad_norm": 3.050127053992394, + "learning_rate": 1.0143293673729202e-05, + "loss": 0.51, "step": 10848 }, { - "epoch": 2.2749003984063743, - "grad_norm": 4.382246120681435, - "learning_rate": 2.9109460493187715e-06, - "loss": 0.1592, + "epoch": 1.5319118892968087, + "grad_norm": 4.0111594728175355, + "learning_rate": 1.0141769375629744e-05, + "loss": 0.6394, "step": 10849 }, { - "epoch": 2.275110085971902, - "grad_norm": 4.689642117714378, - "learning_rate": 2.909349505084029e-06, - "loss": 0.1746, + "epoch": 1.5320530923467945, + "grad_norm": 3.4653401401748085, + "learning_rate": 1.0140245074235624e-05, + "loss": 0.5273, "step": 10850 }, { - "epoch": 2.2753197735374293, - "grad_norm": 3.531439313650851, - "learning_rate": 2.9077533242574184e-06, - "loss": 0.1405, + "epoch": 1.5321942953967804, + "grad_norm": 3.749479047890687, + "learning_rate": 1.0138720769582275e-05, + "loss": 0.5919, "step": 10851 }, { - "epoch": 2.2755294611029564, - "grad_norm": 3.7598197933109607, - "learning_rate": 2.9061575069207494e-06, - "loss": 0.1325, + "epoch": 1.5323354984467663, + "grad_norm": 3.758078417935565, + "learning_rate": 1.013719646170511e-05, + "loss": 0.6169, "step": 10852 }, { - "epoch": 2.275739148668484, - "grad_norm": 4.357492588350868, - "learning_rate": 2.904562053155816e-06, - "loss": 0.1399, + "epoch": 1.5324767014967522, + "grad_norm": 3.3955507907120395, + "learning_rate": 1.013567215063956e-05, + "loss": 0.6259, "step": 10853 }, { - "epoch": 2.2759488362340115, - "grad_norm": 3.959555353262392, - "learning_rate": 2.9029669630443834e-06, - "loss": 0.1622, + "epoch": 1.532617904546738, + "grad_norm": 3.6475645910660535, + "learning_rate": 1.013414783642105e-05, + "loss": 0.582, "step": 10854 }, { - "epoch": 2.2761585237995385, - "grad_norm": 3.408510224420956, - "learning_rate": 2.9013722366681997e-06, - "loss": 0.1034, + "epoch": 1.532759107596724, + "grad_norm": 3.8963786497547175, + "learning_rate": 1.0132623519084999e-05, + "loss": 0.6456, "step": 10855 }, { - "epoch": 2.276368211365066, - "grad_norm": 4.545920661949794, - "learning_rate": 2.899777874109001e-06, - "loss": 0.1474, + "epoch": 1.5329003106467098, + "grad_norm": 3.4837219679644447, + "learning_rate": 1.0131099198666835e-05, + "loss": 0.5232, "step": 10856 }, { - "epoch": 2.2765778989305936, - "grad_norm": 3.6360900411770922, - "learning_rate": 2.8981838754485036e-06, - "loss": 0.143, + "epoch": 1.5330415136966957, + "grad_norm": 4.315342235930342, + "learning_rate": 1.0129574875201984e-05, + "loss": 0.783, "step": 10857 }, { - "epoch": 2.2767875864961207, - "grad_norm": 4.126495728531195, - "learning_rate": 2.896590240768401e-06, - "loss": 0.1315, + "epoch": 1.5331827167466816, + "grad_norm": 3.649671292498803, + "learning_rate": 1.0128050548725865e-05, + "loss": 0.6658, "step": 10858 }, { - "epoch": 2.276997274061648, - "grad_norm": 6.605061217475966, - "learning_rate": 2.8949969701503653e-06, - "loss": 0.1436, + "epoch": 1.5333239197966675, + "grad_norm": 3.7723236022748474, + "learning_rate": 1.012652621927391e-05, + "loss": 0.5657, "step": 10859 }, { - "epoch": 2.2772069616271757, - "grad_norm": 4.371259412511121, - "learning_rate": 2.8934040636760608e-06, - "loss": 0.1517, + "epoch": 1.5334651228466534, + "grad_norm": 3.857105580962735, + "learning_rate": 1.0125001886881543e-05, + "loss": 0.5787, "step": 10860 }, { - "epoch": 2.277416649192703, - "grad_norm": 4.684086543430655, - "learning_rate": 2.891811521427121e-06, - "loss": 0.1738, + "epoch": 1.5336063258966393, + "grad_norm": 3.469997853997336, + "learning_rate": 1.0123477551584182e-05, + "loss": 0.5886, "step": 10861 }, { - "epoch": 2.2776263367582303, - "grad_norm": 4.905707287726102, - "learning_rate": 2.8902193434851723e-06, - "loss": 0.1439, + "epoch": 1.5337475289466251, + "grad_norm": 3.5571859371648604, + "learning_rate": 1.0121953213417256e-05, + "loss": 0.5346, "step": 10862 }, { - "epoch": 2.2778360243237574, - "grad_norm": 3.8266854917527926, - "learning_rate": 2.8886275299318133e-06, - "loss": 0.1527, + "epoch": 1.533888731996611, + "grad_norm": 3.587325552848159, + "learning_rate": 1.0120428872416192e-05, + "loss": 0.5733, "step": 10863 }, { - "epoch": 2.278045711889285, - "grad_norm": 3.872439496742937, - "learning_rate": 2.887036080848624e-06, - "loss": 0.1578, + "epoch": 1.534029935046597, + "grad_norm": 4.385468868983309, + "learning_rate": 1.0118904528616411e-05, + "loss": 0.7094, "step": 10864 }, { - "epoch": 2.2782553994548125, - "grad_norm": 3.944525539759928, - "learning_rate": 2.8854449963171737e-06, - "loss": 0.1522, + "epoch": 1.5341711380965828, + "grad_norm": 3.220405323224042, + "learning_rate": 1.0117380182053343e-05, + "loss": 0.4289, "step": 10865 }, { - "epoch": 2.2784650870203396, - "grad_norm": 5.640306986055992, - "learning_rate": 2.8838542764190113e-06, - "loss": 0.2121, + "epoch": 1.5343123411465687, + "grad_norm": 4.074739176308353, + "learning_rate": 1.0115855832762408e-05, + "loss": 0.6375, "step": 10866 }, { - "epoch": 2.278674774585867, - "grad_norm": 4.2313843551486725, - "learning_rate": 2.882263921235655e-06, - "loss": 0.136, + "epoch": 1.5344535441965546, + "grad_norm": 2.8811642147672734, + "learning_rate": 1.0114331480779032e-05, + "loss": 0.4714, "step": 10867 }, { - "epoch": 2.278884462151394, - "grad_norm": 3.734333618186167, - "learning_rate": 2.8806739308486164e-06, - "loss": 0.1337, + "epoch": 1.5345947472465404, + "grad_norm": 3.9884244352780023, + "learning_rate": 1.0112807126138646e-05, + "loss": 0.7534, "step": 10868 }, { - "epoch": 2.2790941497169217, - "grad_norm": 4.5720340818010685, - "learning_rate": 2.8790843053393915e-06, - "loss": 0.1497, + "epoch": 1.5347359502965263, + "grad_norm": 3.000433603460855, + "learning_rate": 1.0111282768876668e-05, + "loss": 0.5358, "step": 10869 }, { - "epoch": 2.2793038372824492, - "grad_norm": 5.102434722344642, - "learning_rate": 2.8774950447894455e-06, - "loss": 0.196, + "epoch": 1.5348771533465122, + "grad_norm": 3.3341796864822375, + "learning_rate": 1.0109758409028527e-05, + "loss": 0.4796, "step": 10870 }, { - "epoch": 2.2795135248479763, - "grad_norm": 3.4647175461219244, - "learning_rate": 2.8759061492802286e-06, - "loss": 0.1395, + "epoch": 1.535018356396498, + "grad_norm": 3.8213246943267243, + "learning_rate": 1.0108234046629649e-05, + "loss": 0.6433, "step": 10871 }, { - "epoch": 2.279723212413504, - "grad_norm": 3.8137786472061084, - "learning_rate": 2.874317618893181e-06, - "loss": 0.1335, + "epoch": 1.535159559446484, + "grad_norm": 4.041295045368597, + "learning_rate": 1.0106709681715456e-05, + "loss": 0.5925, "step": 10872 }, { - "epoch": 2.2799328999790314, - "grad_norm": 3.0033002310764525, - "learning_rate": 2.8727294537097107e-06, - "loss": 0.1053, + "epoch": 1.5353007624964699, + "grad_norm": 3.1904937607155457, + "learning_rate": 1.0105185314321379e-05, + "loss": 0.4692, "step": 10873 }, { - "epoch": 2.2801425875445585, - "grad_norm": 3.4224330276172394, - "learning_rate": 2.8711416538112214e-06, - "loss": 0.1314, + "epoch": 1.5354419655464557, + "grad_norm": 3.368556222712298, + "learning_rate": 1.0103660944482841e-05, + "loss": 0.5227, "step": 10874 }, { - "epoch": 2.280352275110086, - "grad_norm": 4.411927999008102, - "learning_rate": 2.869554219279086e-06, - "loss": 0.1156, + "epoch": 1.5355831685964416, + "grad_norm": 3.682840019916227, + "learning_rate": 1.0102136572235264e-05, + "loss": 0.7302, "step": 10875 }, { - "epoch": 2.2805619626756135, - "grad_norm": 4.815637932898665, - "learning_rate": 2.8679671501946617e-06, - "loss": 0.1665, + "epoch": 1.5357243716464275, + "grad_norm": 3.7408249587892626, + "learning_rate": 1.0100612197614076e-05, + "loss": 0.5307, "step": 10876 }, { - "epoch": 2.2807716502411406, - "grad_norm": 4.219286958201543, - "learning_rate": 2.86638044663929e-06, - "loss": 0.1426, + "epoch": 1.5358655746964134, + "grad_norm": 3.4428531654573837, + "learning_rate": 1.0099087820654712e-05, + "loss": 0.5151, "step": 10877 }, { - "epoch": 2.280981337806668, - "grad_norm": 3.7153800293720622, - "learning_rate": 2.8647941086942967e-06, - "loss": 0.1346, + "epoch": 1.5360067777463993, + "grad_norm": 3.9639534357029484, + "learning_rate": 1.0097563441392582e-05, + "loss": 0.6823, "step": 10878 }, { - "epoch": 2.2811910253721956, - "grad_norm": 4.4891993870687195, - "learning_rate": 2.8632081364409804e-06, - "loss": 0.1612, + "epoch": 1.5361479807963851, + "grad_norm": 4.277259759764523, + "learning_rate": 1.0096039059863123e-05, + "loss": 0.7096, "step": 10879 }, { - "epoch": 2.2814007129377227, - "grad_norm": 5.333259823322492, - "learning_rate": 2.8616225299606228e-06, - "loss": 0.1905, + "epoch": 1.536289183846371, + "grad_norm": 3.5369173275654973, + "learning_rate": 1.0094514676101759e-05, + "loss": 0.6019, "step": 10880 }, { - "epoch": 2.2816104005032503, - "grad_norm": 3.892640192888336, - "learning_rate": 2.860037289334494e-06, - "loss": 0.1081, + "epoch": 1.536430386896357, + "grad_norm": 3.127945643973049, + "learning_rate": 1.0092990290143912e-05, + "loss": 0.5274, "step": 10881 }, { - "epoch": 2.2818200880687773, - "grad_norm": 3.121292950721039, - "learning_rate": 2.858452414643834e-06, - "loss": 0.1059, + "epoch": 1.5365715899463428, + "grad_norm": 3.995972761588996, + "learning_rate": 1.0091465902025012e-05, + "loss": 0.6019, "step": 10882 }, { - "epoch": 2.282029775634305, - "grad_norm": 4.3195703498059395, - "learning_rate": 2.856867905969879e-06, - "loss": 0.1483, + "epoch": 1.5367127929963287, + "grad_norm": 2.772659005933049, + "learning_rate": 1.0089941511780485e-05, + "loss": 0.4603, "step": 10883 }, { - "epoch": 2.2822394631998324, - "grad_norm": 4.501755302741633, - "learning_rate": 2.8552837633938314e-06, - "loss": 0.1624, + "epoch": 1.5368539960463146, + "grad_norm": 4.108210193490873, + "learning_rate": 1.0088417119445752e-05, + "loss": 0.6784, "step": 10884 }, { - "epoch": 2.2824491507653595, - "grad_norm": 3.8154818098268546, - "learning_rate": 2.8536999869968807e-06, - "loss": 0.1548, + "epoch": 1.5369951990963004, + "grad_norm": 3.7366079617792307, + "learning_rate": 1.008689272505625e-05, + "loss": 0.545, "step": 10885 }, { - "epoch": 2.282658838330887, - "grad_norm": 3.99808293556011, - "learning_rate": 2.852116576860199e-06, - "loss": 0.1541, + "epoch": 1.5371364021462863, + "grad_norm": 3.4338635343940407, + "learning_rate": 1.0085368328647395e-05, + "loss": 0.632, "step": 10886 }, { - "epoch": 2.2828685258964145, - "grad_norm": 4.1879128730303234, - "learning_rate": 2.8505335330649476e-06, - "loss": 0.1562, + "epoch": 1.5372776051962722, + "grad_norm": 3.532957904935381, + "learning_rate": 1.0083843930254616e-05, + "loss": 0.6199, "step": 10887 }, { - "epoch": 2.2830782134619416, - "grad_norm": 5.431519078517125, - "learning_rate": 2.8489508556922464e-06, - "loss": 0.1866, + "epoch": 1.537418808246258, + "grad_norm": 4.099484117642051, + "learning_rate": 1.008231952991334e-05, + "loss": 0.5915, "step": 10888 }, { - "epoch": 2.283287901027469, - "grad_norm": 4.785016465188952, - "learning_rate": 2.847368544823216e-06, - "loss": 0.1786, + "epoch": 1.537560011296244, + "grad_norm": 4.195137685907803, + "learning_rate": 1.0080795127658995e-05, + "loss": 0.5924, "step": 10889 }, { - "epoch": 2.2834975885929962, - "grad_norm": 3.9230401757289544, - "learning_rate": 2.845786600538957e-06, - "loss": 0.1495, + "epoch": 1.5377012143462299, + "grad_norm": 3.287018401571734, + "learning_rate": 1.0079270723527005e-05, + "loss": 0.4986, "step": 10890 }, { - "epoch": 2.2837072761585238, - "grad_norm": 4.843360176089437, - "learning_rate": 2.844205022920542e-06, - "loss": 0.1813, + "epoch": 1.5378424173962157, + "grad_norm": 3.867946739072173, + "learning_rate": 1.0077746317552802e-05, + "loss": 0.5672, "step": 10891 }, { - "epoch": 2.2839169637240513, - "grad_norm": 5.499741147700227, - "learning_rate": 2.842623812049029e-06, - "loss": 0.1788, + "epoch": 1.5379836204462016, + "grad_norm": 3.9683482267674024, + "learning_rate": 1.0076221909771805e-05, + "loss": 0.6451, "step": 10892 }, { - "epoch": 2.2841266512895784, - "grad_norm": 4.780227833531306, - "learning_rate": 2.8410429680054617e-06, - "loss": 0.1541, + "epoch": 1.5381248234961875, + "grad_norm": 3.9692343494778886, + "learning_rate": 1.007469750021944e-05, + "loss": 0.6517, "step": 10893 }, { - "epoch": 2.284336338855106, - "grad_norm": 5.436887340210706, - "learning_rate": 2.8394624908708568e-06, - "loss": 0.1522, + "epoch": 1.5382660265461734, + "grad_norm": 3.6171686861649275, + "learning_rate": 1.0073173088931143e-05, + "loss": 0.5841, "step": 10894 }, { - "epoch": 2.2845460264206334, - "grad_norm": 3.6591589581961452, - "learning_rate": 2.8378823807262202e-06, - "loss": 0.149, + "epoch": 1.5384072295961593, + "grad_norm": 2.8364643047584783, + "learning_rate": 1.007164867594233e-05, + "loss": 0.4696, "step": 10895 }, { - "epoch": 2.2847557139861605, - "grad_norm": 3.4198537755217413, - "learning_rate": 2.8363026376525347e-06, - "loss": 0.1154, + "epoch": 1.5385484326461452, + "grad_norm": 3.1662089758908203, + "learning_rate": 1.0070124261288437e-05, + "loss": 0.5704, "step": 10896 }, { - "epoch": 2.284965401551688, - "grad_norm": 4.329877872852249, - "learning_rate": 2.834723261730761e-06, - "loss": 0.1568, + "epoch": 1.538689635696131, + "grad_norm": 3.504347518699753, + "learning_rate": 1.0068599845004885e-05, + "loss": 0.4925, "step": 10897 }, { - "epoch": 2.2851750891172156, - "grad_norm": 4.851258982694854, - "learning_rate": 2.8331442530418463e-06, - "loss": 0.1631, + "epoch": 1.538830838746117, + "grad_norm": 3.444490853475091, + "learning_rate": 1.0067075427127103e-05, + "loss": 0.5464, "step": 10898 }, { - "epoch": 2.2853847766827426, - "grad_norm": 3.551350542815262, - "learning_rate": 2.8315656116667234e-06, - "loss": 0.1177, + "epoch": 1.5389720417961028, + "grad_norm": 3.1880651206828645, + "learning_rate": 1.0065551007690515e-05, + "loss": 0.5048, "step": 10899 }, { - "epoch": 2.28559446424827, - "grad_norm": 3.911238506998745, - "learning_rate": 2.829987337686295e-06, - "loss": 0.152, + "epoch": 1.5391132448460887, + "grad_norm": 3.6171025722897694, + "learning_rate": 1.0064026586730553e-05, + "loss": 0.5443, "step": 10900 }, { - "epoch": 2.2858041518137973, - "grad_norm": 3.2436121767719146, - "learning_rate": 2.828409431181448e-06, - "loss": 0.1052, + "epoch": 1.5392544478960746, + "grad_norm": 3.893147381024192, + "learning_rate": 1.0062502164282638e-05, + "loss": 0.632, "step": 10901 }, { - "epoch": 2.286013839379325, - "grad_norm": 5.065058671621125, - "learning_rate": 2.8268318922330597e-06, - "loss": 0.1328, + "epoch": 1.5393956509460605, + "grad_norm": 2.7743224846134216, + "learning_rate": 1.00609777403822e-05, + "loss": 0.4383, "step": 10902 }, { - "epoch": 2.2862235269448523, - "grad_norm": 3.688865624184741, - "learning_rate": 2.825254720921975e-06, - "loss": 0.1356, + "epoch": 1.5395368539960463, + "grad_norm": 2.8162866545508973, + "learning_rate": 1.0059453315064666e-05, + "loss": 0.5759, "step": 10903 }, { - "epoch": 2.2864332145103794, - "grad_norm": 4.472086900387992, - "learning_rate": 2.823677917329033e-06, - "loss": 0.114, + "epoch": 1.5396780570460322, + "grad_norm": 3.7527013171827748, + "learning_rate": 1.0057928888365462e-05, + "loss": 0.5241, "step": 10904 }, { - "epoch": 2.286642902075907, - "grad_norm": 4.257352103596923, - "learning_rate": 2.8221014815350447e-06, - "loss": 0.1657, + "epoch": 1.539819260096018, + "grad_norm": 4.002753136683803, + "learning_rate": 1.0056404460320018e-05, + "loss": 0.6505, "step": 10905 }, { - "epoch": 2.2868525896414345, - "grad_norm": 4.890087067822891, - "learning_rate": 2.8205254136208016e-06, - "loss": 0.129, + "epoch": 1.539960463146004, + "grad_norm": 4.29698550002353, + "learning_rate": 1.0054880030963756e-05, + "loss": 0.7404, "step": 10906 }, { - "epoch": 2.2870622772069615, - "grad_norm": 4.336838988182833, - "learning_rate": 2.818949713667083e-06, - "loss": 0.1662, + "epoch": 1.5401016661959899, + "grad_norm": 4.650099967825517, + "learning_rate": 1.0053355600332109e-05, + "loss": 0.6484, "step": 10907 }, { - "epoch": 2.287271964772489, - "grad_norm": 3.924415873564747, - "learning_rate": 2.817374381754653e-06, - "loss": 0.136, + "epoch": 1.5402428692459758, + "grad_norm": 3.1023745987678275, + "learning_rate": 1.0051831168460497e-05, + "loss": 0.4934, "step": 10908 }, { - "epoch": 2.287481652338016, - "grad_norm": 4.327641954432818, - "learning_rate": 2.815799417964238e-06, - "loss": 0.1365, + "epoch": 1.5403840722959616, + "grad_norm": 3.805794474564786, + "learning_rate": 1.0050306735384354e-05, + "loss": 0.6202, "step": 10909 }, { - "epoch": 2.2876913399035437, - "grad_norm": 5.480002568661021, - "learning_rate": 2.814224822376562e-06, - "loss": 0.2157, + "epoch": 1.5405252753459475, + "grad_norm": 4.0769701433264185, + "learning_rate": 1.0048782301139102e-05, + "loss": 0.6403, "step": 10910 }, { - "epoch": 2.287901027469071, - "grad_norm": 4.577886241266605, - "learning_rate": 2.8126505950723305e-06, - "loss": 0.1727, + "epoch": 1.5406664783959334, + "grad_norm": 3.661659374219437, + "learning_rate": 1.0047257865760173e-05, + "loss": 0.6022, "step": 10911 }, { - "epoch": 2.2881107150345983, - "grad_norm": 4.649738268214217, - "learning_rate": 2.811076736132221e-06, - "loss": 0.1809, + "epoch": 1.5408076814459193, + "grad_norm": 2.9733222828040256, + "learning_rate": 1.0045733429282989e-05, + "loss": 0.4623, "step": 10912 }, { - "epoch": 2.288320402600126, - "grad_norm": 4.032422574647493, - "learning_rate": 2.8095032456368953e-06, - "loss": 0.1156, + "epoch": 1.5409488844959052, + "grad_norm": 3.4721395546704117, + "learning_rate": 1.0044208991742981e-05, + "loss": 0.6567, "step": 10913 }, { - "epoch": 2.2885300901656533, - "grad_norm": 4.929841683847431, - "learning_rate": 2.8079301236670032e-06, - "loss": 0.1333, + "epoch": 1.541090087545891, + "grad_norm": 3.449648298353695, + "learning_rate": 1.0042684553175575e-05, + "loss": 0.604, "step": 10914 }, { - "epoch": 2.2887397777311804, - "grad_norm": 4.534548000746281, - "learning_rate": 2.806357370303162e-06, - "loss": 0.186, + "epoch": 1.541231290595877, + "grad_norm": 3.9459203639663087, + "learning_rate": 1.0041160113616199e-05, + "loss": 0.6785, "step": 10915 }, { - "epoch": 2.288949465296708, - "grad_norm": 4.751398053541186, - "learning_rate": 2.8047849856259856e-06, - "loss": 0.1805, + "epoch": 1.5413724936458628, + "grad_norm": 4.241477861162531, + "learning_rate": 1.0039635673100282e-05, + "loss": 0.5823, "step": 10916 }, { - "epoch": 2.2891591528622355, - "grad_norm": 4.249496927293688, - "learning_rate": 2.803212969716055e-06, - "loss": 0.1117, + "epoch": 1.5415136966958487, + "grad_norm": 3.5979469168791915, + "learning_rate": 1.0038111231663243e-05, + "loss": 0.5902, "step": 10917 }, { - "epoch": 2.2893688404277626, - "grad_norm": 4.1953376578574755, - "learning_rate": 2.801641322653945e-06, - "loss": 0.1538, + "epoch": 1.5416548997458346, + "grad_norm": 3.3145853515199946, + "learning_rate": 1.0036586789340518e-05, + "loss": 0.5782, "step": 10918 }, { - "epoch": 2.28957852799329, - "grad_norm": 3.8180116843929297, - "learning_rate": 2.8000700445202e-06, - "loss": 0.1256, + "epoch": 1.5417961027958205, + "grad_norm": 4.052384017712191, + "learning_rate": 1.0035062346167535e-05, + "loss": 0.6741, "step": 10919 }, { - "epoch": 2.289788215558817, - "grad_norm": 4.61342975553467, - "learning_rate": 2.798499135395355e-06, - "loss": 0.1548, + "epoch": 1.5419373058458063, + "grad_norm": 4.067502849031871, + "learning_rate": 1.0033537902179716e-05, + "loss": 0.6779, "step": 10920 }, { - "epoch": 2.2899979031243447, - "grad_norm": 5.468465751174898, - "learning_rate": 2.7969285953599203e-06, - "loss": 0.1234, + "epoch": 1.5420785088957922, + "grad_norm": 3.615298237457858, + "learning_rate": 1.0032013457412488e-05, + "loss": 0.6203, "step": 10921 }, { - "epoch": 2.2902075906898722, - "grad_norm": 4.820803288641013, - "learning_rate": 2.7953584244943842e-06, - "loss": 0.1697, + "epoch": 1.5422197119457781, + "grad_norm": 3.36791644879669, + "learning_rate": 1.0030489011901285e-05, + "loss": 0.6217, "step": 10922 }, { - "epoch": 2.2904172782553993, - "grad_norm": 5.563363845909404, - "learning_rate": 2.7937886228792256e-06, - "loss": 0.193, + "epoch": 1.542360914995764, + "grad_norm": 3.9547204754781426, + "learning_rate": 1.0028964565681531e-05, + "loss": 0.7283, "step": 10923 }, { - "epoch": 2.290626965820927, - "grad_norm": 5.1031562862616875, - "learning_rate": 2.7922191905949e-06, - "loss": 0.1628, + "epoch": 1.5425021180457499, + "grad_norm": 3.6423541218236712, + "learning_rate": 1.0027440118788649e-05, + "loss": 0.5856, "step": 10924 }, { - "epoch": 2.2908366533864544, - "grad_norm": 4.684495202838718, - "learning_rate": 2.7906501277218447e-06, - "loss": 0.1275, + "epoch": 1.5426433210957358, + "grad_norm": 3.591299023026102, + "learning_rate": 1.0025915671258074e-05, + "loss": 0.5306, "step": 10925 }, { - "epoch": 2.2910463409519815, - "grad_norm": 4.09148039356376, - "learning_rate": 2.7890814343404694e-06, - "loss": 0.1323, + "epoch": 1.5427845241457216, + "grad_norm": 3.7569841752755098, + "learning_rate": 1.0024391223125226e-05, + "loss": 0.6327, "step": 10926 }, { - "epoch": 2.291256028517509, - "grad_norm": 3.6203582418177676, - "learning_rate": 2.78751311053118e-06, - "loss": 0.1215, + "epoch": 1.5429257271957075, + "grad_norm": 3.2999671243887367, + "learning_rate": 1.002286677442554e-05, + "loss": 0.5714, "step": 10927 }, { - "epoch": 2.291465716083036, - "grad_norm": 3.776714307517992, - "learning_rate": 2.7859451563743523e-06, - "loss": 0.1423, + "epoch": 1.5430669302456934, + "grad_norm": 3.5940693999363362, + "learning_rate": 1.0021342325194441e-05, + "loss": 0.5157, "step": 10928 }, { - "epoch": 2.2916754036485636, - "grad_norm": 5.028675429087357, - "learning_rate": 2.7843775719503486e-06, - "loss": 0.1366, + "epoch": 1.5432081332956793, + "grad_norm": 3.280275048012509, + "learning_rate": 1.0019817875467352e-05, + "loss": 0.5629, "step": 10929 }, { - "epoch": 2.291885091214091, - "grad_norm": 4.713518592126404, - "learning_rate": 2.7828103573395103e-06, - "loss": 0.145, + "epoch": 1.5433493363456652, + "grad_norm": 3.281031566404936, + "learning_rate": 1.0018293425279706e-05, + "loss": 0.4773, "step": 10930 }, { - "epoch": 2.292094778779618, - "grad_norm": 5.908249613996727, - "learning_rate": 2.781243512622155e-06, - "loss": 0.1478, + "epoch": 1.543490539395651, + "grad_norm": 3.0563608862167, + "learning_rate": 1.0016768974666929e-05, + "loss": 0.5303, "step": 10931 }, { - "epoch": 2.2923044663451457, - "grad_norm": 4.501431416301457, - "learning_rate": 2.7796770378785933e-06, - "loss": 0.1512, + "epoch": 1.543631742445637, + "grad_norm": 3.263004615260855, + "learning_rate": 1.0015244523664447e-05, + "loss": 0.5046, "step": 10932 }, { - "epoch": 2.2925141539106733, - "grad_norm": 5.3048531679526185, - "learning_rate": 2.7781109331891063e-06, - "loss": 0.1785, + "epoch": 1.5437729454956228, + "grad_norm": 3.3023444517668517, + "learning_rate": 1.001372007230769e-05, + "loss": 0.5476, "step": 10933 }, { - "epoch": 2.2927238414762003, - "grad_norm": 3.916355861595163, - "learning_rate": 2.7765451986339566e-06, - "loss": 0.125, + "epoch": 1.5439141485456087, + "grad_norm": 3.531429004575241, + "learning_rate": 1.0012195620632084e-05, + "loss": 0.4837, "step": 10934 }, { - "epoch": 2.292933529041728, - "grad_norm": 4.832964778357069, - "learning_rate": 2.7749798342933944e-06, - "loss": 0.1641, + "epoch": 1.5440553515955946, + "grad_norm": 4.128862736031032, + "learning_rate": 1.0010671168673058e-05, + "loss": 0.4588, "step": 10935 }, { - "epoch": 2.2931432166072554, - "grad_norm": 4.014210604039888, - "learning_rate": 2.773414840247649e-06, - "loss": 0.1588, + "epoch": 1.5441965546455805, + "grad_norm": 3.4545110107729484, + "learning_rate": 1.0009146716466038e-05, + "loss": 0.4562, "step": 10936 }, { - "epoch": 2.2933529041727825, - "grad_norm": 5.217905132852857, - "learning_rate": 2.771850216576928e-06, - "loss": 0.1872, + "epoch": 1.5443377576955664, + "grad_norm": 2.9724091653720506, + "learning_rate": 1.0007622264046452e-05, + "loss": 0.4546, "step": 10937 }, { - "epoch": 2.29356259173831, - "grad_norm": 4.599792998793363, - "learning_rate": 2.7702859633614154e-06, - "loss": 0.137, + "epoch": 1.5444789607455522, + "grad_norm": 3.3485666957552604, + "learning_rate": 1.0006097811449726e-05, + "loss": 0.516, "step": 10938 }, { - "epoch": 2.293772279303837, - "grad_norm": 3.833527536084704, - "learning_rate": 2.768722080681291e-06, - "loss": 0.1406, + "epoch": 1.5446201637955381, + "grad_norm": 4.883901709674883, + "learning_rate": 1.0004573358711291e-05, + "loss": 0.8398, "step": 10939 }, { - "epoch": 2.2939819668693646, - "grad_norm": 5.32027297733488, - "learning_rate": 2.767158568616698e-06, - "loss": 0.186, + "epoch": 1.544761366845524, + "grad_norm": 3.6774825047371698, + "learning_rate": 1.0003048905866577e-05, + "loss": 0.651, "step": 10940 }, { - "epoch": 2.294191654434892, - "grad_norm": 3.7333278416088924, - "learning_rate": 2.765595427247777e-06, - "loss": 0.1484, + "epoch": 1.54490256989551, + "grad_norm": 3.6521124080385516, + "learning_rate": 1.0001524452951002e-05, + "loss": 0.5474, "step": 10941 }, { - "epoch": 2.2944013420004192, - "grad_norm": 3.5176573271309834, - "learning_rate": 2.764032656654637e-06, - "loss": 0.1137, + "epoch": 1.5450437729454958, + "grad_norm": 3.850576827009961, + "learning_rate": 1e-05, + "loss": 0.4882, "step": 10942 }, { - "epoch": 2.2946110295659468, - "grad_norm": 4.527064273965617, - "learning_rate": 2.7624702569173713e-06, - "loss": 0.1428, + "epoch": 1.5451849759954817, + "grad_norm": 3.9981016267525864, + "learning_rate": 9.998475547049001e-06, + "loss": 0.6308, "step": 10943 }, { - "epoch": 2.2948207171314743, - "grad_norm": 4.9475130730125425, - "learning_rate": 2.760908228116057e-06, - "loss": 0.1588, + "epoch": 1.5453261790454675, + "grad_norm": 4.956245261273162, + "learning_rate": 9.996951094133426e-06, + "loss": 0.5662, "step": 10944 }, { - "epoch": 2.2950304046970014, - "grad_norm": 5.048553808441142, - "learning_rate": 2.7593465703307555e-06, - "loss": 0.182, + "epoch": 1.5454673820954534, + "grad_norm": 4.783976378252596, + "learning_rate": 9.995426641288709e-06, + "loss": 0.8168, "step": 10945 }, { - "epoch": 2.295240092262529, - "grad_norm": 4.486613381633271, - "learning_rate": 2.7577852836414997e-06, - "loss": 0.1371, + "epoch": 1.5456085851454393, + "grad_norm": 3.615977929741095, + "learning_rate": 9.993902188550276e-06, + "loss": 0.619, "step": 10946 }, { - "epoch": 2.295449779828056, - "grad_norm": 4.230508060924311, - "learning_rate": 2.7562243681283063e-06, - "loss": 0.1551, + "epoch": 1.545749788195425, + "grad_norm": 3.9095515198498942, + "learning_rate": 9.992377735953553e-06, + "loss": 0.5923, "step": 10947 }, { - "epoch": 2.2956594673935835, - "grad_norm": 3.657073109136003, - "learning_rate": 2.754663823871181e-06, - "loss": 0.1379, + "epoch": 1.5458909912454109, + "grad_norm": 3.1330331228393087, + "learning_rate": 9.990853283533968e-06, + "loss": 0.5671, "step": 10948 }, { - "epoch": 2.295869154959111, - "grad_norm": 5.476866868740158, - "learning_rate": 2.7531036509501006e-06, - "loss": 0.1672, + "epoch": 1.5460321942953967, + "grad_norm": 3.3054078991648543, + "learning_rate": 9.989328831326945e-06, + "loss": 0.5971, "step": 10949 }, { - "epoch": 2.296078842524638, - "grad_norm": 3.747436450620385, - "learning_rate": 2.7515438494450243e-06, - "loss": 0.1358, + "epoch": 1.5461733973453826, + "grad_norm": 3.390727016763065, + "learning_rate": 9.987804379367919e-06, + "loss": 0.5824, "step": 10950 }, { - "epoch": 2.2962885300901656, - "grad_norm": 4.158123442037323, - "learning_rate": 2.7499844194359016e-06, - "loss": 0.1626, + "epoch": 1.5463146003953685, + "grad_norm": 4.229698253503014, + "learning_rate": 9.986279927692312e-06, + "loss": 0.6429, "step": 10951 }, { - "epoch": 2.296498217655693, - "grad_norm": 5.662663579374337, - "learning_rate": 2.748425361002648e-06, - "loss": 0.1595, + "epoch": 1.5464558034453544, + "grad_norm": 3.8337285158981875, + "learning_rate": 9.984755476335556e-06, + "loss": 0.6007, "step": 10952 }, { - "epoch": 2.2967079052212203, - "grad_norm": 3.820839838919107, - "learning_rate": 2.7468666742251747e-06, - "loss": 0.1651, + "epoch": 1.5465970064953403, + "grad_norm": 3.890871821218218, + "learning_rate": 9.983231025333073e-06, + "loss": 0.5898, "step": 10953 }, { - "epoch": 2.296917592786748, - "grad_norm": 3.9349774907179036, - "learning_rate": 2.745308359183364e-06, - "loss": 0.1215, + "epoch": 1.5467382095453261, + "grad_norm": 3.819259567439549, + "learning_rate": 9.981706574720296e-06, + "loss": 0.6331, "step": 10954 }, { - "epoch": 2.2971272803522753, - "grad_norm": 4.610434473242038, - "learning_rate": 2.7437504159570804e-06, - "loss": 0.1553, + "epoch": 1.546879412595312, + "grad_norm": 3.5820446940855803, + "learning_rate": 9.980182124532651e-06, + "loss": 0.6529, "step": 10955 }, { - "epoch": 2.2973369679178024, - "grad_norm": 2.6658628917792524, - "learning_rate": 2.7421928446261726e-06, - "loss": 0.0835, + "epoch": 1.547020615645298, + "grad_norm": 4.260721031234754, + "learning_rate": 9.978657674805564e-06, + "loss": 0.5508, "step": 10956 }, { - "epoch": 2.29754665548333, - "grad_norm": 4.406227439795561, - "learning_rate": 2.740635645270473e-06, - "loss": 0.1381, + "epoch": 1.5471618186952838, + "grad_norm": 3.223921431803935, + "learning_rate": 9.977133225574464e-06, + "loss": 0.429, "step": 10957 }, { - "epoch": 2.2977563430488575, - "grad_norm": 4.180955645660982, - "learning_rate": 2.739078817969787e-06, - "loss": 0.1385, + "epoch": 1.5473030217452697, + "grad_norm": 3.9918675782315147, + "learning_rate": 9.975608776874775e-06, + "loss": 0.4975, "step": 10958 }, { - "epoch": 2.2979660306143845, - "grad_norm": 4.006099900622913, - "learning_rate": 2.7375223628039025e-06, - "loss": 0.1271, + "epoch": 1.5474442247952556, + "grad_norm": 3.9052264789490567, + "learning_rate": 9.97408432874193e-06, + "loss": 0.5224, "step": 10959 }, { - "epoch": 2.298175718179912, - "grad_norm": 4.392571319093092, - "learning_rate": 2.7359662798525953e-06, - "loss": 0.1356, + "epoch": 1.5475854278452414, + "grad_norm": 3.0299842304804283, + "learning_rate": 9.972559881211353e-06, + "loss": 0.5199, "step": 10960 }, { - "epoch": 2.298385405745439, - "grad_norm": 3.636146971920943, - "learning_rate": 2.7344105691956122e-06, - "loss": 0.1437, + "epoch": 1.5477266308952273, + "grad_norm": 4.543207033493665, + "learning_rate": 9.971035434318472e-06, + "loss": 0.6872, "step": 10961 }, { - "epoch": 2.2985950933109667, - "grad_norm": 5.0847305524419, - "learning_rate": 2.7328552309126934e-06, - "loss": 0.1868, + "epoch": 1.5478678339452132, + "grad_norm": 3.5331514811431513, + "learning_rate": 9.969510988098716e-06, + "loss": 0.638, "step": 10962 }, { - "epoch": 2.298804780876494, - "grad_norm": 3.701476413708825, - "learning_rate": 2.7313002650835463e-06, - "loss": 0.1408, + "epoch": 1.548009036995199, + "grad_norm": 3.551643965214065, + "learning_rate": 9.967986542587512e-06, + "loss": 0.5838, "step": 10963 }, { - "epoch": 2.2990144684420213, - "grad_norm": 4.901698596266978, - "learning_rate": 2.7297456717878656e-06, - "loss": 0.1496, + "epoch": 1.548150240045185, + "grad_norm": 3.660109173354012, + "learning_rate": 9.966462097820289e-06, + "loss": 0.6389, "step": 10964 }, { - "epoch": 2.299224156007549, - "grad_norm": 5.314994755671248, - "learning_rate": 2.7281914511053285e-06, - "loss": 0.1327, + "epoch": 1.5482914430951709, + "grad_norm": 4.720458932388365, + "learning_rate": 9.96493765383247e-06, + "loss": 0.7642, "step": 10965 }, { - "epoch": 2.299433843573076, - "grad_norm": 4.182942187325313, - "learning_rate": 2.7266376031155984e-06, - "loss": 0.1471, + "epoch": 1.5484326461451567, + "grad_norm": 3.9246607595384146, + "learning_rate": 9.963413210659485e-06, + "loss": 0.5275, "step": 10966 }, { - "epoch": 2.2996435311386034, - "grad_norm": 3.817063763514465, - "learning_rate": 2.725084127898299e-06, - "loss": 0.1104, + "epoch": 1.5485738491951426, + "grad_norm": 3.7851812279665027, + "learning_rate": 9.961888768336758e-06, + "loss": 0.6899, "step": 10967 }, { - "epoch": 2.299853218704131, - "grad_norm": 4.3267705169489155, - "learning_rate": 2.723531025533056e-06, - "loss": 0.1567, + "epoch": 1.5487150522451285, + "grad_norm": 3.998468331831382, + "learning_rate": 9.960364326899723e-06, + "loss": 0.81, "step": 10968 }, { - "epoch": 2.300062906269658, - "grad_norm": 5.256568771421014, - "learning_rate": 2.721978296099472e-06, - "loss": 0.1567, + "epoch": 1.5488562552951144, + "grad_norm": 3.063442862938305, + "learning_rate": 9.958839886383803e-06, + "loss": 0.5054, "step": 10969 }, { - "epoch": 2.3002725938351856, - "grad_norm": 4.317323868510292, - "learning_rate": 2.7204259396771236e-06, - "loss": 0.1539, + "epoch": 1.5489974583451003, + "grad_norm": 3.3488784109610217, + "learning_rate": 9.957315446824425e-06, + "loss": 0.5648, "step": 10970 }, { - "epoch": 2.300482281400713, - "grad_norm": 3.7762725267391244, - "learning_rate": 2.7188739563455677e-06, - "loss": 0.1661, + "epoch": 1.5491386613950862, + "grad_norm": 3.9199635333857534, + "learning_rate": 9.95579100825702e-06, + "loss": 0.511, "step": 10971 }, { - "epoch": 2.30069196896624, - "grad_norm": 3.96655206650356, - "learning_rate": 2.7173223461843535e-06, - "loss": 0.1327, + "epoch": 1.549279864445072, + "grad_norm": 3.0739812756048384, + "learning_rate": 9.954266570717014e-06, + "loss": 0.5009, "step": 10972 }, { - "epoch": 2.3009016565317677, - "grad_norm": 4.938880370582554, - "learning_rate": 2.715771109272998e-06, - "loss": 0.1603, + "epoch": 1.549421067495058, + "grad_norm": 3.669459655383874, + "learning_rate": 9.952742134239832e-06, + "loss": 0.5425, "step": 10973 }, { - "epoch": 2.3011113440972952, - "grad_norm": 3.745290601015423, - "learning_rate": 2.7142202456910085e-06, - "loss": 0.1161, + "epoch": 1.5495622705450438, + "grad_norm": 3.2824250677714235, + "learning_rate": 9.951217698860902e-06, + "loss": 0.5803, "step": 10974 }, { - "epoch": 2.3013210316628223, - "grad_norm": 4.829599929006866, - "learning_rate": 2.7126697555178683e-06, - "loss": 0.1793, + "epoch": 1.5497034735950297, + "grad_norm": 3.5848429712375625, + "learning_rate": 9.949693264615649e-06, + "loss": 0.5386, "step": 10975 }, { - "epoch": 2.30153071922835, - "grad_norm": 4.236044629448255, - "learning_rate": 2.7111196388330396e-06, - "loss": 0.132, + "epoch": 1.5498446766450156, + "grad_norm": 3.6956291615012256, + "learning_rate": 9.948168831539505e-06, + "loss": 0.6402, "step": 10976 }, { - "epoch": 2.3017404067938774, - "grad_norm": 4.993906754488048, - "learning_rate": 2.709569895715972e-06, - "loss": 0.154, + "epoch": 1.5499858796950015, + "grad_norm": 3.2395045614414943, + "learning_rate": 9.946644399667894e-06, + "loss": 0.464, "step": 10977 }, { - "epoch": 2.3019500943594045, - "grad_norm": 3.6426238826710593, - "learning_rate": 2.7080205262460934e-06, - "loss": 0.1378, + "epoch": 1.5501270827449873, + "grad_norm": 3.530201912993151, + "learning_rate": 9.945119969036245e-06, + "loss": 0.5783, "step": 10978 }, { - "epoch": 2.302159781924932, - "grad_norm": 3.8866238907160073, - "learning_rate": 2.706471530502811e-06, - "loss": 0.1437, + "epoch": 1.5502682857949732, + "grad_norm": 3.0408780303815632, + "learning_rate": 9.943595539679984e-06, + "loss": 0.5223, "step": 10979 }, { - "epoch": 2.302369469490459, - "grad_norm": 5.205591909304769, - "learning_rate": 2.7049229085655103e-06, - "loss": 0.1572, + "epoch": 1.550409488844959, + "grad_norm": 3.308609321562329, + "learning_rate": 9.942071111634538e-06, + "loss": 0.4528, "step": 10980 }, { - "epoch": 2.3025791570559866, - "grad_norm": 3.361814001701675, - "learning_rate": 2.7033746605135627e-06, - "loss": 0.1192, + "epoch": 1.5505506918949448, + "grad_norm": 3.2772635392157863, + "learning_rate": 9.940546684935337e-06, + "loss": 0.5212, "step": 10981 }, { - "epoch": 2.302788844621514, - "grad_norm": 3.8730219995543362, - "learning_rate": 2.7018267864263215e-06, - "loss": 0.158, + "epoch": 1.5506918949449306, + "grad_norm": 4.672306016172381, + "learning_rate": 9.939022259617805e-06, + "loss": 0.7533, "step": 10982 }, { - "epoch": 2.302998532187041, - "grad_norm": 4.309931794256158, - "learning_rate": 2.7002792863831162e-06, - "loss": 0.162, + "epoch": 1.5508330979949165, + "grad_norm": 3.6361901857489656, + "learning_rate": 9.937497835717367e-06, + "loss": 0.586, "step": 10983 }, { - "epoch": 2.3032082197525687, - "grad_norm": 3.2208454119156444, - "learning_rate": 2.6987321604632545e-06, - "loss": 0.1356, + "epoch": 1.5509743010449024, + "grad_norm": 3.648131963915785, + "learning_rate": 9.93597341326945e-06, + "loss": 0.5248, "step": 10984 }, { - "epoch": 2.303417907318096, - "grad_norm": 4.452595458522969, - "learning_rate": 2.6971854087460368e-06, - "loss": 0.1527, + "epoch": 1.5511155040948883, + "grad_norm": 3.258287010760036, + "learning_rate": 9.934448992309486e-06, + "loss": 0.5992, "step": 10985 }, { - "epoch": 2.3036275948836233, - "grad_norm": 3.173403539647856, - "learning_rate": 2.6956390313107293e-06, - "loss": 0.0895, + "epoch": 1.5512567071448742, + "grad_norm": 3.6078168521211778, + "learning_rate": 9.9329245728729e-06, + "loss": 0.5795, "step": 10986 }, { - "epoch": 2.303837282449151, - "grad_norm": 3.537001705979659, - "learning_rate": 2.694093028236594e-06, - "loss": 0.1237, + "epoch": 1.55139791019486, + "grad_norm": 3.9607538668599616, + "learning_rate": 9.931400154995116e-06, + "loss": 0.6294, "step": 10987 }, { - "epoch": 2.304046970014678, - "grad_norm": 4.038299730515624, - "learning_rate": 2.692547399602862e-06, - "loss": 0.1532, + "epoch": 1.551539113244846, + "grad_norm": 4.090456691991101, + "learning_rate": 9.929875738711565e-06, + "loss": 0.5916, "step": 10988 }, { - "epoch": 2.3042566575802055, - "grad_norm": 4.397544426520461, - "learning_rate": 2.691002145488747e-06, - "loss": 0.1726, + "epoch": 1.5516803162948318, + "grad_norm": 4.110139101600692, + "learning_rate": 9.928351324057672e-06, + "loss": 0.5982, "step": 10989 }, { - "epoch": 2.304466345145733, - "grad_norm": 3.750266184027141, - "learning_rate": 2.6894572659734485e-06, - "loss": 0.1446, + "epoch": 1.5518215193448177, + "grad_norm": 3.206756963829273, + "learning_rate": 9.926826911068862e-06, + "loss": 0.4756, "step": 10990 }, { - "epoch": 2.30467603271126, - "grad_norm": 3.485296211674269, - "learning_rate": 2.687912761136151e-06, - "loss": 0.1179, + "epoch": 1.5519627223948036, + "grad_norm": 2.8032570694957144, + "learning_rate": 9.925302499780564e-06, + "loss": 0.4413, "step": 10991 }, { - "epoch": 2.3048857202767876, - "grad_norm": 3.920530570125744, - "learning_rate": 2.686368631056001e-06, - "loss": 0.1492, + "epoch": 1.5521039254447895, + "grad_norm": 4.168918041833502, + "learning_rate": 9.9237780902282e-06, + "loss": 0.6589, "step": 10992 }, { - "epoch": 2.305095407842315, - "grad_norm": 3.3642528232155446, - "learning_rate": 2.684824875812143e-06, - "loss": 0.0898, + "epoch": 1.5522451284947754, + "grad_norm": 3.636840634575385, + "learning_rate": 9.922253682447203e-06, + "loss": 0.5696, "step": 10993 }, { - "epoch": 2.3053050954078422, - "grad_norm": 5.588708162419901, - "learning_rate": 2.6832814954837004e-06, - "loss": 0.187, + "epoch": 1.5523863315447612, + "grad_norm": 3.2346667093673176, + "learning_rate": 9.920729276472996e-06, + "loss": 0.5692, "step": 10994 }, { - "epoch": 2.3055147829733698, - "grad_norm": 3.4597144806253626, - "learning_rate": 2.6817384901497713e-06, - "loss": 0.134, + "epoch": 1.5525275345947471, + "grad_norm": 3.5787025885546284, + "learning_rate": 9.919204872341007e-06, + "loss": 0.6254, "step": 10995 }, { - "epoch": 2.3057244705388973, - "grad_norm": 4.627972614820141, - "learning_rate": 2.680195859889435e-06, - "loss": 0.1528, + "epoch": 1.552668737644733, + "grad_norm": 3.4985745493997555, + "learning_rate": 9.91768047008666e-06, + "loss": 0.5508, "step": 10996 }, { - "epoch": 2.3059341581044244, - "grad_norm": 4.2301864415351975, - "learning_rate": 2.678653604781759e-06, - "loss": 0.1558, + "epoch": 1.552809940694719, + "grad_norm": 4.029881776238102, + "learning_rate": 9.916156069745385e-06, + "loss": 0.601, "step": 10997 }, { - "epoch": 2.306143845669952, - "grad_norm": 4.677111590584339, - "learning_rate": 2.6771117249057822e-06, - "loss": 0.1444, + "epoch": 1.5529511437447048, + "grad_norm": 3.3420150765935457, + "learning_rate": 9.91463167135261e-06, + "loss": 0.5619, "step": 10998 }, { - "epoch": 2.306353533235479, - "grad_norm": 5.284685089921044, - "learning_rate": 2.675570220340532e-06, - "loss": 0.1605, + "epoch": 1.5530923467946907, + "grad_norm": 2.9321403450381474, + "learning_rate": 9.913107274943755e-06, + "loss": 0.4683, "step": 10999 }, { - "epoch": 2.3065632208010065, - "grad_norm": 7.46246771698627, - "learning_rate": 2.6740290911650125e-06, - "loss": 0.2058, + "epoch": 1.5532335498446765, + "grad_norm": 3.7731773904722172, + "learning_rate": 9.91158288055425e-06, + "loss": 0.6206, "step": 11000 }, { - "epoch": 2.306772908366534, - "grad_norm": 3.2256380773378823, - "learning_rate": 2.6724883374582044e-06, - "loss": 0.1147, + "epoch": 1.5533747528946624, + "grad_norm": 3.235745532869052, + "learning_rate": 9.910058488219518e-06, + "loss": 0.5215, "step": 11001 }, { - "epoch": 2.306982595932061, - "grad_norm": 3.7999741504952276, - "learning_rate": 2.670947959299078e-06, - "loss": 0.1344, + "epoch": 1.5535159559446483, + "grad_norm": 3.8544401915386244, + "learning_rate": 9.90853409797499e-06, + "loss": 0.5897, "step": 11002 }, { - "epoch": 2.3071922834975886, - "grad_norm": 3.4418588787445183, - "learning_rate": 2.669407956766584e-06, - "loss": 0.128, + "epoch": 1.5536571589946342, + "grad_norm": 3.347653121218531, + "learning_rate": 9.90700970985609e-06, + "loss": 0.5605, "step": 11003 }, { - "epoch": 2.3074019710631157, - "grad_norm": 4.012557444196535, - "learning_rate": 2.6678683299396455e-06, - "loss": 0.1492, + "epoch": 1.55379836204462, + "grad_norm": 3.632005605306555, + "learning_rate": 9.905485323898243e-06, + "loss": 0.5682, "step": 11004 }, { - "epoch": 2.3076116586286433, - "grad_norm": 4.008298727172779, - "learning_rate": 2.6663290788971705e-06, - "loss": 0.1357, + "epoch": 1.553939565094606, + "grad_norm": 3.383346874243086, + "learning_rate": 9.903960940136877e-06, + "loss": 0.5214, "step": 11005 }, { - "epoch": 2.307821346194171, - "grad_norm": 5.146556463587217, - "learning_rate": 2.664790203718052e-06, - "loss": 0.1559, + "epoch": 1.5540807681445918, + "grad_norm": 4.325692870211108, + "learning_rate": 9.90243655860742e-06, + "loss": 0.631, "step": 11006 }, { - "epoch": 2.308031033759698, - "grad_norm": 3.6543128631543795, - "learning_rate": 2.6632517044811546e-06, - "loss": 0.157, + "epoch": 1.5542219711945777, + "grad_norm": 4.141956621011428, + "learning_rate": 9.900912179345293e-06, + "loss": 0.5596, "step": 11007 }, { - "epoch": 2.3082407213252254, - "grad_norm": 4.046779356658434, - "learning_rate": 2.661713581265336e-06, - "loss": 0.1323, + "epoch": 1.5543631742445636, + "grad_norm": 4.233159459021565, + "learning_rate": 9.899387802385925e-06, + "loss": 0.7085, "step": 11008 }, { - "epoch": 2.308450408890753, - "grad_norm": 4.505874717886358, - "learning_rate": 2.660175834149423e-06, - "loss": 0.1561, + "epoch": 1.5545043772945495, + "grad_norm": 3.038362143285985, + "learning_rate": 9.897863427764738e-06, + "loss": 0.4693, "step": 11009 }, { - "epoch": 2.30866009645628, - "grad_norm": 4.396419738199487, - "learning_rate": 2.6586384632122264e-06, - "loss": 0.1034, + "epoch": 1.5546455803445354, + "grad_norm": 3.1925020125216217, + "learning_rate": 9.896339055517164e-06, + "loss": 0.5525, "step": 11010 }, { - "epoch": 2.3088697840218075, - "grad_norm": 4.683882187034118, - "learning_rate": 2.6571014685325446e-06, - "loss": 0.1405, + "epoch": 1.5547867833945213, + "grad_norm": 3.4980041751199997, + "learning_rate": 9.894814685678625e-06, + "loss": 0.5529, "step": 11011 }, { - "epoch": 2.309079471587335, - "grad_norm": 4.701119434506378, - "learning_rate": 2.6555648501891473e-06, - "loss": 0.1718, + "epoch": 1.5549279864445071, + "grad_norm": 3.5712936072907557, + "learning_rate": 9.893290318284546e-06, + "loss": 0.5523, "step": 11012 }, { - "epoch": 2.309289159152862, - "grad_norm": 3.2700157587019087, - "learning_rate": 2.654028608260788e-06, - "loss": 0.121, + "epoch": 1.555069189494493, + "grad_norm": 3.3102799881955214, + "learning_rate": 9.891765953370353e-06, + "loss": 0.4521, "step": 11013 }, { - "epoch": 2.3094988467183897, - "grad_norm": 3.900264992861094, - "learning_rate": 2.6524927428262024e-06, - "loss": 0.1355, + "epoch": 1.555210392544479, + "grad_norm": 3.042106438110427, + "learning_rate": 9.890241590971475e-06, + "loss": 0.5406, "step": 11014 }, { - "epoch": 2.309708534283917, - "grad_norm": 4.223028631270602, - "learning_rate": 2.6509572539641115e-06, - "loss": 0.1363, + "epoch": 1.5553515955944648, + "grad_norm": 2.982570164844036, + "learning_rate": 9.888717231123337e-06, + "loss": 0.448, "step": 11015 }, { - "epoch": 2.3099182218494443, - "grad_norm": 5.25396807609399, - "learning_rate": 2.6494221417532064e-06, - "loss": 0.1935, + "epoch": 1.5554927986444507, + "grad_norm": 3.377970794285306, + "learning_rate": 9.88719287386136e-06, + "loss": 0.561, "step": 11016 }, { - "epoch": 2.310127909414972, - "grad_norm": 4.759936659032829, - "learning_rate": 2.6478874062721626e-06, - "loss": 0.1524, + "epoch": 1.5556340016944366, + "grad_norm": 3.782535975722361, + "learning_rate": 9.885668519220971e-06, + "loss": 0.5911, "step": 11017 }, { - "epoch": 2.310337596980499, - "grad_norm": 5.257778133248842, - "learning_rate": 2.6463530475996445e-06, - "loss": 0.1613, + "epoch": 1.5557752047444224, + "grad_norm": 3.7704138366619504, + "learning_rate": 9.884144167237595e-06, + "loss": 0.6215, "step": 11018 }, { - "epoch": 2.3105472845460264, - "grad_norm": 3.673564899557511, - "learning_rate": 2.6448190658142838e-06, - "loss": 0.1091, + "epoch": 1.5559164077944083, + "grad_norm": 4.06741716582298, + "learning_rate": 9.88261981794666e-06, + "loss": 0.6318, "step": 11019 }, { - "epoch": 2.310756972111554, - "grad_norm": 4.246249811284005, - "learning_rate": 2.6432854609947055e-06, - "loss": 0.1723, + "epoch": 1.5560576108443942, + "grad_norm": 4.094535282158565, + "learning_rate": 9.88109547138359e-06, + "loss": 0.5665, "step": 11020 }, { - "epoch": 2.310966659677081, - "grad_norm": 3.662389159895316, - "learning_rate": 2.6417522332195066e-06, - "loss": 0.1432, + "epoch": 1.55619881389438, + "grad_norm": 3.721235366169721, + "learning_rate": 9.87957112758381e-06, + "loss": 0.5356, "step": 11021 }, { - "epoch": 2.3111763472426086, - "grad_norm": 5.169935543833419, - "learning_rate": 2.6402193825672664e-06, - "loss": 0.1807, + "epoch": 1.556340016944366, + "grad_norm": 3.2925217123105597, + "learning_rate": 9.878046786582745e-06, + "loss": 0.5113, "step": 11022 }, { - "epoch": 2.3113860348081356, - "grad_norm": 4.048917475315759, - "learning_rate": 2.6386869091165467e-06, - "loss": 0.1464, + "epoch": 1.5564812199943519, + "grad_norm": 3.566011543151594, + "learning_rate": 9.876522448415822e-06, + "loss": 0.5336, "step": 11023 }, { - "epoch": 2.311595722373663, - "grad_norm": 4.236512843488925, - "learning_rate": 2.6371548129458935e-06, - "loss": 0.1611, + "epoch": 1.5566224230443377, + "grad_norm": 3.776967192416559, + "learning_rate": 9.874998113118462e-06, + "loss": 0.5697, "step": 11024 }, { - "epoch": 2.3118054099391907, - "grad_norm": 4.644487607364284, - "learning_rate": 2.6356230941338256e-06, - "loss": 0.1558, + "epoch": 1.5567636260943236, + "grad_norm": 3.161591556825347, + "learning_rate": 9.87347378072609e-06, + "loss": 0.5177, "step": 11025 }, { - "epoch": 2.312015097504718, - "grad_norm": 4.922419772766671, - "learning_rate": 2.634091752758845e-06, - "loss": 0.1656, + "epoch": 1.5569048291443095, + "grad_norm": 4.156065235815623, + "learning_rate": 9.871949451274137e-06, + "loss": 0.6063, "step": 11026 }, { - "epoch": 2.3122247850702453, - "grad_norm": 4.846652978973002, - "learning_rate": 2.632560788899441e-06, - "loss": 0.1654, + "epoch": 1.5570460321942954, + "grad_norm": 4.047820198423382, + "learning_rate": 9.870425124798019e-06, + "loss": 0.6944, "step": 11027 }, { - "epoch": 2.312434472635773, - "grad_norm": 4.270031500418427, - "learning_rate": 2.631030202634074e-06, - "loss": 0.16, + "epoch": 1.5571872352442813, + "grad_norm": 4.241571837314496, + "learning_rate": 9.868900801333168e-06, + "loss": 0.9151, "step": 11028 }, { - "epoch": 2.3126441602013, - "grad_norm": 3.817901552661584, - "learning_rate": 2.629499994041187e-06, - "loss": 0.1314, + "epoch": 1.5573284382942671, + "grad_norm": 2.6854301111055756, + "learning_rate": 9.867376480915005e-06, + "loss": 0.4321, "step": 11029 }, { - "epoch": 2.3128538477668275, - "grad_norm": 4.346036836048172, - "learning_rate": 2.6279701631992115e-06, - "loss": 0.158, + "epoch": 1.557469641344253, + "grad_norm": 3.962780893883301, + "learning_rate": 9.865852163578952e-06, + "loss": 0.7327, "step": 11030 }, { - "epoch": 2.313063535332355, - "grad_norm": 3.9678589778545326, - "learning_rate": 2.6264407101865485e-06, - "loss": 0.1803, + "epoch": 1.557610844394239, + "grad_norm": 3.2541450940530257, + "learning_rate": 9.864327849360445e-06, + "loss": 0.5365, "step": 11031 }, { - "epoch": 2.313273222897882, - "grad_norm": 5.078822317021803, - "learning_rate": 2.6249116350815917e-06, - "loss": 0.1409, + "epoch": 1.5577520474442248, + "grad_norm": 3.462869494157753, + "learning_rate": 9.862803538294894e-06, + "loss": 0.5682, "step": 11032 }, { - "epoch": 2.3134829104634096, - "grad_norm": 4.330413912235461, - "learning_rate": 2.623382937962704e-06, - "loss": 0.1419, + "epoch": 1.5578932504942107, + "grad_norm": 3.109962561887938, + "learning_rate": 9.861279230417731e-06, + "loss": 0.6087, "step": 11033 }, { - "epoch": 2.313692598028937, - "grad_norm": 4.086239909442471, - "learning_rate": 2.621854618908233e-06, - "loss": 0.1528, + "epoch": 1.5580344535441966, + "grad_norm": 3.110390532331657, + "learning_rate": 9.85975492576438e-06, + "loss": 0.5075, "step": 11034 }, { - "epoch": 2.313902285594464, - "grad_norm": 4.246507720981344, - "learning_rate": 2.6203266779965074e-06, - "loss": 0.1434, + "epoch": 1.5581756565941824, + "grad_norm": 3.1082667953036367, + "learning_rate": 9.85823062437026e-06, + "loss": 0.5156, "step": 11035 }, { - "epoch": 2.3141119731599917, - "grad_norm": 4.498810340479034, - "learning_rate": 2.6187991153058434e-06, - "loss": 0.1534, + "epoch": 1.5583168596441683, + "grad_norm": 3.124045467650272, + "learning_rate": 9.856706326270801e-06, + "loss": 0.4758, "step": 11036 }, { - "epoch": 2.314321660725519, - "grad_norm": 4.189964786486981, - "learning_rate": 2.6172719309145266e-06, - "loss": 0.1422, + "epoch": 1.5584580626941542, + "grad_norm": 3.1760384596278586, + "learning_rate": 9.855182031501426e-06, + "loss": 0.5358, "step": 11037 }, { - "epoch": 2.3145313482910463, - "grad_norm": 4.082382256871633, - "learning_rate": 2.615745124900826e-06, - "loss": 0.1569, + "epoch": 1.55859926574414, + "grad_norm": 3.643211183138645, + "learning_rate": 9.853657740097558e-06, + "loss": 0.6096, "step": 11038 }, { - "epoch": 2.314741035856574, - "grad_norm": 4.252897105437072, - "learning_rate": 2.614218697342994e-06, - "loss": 0.154, + "epoch": 1.558740468794126, + "grad_norm": 3.1600737490830504, + "learning_rate": 9.852133452094617e-06, + "loss": 0.5619, "step": 11039 }, { - "epoch": 2.314950723422101, - "grad_norm": 5.2430805769654265, - "learning_rate": 2.612692648319267e-06, - "loss": 0.1504, + "epoch": 1.5588816718441119, + "grad_norm": 4.742996082884778, + "learning_rate": 9.850609167528038e-06, + "loss": 0.5731, "step": 11040 }, { - "epoch": 2.3151604109876285, - "grad_norm": 4.808348435525007, - "learning_rate": 2.611166977907854e-06, - "loss": 0.176, + "epoch": 1.5590228748940977, + "grad_norm": 4.2004756402495484, + "learning_rate": 9.849084886433234e-06, + "loss": 0.6578, "step": 11041 }, { - "epoch": 2.3153700985531556, - "grad_norm": 4.689170116188598, - "learning_rate": 2.6096416861869467e-06, - "loss": 0.1421, + "epoch": 1.5591640779440836, + "grad_norm": 3.222498054947762, + "learning_rate": 9.847560608845632e-06, + "loss": 0.5417, "step": 11042 }, { - "epoch": 2.315579786118683, - "grad_norm": 3.8015418691963143, - "learning_rate": 2.6081167732347224e-06, - "loss": 0.1157, + "epoch": 1.5593052809940695, + "grad_norm": 3.832533987296883, + "learning_rate": 9.846036334800655e-06, + "loss": 0.7, "step": 11043 }, { - "epoch": 2.3157894736842106, - "grad_norm": 5.230349188215154, - "learning_rate": 2.606592239129332e-06, - "loss": 0.1955, + "epoch": 1.5594464840440554, + "grad_norm": 3.566351945791429, + "learning_rate": 9.844512064333726e-06, + "loss": 0.5696, "step": 11044 }, { - "epoch": 2.3159991612497377, - "grad_norm": 4.7429372302443635, - "learning_rate": 2.605068083948915e-06, - "loss": 0.1572, + "epoch": 1.5595876870940413, + "grad_norm": 3.5406809049398236, + "learning_rate": 9.842987797480271e-06, + "loss": 0.5348, "step": 11045 }, { - "epoch": 2.3162088488152652, - "grad_norm": 3.8242447164713815, - "learning_rate": 2.603544307771585e-06, - "loss": 0.1138, + "epoch": 1.5597288901440272, + "grad_norm": 3.4329647204462046, + "learning_rate": 9.841463534275712e-06, + "loss": 0.5087, "step": 11046 }, { - "epoch": 2.3164185363807928, - "grad_norm": 4.589727617072916, - "learning_rate": 2.6020209106754356e-06, - "loss": 0.1691, + "epoch": 1.559870093194013, + "grad_norm": 3.3824969857209575, + "learning_rate": 9.83993927475547e-06, + "loss": 0.5507, "step": 11047 }, { - "epoch": 2.31662822394632, - "grad_norm": 5.5666875804948575, - "learning_rate": 2.600497892738545e-06, - "loss": 0.1719, + "epoch": 1.560011296243999, + "grad_norm": 3.352357444763647, + "learning_rate": 9.838415018954976e-06, + "loss": 0.5445, "step": 11048 }, { - "epoch": 2.3168379115118474, - "grad_norm": 4.79842266198786, - "learning_rate": 2.598975254038977e-06, - "loss": 0.1616, + "epoch": 1.5601524992939848, + "grad_norm": 4.207332250125473, + "learning_rate": 9.836890766909641e-06, + "loss": 0.7608, "step": 11049 }, { - "epoch": 2.317047599077375, - "grad_norm": 3.721189596633461, - "learning_rate": 2.5974529946547565e-06, - "loss": 0.1276, + "epoch": 1.5602937023439707, + "grad_norm": 3.593039251468266, + "learning_rate": 9.835366518654897e-06, + "loss": 0.5754, "step": 11050 }, { - "epoch": 2.317257286642902, - "grad_norm": 4.310452703709921, - "learning_rate": 2.595931114663911e-06, - "loss": 0.1516, + "epoch": 1.5604349053939566, + "grad_norm": 4.3556913021754795, + "learning_rate": 9.833842274226163e-06, + "loss": 0.6453, "step": 11051 }, { - "epoch": 2.3174669742084295, - "grad_norm": 4.800998941984439, - "learning_rate": 2.59440961414444e-06, - "loss": 0.1432, + "epoch": 1.5605761084439425, + "grad_norm": 3.4044251991903027, + "learning_rate": 9.832318033658862e-06, + "loss": 0.6171, "step": 11052 }, { - "epoch": 2.317676661773957, - "grad_norm": 4.176185711657633, - "learning_rate": 2.5928884931743205e-06, - "loss": 0.1413, + "epoch": 1.5607173114939283, + "grad_norm": 4.163850498836935, + "learning_rate": 9.830793796988418e-06, + "loss": 0.6102, "step": 11053 }, { - "epoch": 2.317886349339484, - "grad_norm": 3.9365789723719775, - "learning_rate": 2.5913677518315093e-06, - "loss": 0.0952, + "epoch": 1.5608585145439142, + "grad_norm": 3.5490780090453344, + "learning_rate": 9.829269564250254e-06, + "loss": 0.5892, "step": 11054 }, { - "epoch": 2.3180960369050116, - "grad_norm": 4.347808106915857, - "learning_rate": 2.5898473901939535e-06, - "loss": 0.1899, + "epoch": 1.5609997175939, + "grad_norm": 3.3306301601829427, + "learning_rate": 9.827745335479791e-06, + "loss": 0.5288, "step": 11055 }, { - "epoch": 2.3183057244705387, - "grad_norm": 4.366273051737066, - "learning_rate": 2.588327408339568e-06, - "loss": 0.1405, + "epoch": 1.561140920643886, + "grad_norm": 3.1771786079337283, + "learning_rate": 9.826221110712451e-06, + "loss": 0.4091, "step": 11056 }, { - "epoch": 2.3185154120360663, - "grad_norm": 4.032375106953269, - "learning_rate": 2.586807806346262e-06, - "loss": 0.1594, + "epoch": 1.5612821236938719, + "grad_norm": 4.080740606508169, + "learning_rate": 9.824696889983662e-06, + "loss": 0.635, "step": 11057 }, { - "epoch": 2.318725099601594, - "grad_norm": 3.540466741180801, - "learning_rate": 2.585288584291912e-06, - "loss": 0.1186, + "epoch": 1.5614233267438578, + "grad_norm": 4.050565144137989, + "learning_rate": 9.823172673328837e-06, + "loss": 0.6903, "step": 11058 }, { - "epoch": 2.318934787167121, - "grad_norm": 4.268923900692967, - "learning_rate": 2.583769742254381e-06, - "loss": 0.1781, + "epoch": 1.5615645297938436, + "grad_norm": 3.388371557857401, + "learning_rate": 9.821648460783403e-06, + "loss": 0.453, "step": 11059 }, { - "epoch": 2.3191444747326484, - "grad_norm": 5.8015503744344885, - "learning_rate": 2.5822512803115117e-06, - "loss": 0.1415, + "epoch": 1.5617057328438295, + "grad_norm": 3.3153858697018648, + "learning_rate": 9.820124252382784e-06, + "loss": 0.4818, "step": 11060 }, { - "epoch": 2.3193541622981755, - "grad_norm": 4.612179974168851, - "learning_rate": 2.5807331985411334e-06, - "loss": 0.1745, + "epoch": 1.5618469358938154, + "grad_norm": 3.2259213453138877, + "learning_rate": 9.818600048162396e-06, + "loss": 0.4808, "step": 11061 }, { - "epoch": 2.319563849863703, - "grad_norm": 5.5740521151437425, - "learning_rate": 2.579215497021046e-06, - "loss": 0.2312, + "epoch": 1.5619881389438013, + "grad_norm": 4.604173032987742, + "learning_rate": 9.817075848157666e-06, + "loss": 0.8981, "step": 11062 }, { - "epoch": 2.3197735374292305, - "grad_norm": 5.004602740010987, - "learning_rate": 2.5776981758290332e-06, - "loss": 0.1742, + "epoch": 1.5621293419937872, + "grad_norm": 3.7249339310392604, + "learning_rate": 9.815551652404016e-06, + "loss": 0.731, "step": 11063 }, { - "epoch": 2.3199832249947576, - "grad_norm": 4.339493179625758, - "learning_rate": 2.576181235042865e-06, - "loss": 0.153, + "epoch": 1.562270545043773, + "grad_norm": 4.082670268471065, + "learning_rate": 9.814027460936863e-06, + "loss": 0.6932, "step": 11064 }, { - "epoch": 2.320192912560285, - "grad_norm": 5.4524200210560485, - "learning_rate": 2.5746646747402815e-06, - "loss": 0.1695, + "epoch": 1.562411748093759, + "grad_norm": 2.7667242503987195, + "learning_rate": 9.812503273791638e-06, + "loss": 0.4621, "step": 11065 }, { - "epoch": 2.3204026001258127, - "grad_norm": 5.389359063275125, - "learning_rate": 2.5731484949990147e-06, - "loss": 0.1899, + "epoch": 1.5625529511437448, + "grad_norm": 4.682839506723859, + "learning_rate": 9.81097909100375e-06, + "loss": 0.741, "step": 11066 }, { - "epoch": 2.3206122876913398, - "grad_norm": 4.216134597554432, - "learning_rate": 2.5716326958967684e-06, - "loss": 0.171, + "epoch": 1.5626941541937307, + "grad_norm": 3.928630660299773, + "learning_rate": 9.809454912608628e-06, + "loss": 0.7171, "step": 11067 }, { - "epoch": 2.3208219752568673, - "grad_norm": 4.95813376127907, - "learning_rate": 2.570117277511228e-06, - "loss": 0.1367, + "epoch": 1.5628353572437166, + "grad_norm": 2.9733643039082853, + "learning_rate": 9.807930738641692e-06, + "loss": 0.4513, "step": 11068 }, { - "epoch": 2.321031662822395, - "grad_norm": 3.785872982925218, - "learning_rate": 2.568602239920063e-06, - "loss": 0.0974, + "epoch": 1.5629765602937025, + "grad_norm": 3.304227200806797, + "learning_rate": 9.806406569138362e-06, + "loss": 0.5797, "step": 11069 }, { - "epoch": 2.321241350387922, - "grad_norm": 4.387770741695435, - "learning_rate": 2.567087583200929e-06, - "loss": 0.1548, + "epoch": 1.5631177633436883, + "grad_norm": 3.319060721062751, + "learning_rate": 9.804882404134057e-06, + "loss": 0.6071, "step": 11070 }, { - "epoch": 2.3214510379534494, - "grad_norm": 4.529446370646171, - "learning_rate": 2.5655733074314416e-06, - "loss": 0.1587, + "epoch": 1.5632589663936742, + "grad_norm": 3.5603658602722246, + "learning_rate": 9.803358243664203e-06, + "loss": 0.6179, "step": 11071 }, { - "epoch": 2.321660725518977, - "grad_norm": 5.179315681511126, - "learning_rate": 2.564059412689217e-06, - "loss": 0.152, + "epoch": 1.5634001694436601, + "grad_norm": 3.108087480974047, + "learning_rate": 9.801834087764219e-06, + "loss": 0.4617, "step": 11072 }, { - "epoch": 2.321870413084504, - "grad_norm": 4.050453092733685, - "learning_rate": 2.562545899051847e-06, - "loss": 0.1407, + "epoch": 1.563541372493646, + "grad_norm": 3.4097711734676683, + "learning_rate": 9.800309936469523e-06, + "loss": 0.5444, "step": 11073 }, { - "epoch": 2.3220801006500316, - "grad_norm": 5.113077770356172, - "learning_rate": 2.5610327665968992e-06, - "loss": 0.1685, + "epoch": 1.5636825755436319, + "grad_norm": 4.846372969974859, + "learning_rate": 9.798785789815545e-06, + "loss": 0.62, "step": 11074 }, { - "epoch": 2.3222897882155586, - "grad_norm": 4.20621855664333, - "learning_rate": 2.559520015401922e-06, - "loss": 0.1179, + "epoch": 1.5638237785936178, + "grad_norm": 2.989650454762499, + "learning_rate": 9.797261647837692e-06, + "loss": 0.5, "step": 11075 }, { - "epoch": 2.322499475781086, - "grad_norm": 5.291327580309708, - "learning_rate": 2.558007645544451e-06, - "loss": 0.1675, + "epoch": 1.5639649816436036, + "grad_norm": 3.918781523490743, + "learning_rate": 9.79573751057139e-06, + "loss": 0.6029, "step": 11076 }, { - "epoch": 2.3227091633466137, - "grad_norm": 4.2376471343947575, - "learning_rate": 2.556495657101994e-06, - "loss": 0.1519, + "epoch": 1.5641061846935895, + "grad_norm": 3.8081648351780832, + "learning_rate": 9.794213378052064e-06, + "loss": 0.6746, "step": 11077 }, { - "epoch": 2.322918850912141, - "grad_norm": 4.901123009345599, - "learning_rate": 2.5549840501520463e-06, - "loss": 0.1523, + "epoch": 1.5642473877435754, + "grad_norm": 3.3897958126792673, + "learning_rate": 9.792689250315126e-06, + "loss": 0.5509, "step": 11078 }, { - "epoch": 2.3231285384776683, - "grad_norm": 3.6203270072629885, - "learning_rate": 2.5534728247720785e-06, - "loss": 0.1021, + "epoch": 1.5643885907935613, + "grad_norm": 3.05927618414925, + "learning_rate": 9.791165127396003e-06, + "loss": 0.5128, "step": 11079 }, { - "epoch": 2.3233382260431954, - "grad_norm": 4.173326123180353, - "learning_rate": 2.5519619810395424e-06, - "loss": 0.1583, + "epoch": 1.5645297938435472, + "grad_norm": 3.1117284038679647, + "learning_rate": 9.789641009330113e-06, + "loss": 0.4977, "step": 11080 }, { - "epoch": 2.323547913608723, - "grad_norm": 4.573039223975011, - "learning_rate": 2.5504515190318713e-06, - "loss": 0.1425, + "epoch": 1.564670996893533, + "grad_norm": 3.167531385226866, + "learning_rate": 9.788116896152873e-06, + "loss": 0.5963, "step": 11081 }, { - "epoch": 2.3237576011742505, - "grad_norm": 3.5597321829338986, - "learning_rate": 2.5489414388264844e-06, - "loss": 0.112, + "epoch": 1.564812199943519, + "grad_norm": 3.502735151067711, + "learning_rate": 9.786592787899707e-06, + "loss": 0.5845, "step": 11082 }, { - "epoch": 2.3239672887397775, - "grad_norm": 3.8548556232107187, - "learning_rate": 2.547431740500771e-06, - "loss": 0.1264, + "epoch": 1.5649534029935046, + "grad_norm": 2.8889574180827404, + "learning_rate": 9.78506868460603e-06, + "loss": 0.4726, "step": 11083 }, { - "epoch": 2.324176976305305, - "grad_norm": 4.258845044629492, - "learning_rate": 2.5459224241321046e-06, - "loss": 0.1179, + "epoch": 1.5650946060434905, + "grad_norm": 3.2941953116583957, + "learning_rate": 9.783544586307266e-06, + "loss": 0.5367, "step": 11084 }, { - "epoch": 2.3243866638708326, - "grad_norm": 3.9706791643882413, - "learning_rate": 2.5444134897978456e-06, - "loss": 0.1248, + "epoch": 1.5652358090934764, + "grad_norm": 2.9708544972067394, + "learning_rate": 9.78202049303883e-06, + "loss": 0.4422, "step": 11085 }, { - "epoch": 2.3245963514363597, - "grad_norm": 4.442730687173412, - "learning_rate": 2.5429049375753224e-06, - "loss": 0.1654, + "epoch": 1.5653770121434623, + "grad_norm": 3.052020590486898, + "learning_rate": 9.780496404836146e-06, + "loss": 0.4874, "step": 11086 }, { - "epoch": 2.324806039001887, - "grad_norm": 4.493148525002239, - "learning_rate": 2.541396767541858e-06, - "loss": 0.1472, + "epoch": 1.5655182151934481, + "grad_norm": 3.280149907349971, + "learning_rate": 9.778972321734627e-06, + "loss": 0.5076, "step": 11087 }, { - "epoch": 2.3250157265674147, - "grad_norm": 3.459810062691372, - "learning_rate": 2.5398889797747463e-06, - "loss": 0.126, + "epoch": 1.565659418243434, + "grad_norm": 3.868921145375212, + "learning_rate": 9.7774482437697e-06, + "loss": 0.6917, "step": 11088 }, { - "epoch": 2.325225414132942, - "grad_norm": 4.158404352130959, - "learning_rate": 2.5383815743512595e-06, - "loss": 0.1586, + "epoch": 1.56580062129342, + "grad_norm": 3.358341175569998, + "learning_rate": 9.775924170976778e-06, + "loss": 0.5377, "step": 11089 }, { - "epoch": 2.3254351016984693, - "grad_norm": 5.193345078114732, - "learning_rate": 2.5368745513486615e-06, - "loss": 0.1493, + "epoch": 1.5659418243434058, + "grad_norm": 3.75340899765257, + "learning_rate": 9.77440010339128e-06, + "loss": 0.5067, "step": 11090 }, { - "epoch": 2.325644789263997, - "grad_norm": 4.2413299177364125, - "learning_rate": 2.5353679108441875e-06, - "loss": 0.0999, + "epoch": 1.5660830273933917, + "grad_norm": 4.191111917903983, + "learning_rate": 9.772876041048633e-06, + "loss": 0.5928, "step": 11091 }, { - "epoch": 2.325854476829524, - "grad_norm": 4.201990052105541, - "learning_rate": 2.5338616529150516e-06, - "loss": 0.1558, + "epoch": 1.5662242304433776, + "grad_norm": 3.914241081686757, + "learning_rate": 9.771351983984241e-06, + "loss": 0.6284, "step": 11092 }, { - "epoch": 2.3260641643950515, - "grad_norm": 4.343438918223055, - "learning_rate": 2.5323557776384554e-06, - "loss": 0.1697, + "epoch": 1.5663654334933634, + "grad_norm": 3.647270443355041, + "learning_rate": 9.769827932233533e-06, + "loss": 0.5623, "step": 11093 }, { - "epoch": 2.3262738519605786, - "grad_norm": 4.844150750056045, - "learning_rate": 2.5308502850915805e-06, - "loss": 0.1671, + "epoch": 1.5665066365433493, + "grad_norm": 2.874033044616517, + "learning_rate": 9.768303885831924e-06, + "loss": 0.4243, "step": 11094 }, { - "epoch": 2.326483539526106, - "grad_norm": 4.1837221109464675, - "learning_rate": 2.5293451753515828e-06, - "loss": 0.1594, + "epoch": 1.5666478395933352, + "grad_norm": 4.0481746159929, + "learning_rate": 9.766779844814833e-06, + "loss": 0.5955, "step": 11095 }, { - "epoch": 2.3266932270916336, - "grad_norm": 4.346740812430216, - "learning_rate": 2.527840448495599e-06, - "loss": 0.1703, + "epoch": 1.566789042643321, + "grad_norm": 3.725418615696769, + "learning_rate": 9.765255809217676e-06, + "loss": 0.6349, "step": 11096 }, { - "epoch": 2.3269029146571607, - "grad_norm": 6.610197328359388, - "learning_rate": 2.526336104600755e-06, - "loss": 0.1604, + "epoch": 1.566930245693307, + "grad_norm": 3.630195606492286, + "learning_rate": 9.763731779075874e-06, + "loss": 0.5769, "step": 11097 }, { - "epoch": 2.3271126022226882, - "grad_norm": 4.509159284862679, - "learning_rate": 2.5248321437441457e-06, - "loss": 0.185, + "epoch": 1.5670714487432928, + "grad_norm": 3.964939976753005, + "learning_rate": 9.762207754424845e-06, + "loss": 0.6642, "step": 11098 }, { - "epoch": 2.3273222897882153, - "grad_norm": 4.822726319811454, - "learning_rate": 2.523328566002857e-06, - "loss": 0.1701, + "epoch": 1.5672126517932787, + "grad_norm": 3.107827388510048, + "learning_rate": 9.760683735300006e-06, + "loss": 0.4407, "step": 11099 }, { - "epoch": 2.327531977353743, - "grad_norm": 3.2019779670230304, - "learning_rate": 2.521825371453943e-06, - "loss": 0.108, + "epoch": 1.5673538548432646, + "grad_norm": 3.1076273487729758, + "learning_rate": 9.759159721736772e-06, + "loss": 0.5139, "step": 11100 }, { - "epoch": 2.3277416649192704, - "grad_norm": 4.492662695131915, - "learning_rate": 2.520322560174453e-06, - "loss": 0.1752, + "epoch": 1.5674950578932505, + "grad_norm": 3.432455941254076, + "learning_rate": 9.757635713770558e-06, + "loss": 0.6043, "step": 11101 }, { - "epoch": 2.3279513524847975, - "grad_norm": 5.370252472622765, - "learning_rate": 2.5188201322414007e-06, - "loss": 0.1927, + "epoch": 1.5676362609432364, + "grad_norm": 3.4368827045144537, + "learning_rate": 9.75611171143679e-06, + "loss": 0.5597, "step": 11102 }, { - "epoch": 2.328161040050325, - "grad_norm": 5.014775960985252, - "learning_rate": 2.517318087731796e-06, - "loss": 0.1244, + "epoch": 1.5677774639932223, + "grad_norm": 3.8622686984056083, + "learning_rate": 9.75458771477088e-06, + "loss": 0.6613, "step": 11103 }, { - "epoch": 2.3283707276158525, - "grad_norm": 5.986217836138817, - "learning_rate": 2.515816426722617e-06, - "loss": 0.1733, + "epoch": 1.5679186670432081, + "grad_norm": 4.320354146873605, + "learning_rate": 9.753063723808243e-06, + "loss": 0.6806, "step": 11104 }, { - "epoch": 2.3285804151813796, - "grad_norm": 3.3451895055116108, - "learning_rate": 2.5143151492908237e-06, - "loss": 0.1026, + "epoch": 1.568059870093194, + "grad_norm": 4.237852916650013, + "learning_rate": 9.7515397385843e-06, + "loss": 0.611, "step": 11105 }, { - "epoch": 2.328790102746907, - "grad_norm": 3.5468633559378255, - "learning_rate": 2.5128142555133617e-06, - "loss": 0.1366, + "epoch": 1.56820107314318, + "grad_norm": 3.7235754967647066, + "learning_rate": 9.750015759134466e-06, + "loss": 0.4292, "step": 11106 }, { - "epoch": 2.3289997903124346, - "grad_norm": 4.89089769554914, - "learning_rate": 2.511313745467162e-06, - "loss": 0.1815, + "epoch": 1.5683422761931658, + "grad_norm": 3.7837946213250353, + "learning_rate": 9.748491785494156e-06, + "loss": 0.6491, "step": 11107 }, { - "epoch": 2.3292094778779617, - "grad_norm": 3.619281794223496, - "learning_rate": 2.5098136192291144e-06, - "loss": 0.1118, + "epoch": 1.5684834792431517, + "grad_norm": 4.106287930409589, + "learning_rate": 9.746967817698795e-06, + "loss": 0.5968, "step": 11108 }, { - "epoch": 2.3294191654434893, - "grad_norm": 4.641922129663621, - "learning_rate": 2.5083138768761104e-06, - "loss": 0.1807, + "epoch": 1.5686246822931376, + "grad_norm": 3.5460591945646063, + "learning_rate": 9.745443855783786e-06, + "loss": 0.5926, "step": 11109 }, { - "epoch": 2.329628853009017, - "grad_norm": 4.639482209149072, - "learning_rate": 2.506814518485017e-06, - "loss": 0.1421, + "epoch": 1.5687658853431234, + "grad_norm": 3.242944652347343, + "learning_rate": 9.743919899784555e-06, + "loss": 0.5926, "step": 11110 }, { - "epoch": 2.329838540574544, - "grad_norm": 3.360695971494131, - "learning_rate": 2.505315544132676e-06, - "loss": 0.1165, + "epoch": 1.5689070883931093, + "grad_norm": 3.4633127855378625, + "learning_rate": 9.742395949736513e-06, + "loss": 0.5304, "step": 11111 }, { - "epoch": 2.3300482281400714, - "grad_norm": 3.451296025969975, - "learning_rate": 2.503816953895909e-06, - "loss": 0.1051, + "epoch": 1.5690482914430952, + "grad_norm": 3.409286833019807, + "learning_rate": 9.740872005675079e-06, + "loss": 0.5793, "step": 11112 }, { - "epoch": 2.3302579157055985, - "grad_norm": 4.65163746412364, - "learning_rate": 2.5023187478515265e-06, - "loss": 0.1229, + "epoch": 1.569189494493081, + "grad_norm": 3.2526394310511493, + "learning_rate": 9.739348067635667e-06, + "loss": 0.512, "step": 11113 }, { - "epoch": 2.330467603271126, - "grad_norm": 4.441611796238513, - "learning_rate": 2.5008209260763106e-06, - "loss": 0.1266, + "epoch": 1.569330697543067, + "grad_norm": 2.6577685046314397, + "learning_rate": 9.737824135653693e-06, + "loss": 0.4477, "step": 11114 }, { - "epoch": 2.3306772908366535, - "grad_norm": 3.674120525448985, - "learning_rate": 2.499323488647032e-06, - "loss": 0.1177, + "epoch": 1.5694719005930529, + "grad_norm": 2.947304831597821, + "learning_rate": 9.736300209764574e-06, + "loss": 0.5018, "step": 11115 }, { - "epoch": 2.3308869784021806, - "grad_norm": 4.144654872796137, - "learning_rate": 2.4978264356404326e-06, - "loss": 0.1288, + "epoch": 1.5696131036430387, + "grad_norm": 3.466513350019249, + "learning_rate": 9.734776290003727e-06, + "loss": 0.632, "step": 11116 }, { - "epoch": 2.331096665967708, - "grad_norm": 6.127040321585709, - "learning_rate": 2.4963297671332385e-06, - "loss": 0.135, + "epoch": 1.5697543066930244, + "grad_norm": 3.862780294273257, + "learning_rate": 9.733252376406562e-06, + "loss": 0.5438, "step": 11117 }, { - "epoch": 2.3313063535332352, - "grad_norm": 5.07939243125553, - "learning_rate": 2.4948334832021582e-06, - "loss": 0.1486, + "epoch": 1.5698955097430103, + "grad_norm": 3.6429530295074284, + "learning_rate": 9.731728469008493e-06, + "loss": 0.6274, "step": 11118 }, { - "epoch": 2.3315160410987628, - "grad_norm": 3.7141400132187083, - "learning_rate": 2.4933375839238804e-06, - "loss": 0.1527, + "epoch": 1.5700367127929962, + "grad_norm": 3.677838987890372, + "learning_rate": 9.730204567844941e-06, + "loss": 0.5255, "step": 11119 }, { - "epoch": 2.3317257286642903, - "grad_norm": 4.070240532875839, - "learning_rate": 2.491842069375072e-06, - "loss": 0.1506, + "epoch": 1.570177915842982, + "grad_norm": 3.1903519921937185, + "learning_rate": 9.72868067295132e-06, + "loss": 0.5241, "step": 11120 }, { - "epoch": 2.3319354162298174, - "grad_norm": 4.021320115602753, - "learning_rate": 2.4903469396323777e-06, - "loss": 0.1366, + "epoch": 1.570319118892968, + "grad_norm": 3.137795872426895, + "learning_rate": 9.727156784363038e-06, + "loss": 0.5198, "step": 11121 }, { - "epoch": 2.332145103795345, - "grad_norm": 4.60248002982288, - "learning_rate": 2.4888521947724297e-06, - "loss": 0.1398, + "epoch": 1.5704603219429538, + "grad_norm": 4.029186221763382, + "learning_rate": 9.72563290211552e-06, + "loss": 0.7672, "step": 11122 }, { - "epoch": 2.3323547913608724, - "grad_norm": 4.2073382575084635, - "learning_rate": 2.487357834871831e-06, - "loss": 0.1157, + "epoch": 1.5706015249929397, + "grad_norm": 3.111058921337772, + "learning_rate": 9.724109026244173e-06, + "loss": 0.5642, "step": 11123 }, { - "epoch": 2.3325644789263995, - "grad_norm": 3.923175511126156, - "learning_rate": 2.4858638600071773e-06, - "loss": 0.1194, + "epoch": 1.5707427280429256, + "grad_norm": 3.129992160510293, + "learning_rate": 9.72258515678441e-06, + "loss": 0.5152, "step": 11124 }, { - "epoch": 2.332774166491927, - "grad_norm": 3.806675179879064, - "learning_rate": 2.484370270255032e-06, - "loss": 0.147, + "epoch": 1.5708839310929115, + "grad_norm": 3.605233650657966, + "learning_rate": 9.721061293771657e-06, + "loss": 0.5778, "step": 11125 }, { - "epoch": 2.3329838540574546, - "grad_norm": 4.879890854695098, - "learning_rate": 2.482877065691943e-06, - "loss": 0.1658, + "epoch": 1.5710251341428974, + "grad_norm": 3.163063785445116, + "learning_rate": 9.719537437241311e-06, + "loss": 0.4806, "step": 11126 }, { - "epoch": 2.3331935416229816, - "grad_norm": 4.558795994748064, - "learning_rate": 2.481384246394443e-06, - "loss": 0.1755, + "epoch": 1.5711663371928832, + "grad_norm": 3.6579257866096664, + "learning_rate": 9.718013587228797e-06, + "loss": 0.5214, "step": 11127 }, { - "epoch": 2.333403229188509, - "grad_norm": 5.464966829010082, - "learning_rate": 2.4798918124390457e-06, - "loss": 0.1264, + "epoch": 1.5713075402428691, + "grad_norm": 3.4923877038648308, + "learning_rate": 9.716489743769525e-06, + "loss": 0.5846, "step": 11128 }, { - "epoch": 2.3336129167540367, - "grad_norm": 4.785186881629059, - "learning_rate": 2.4783997639022306e-06, - "loss": 0.173, + "epoch": 1.571448743292855, + "grad_norm": 3.209850086627863, + "learning_rate": 9.714965906898909e-06, + "loss": 0.5516, "step": 11129 }, { - "epoch": 2.333822604319564, - "grad_norm": 5.786103214558417, - "learning_rate": 2.4769081008604734e-06, - "loss": 0.1663, + "epoch": 1.5715899463428409, + "grad_norm": 3.0696041405533574, + "learning_rate": 9.713442076652359e-06, + "loss": 0.4932, "step": 11130 }, { - "epoch": 2.3340322918850913, - "grad_norm": 5.021294785529363, - "learning_rate": 2.4754168233902266e-06, - "loss": 0.1492, + "epoch": 1.5717311493928268, + "grad_norm": 4.3294174459222035, + "learning_rate": 9.711918253065296e-06, + "loss": 0.6063, "step": 11131 }, { - "epoch": 2.3342419794506184, - "grad_norm": 4.458508579126794, - "learning_rate": 2.4739259315679177e-06, - "loss": 0.1361, + "epoch": 1.5718723524428126, + "grad_norm": 3.6518725136188688, + "learning_rate": 9.710394436173126e-06, + "loss": 0.6039, "step": 11132 }, { - "epoch": 2.334451667016146, - "grad_norm": 4.353386412620221, - "learning_rate": 2.4724354254699566e-06, - "loss": 0.1238, + "epoch": 1.5720135554927985, + "grad_norm": 3.3717098400414933, + "learning_rate": 9.708870626011268e-06, + "loss": 0.5964, "step": 11133 }, { - "epoch": 2.3346613545816735, - "grad_norm": 3.670337839950564, - "learning_rate": 2.4709453051727374e-06, - "loss": 0.1155, + "epoch": 1.5721547585427844, + "grad_norm": 3.5199314382898446, + "learning_rate": 9.707346822615127e-06, + "loss": 0.5676, "step": 11134 }, { - "epoch": 2.3348710421472005, - "grad_norm": 3.798832582505462, - "learning_rate": 2.469455570752628e-06, - "loss": 0.115, + "epoch": 1.5722959615927703, + "grad_norm": 3.1102686674681395, + "learning_rate": 9.70582302602012e-06, + "loss": 0.4788, "step": 11135 }, { - "epoch": 2.335080729712728, - "grad_norm": 5.335256265572671, - "learning_rate": 2.4679662222859833e-06, - "loss": 0.1785, + "epoch": 1.5724371646427562, + "grad_norm": 4.099911587272551, + "learning_rate": 9.704299236261658e-06, + "loss": 0.5582, "step": 11136 }, { - "epoch": 2.3352904172782556, - "grad_norm": 3.3346472757433463, - "learning_rate": 2.466477259849135e-06, - "loss": 0.1265, + "epoch": 1.572578367692742, + "grad_norm": 3.7927158025024026, + "learning_rate": 9.702775453375155e-06, + "loss": 0.6746, "step": 11137 }, { - "epoch": 2.3355001048437827, - "grad_norm": 3.2503663314227342, - "learning_rate": 2.46498868351839e-06, - "loss": 0.127, + "epoch": 1.572719570742728, + "grad_norm": 4.5018704852279265, + "learning_rate": 9.701251677396021e-06, + "loss": 0.7546, "step": 11138 }, { - "epoch": 2.33570979240931, - "grad_norm": 3.9354092279268693, - "learning_rate": 2.4635004933700435e-06, - "loss": 0.1445, + "epoch": 1.5728607737927138, + "grad_norm": 3.9411871448001734, + "learning_rate": 9.699727908359671e-06, + "loss": 0.6672, "step": 11139 }, { - "epoch": 2.3359194799748373, - "grad_norm": 4.466760372413868, - "learning_rate": 2.4620126894803707e-06, - "loss": 0.1934, + "epoch": 1.5730019768426997, + "grad_norm": 3.610970430498073, + "learning_rate": 9.698204146301513e-06, + "loss": 0.6281, "step": 11140 }, { - "epoch": 2.336129167540365, - "grad_norm": 4.837972487448911, - "learning_rate": 2.4605252719256214e-06, - "loss": 0.138, + "epoch": 1.5731431798926856, + "grad_norm": 3.3860477921863312, + "learning_rate": 9.696680391256961e-06, + "loss": 0.5119, "step": 11141 }, { - "epoch": 2.3363388551058923, - "grad_norm": 3.6373610076646883, - "learning_rate": 2.4590382407820256e-06, - "loss": 0.1216, + "epoch": 1.5732843829426715, + "grad_norm": 4.062895077140409, + "learning_rate": 9.69515664326143e-06, + "loss": 0.6455, "step": 11142 }, { - "epoch": 2.3365485426714194, - "grad_norm": 3.642740655601929, - "learning_rate": 2.4575515961258023e-06, - "loss": 0.1203, + "epoch": 1.5734255859926574, + "grad_norm": 4.261209456210413, + "learning_rate": 9.69363290235032e-06, + "loss": 0.7464, "step": 11143 }, { - "epoch": 2.336758230236947, - "grad_norm": 5.397866213767218, - "learning_rate": 2.456065338033139e-06, - "loss": 0.165, + "epoch": 1.5735667890426432, + "grad_norm": 3.4530757703896007, + "learning_rate": 9.692109168559051e-06, + "loss": 0.5825, "step": 11144 }, { - "epoch": 2.3369679178024745, - "grad_norm": 5.027257362815043, - "learning_rate": 2.4545794665802138e-06, - "loss": 0.1719, + "epoch": 1.5737079920926291, + "grad_norm": 3.4063931372513676, + "learning_rate": 9.690585441923036e-06, + "loss": 0.5211, "step": 11145 }, { - "epoch": 2.3371776053680016, - "grad_norm": 4.639955384818674, - "learning_rate": 2.4530939818431776e-06, - "loss": 0.143, + "epoch": 1.573849195142615, + "grad_norm": 3.7405373204807475, + "learning_rate": 9.689061722477677e-06, + "loss": 0.5495, "step": 11146 }, { - "epoch": 2.337387292933529, - "grad_norm": 4.718114897204182, - "learning_rate": 2.451608883898162e-06, - "loss": 0.1484, + "epoch": 1.573990398192601, + "grad_norm": 3.5261268228183735, + "learning_rate": 9.68753801025839e-06, + "loss": 0.5739, "step": 11147 }, { - "epoch": 2.3375969804990566, - "grad_norm": 3.320819271206125, - "learning_rate": 2.4501241728212832e-06, - "loss": 0.1325, + "epoch": 1.5741316012425868, + "grad_norm": 3.3466286137571433, + "learning_rate": 9.686014305300585e-06, + "loss": 0.5063, "step": 11148 }, { - "epoch": 2.3378066680645837, - "grad_norm": 4.944410894075514, - "learning_rate": 2.4486398486886398e-06, - "loss": 0.1665, + "epoch": 1.5742728042925727, + "grad_norm": 3.8482521502124176, + "learning_rate": 9.684490607639672e-06, + "loss": 0.6319, "step": 11149 }, { - "epoch": 2.3380163556301112, - "grad_norm": 4.064283298338463, - "learning_rate": 2.4471559115762965e-06, - "loss": 0.1423, + "epoch": 1.5744140073425585, + "grad_norm": 4.18327629071738, + "learning_rate": 9.682966917311065e-06, + "loss": 0.7063, "step": 11150 }, { - "epoch": 2.3382260431956383, - "grad_norm": 3.8582038757927535, - "learning_rate": 2.445672361560313e-06, - "loss": 0.1369, + "epoch": 1.5745552103925444, + "grad_norm": 3.418719044047853, + "learning_rate": 9.681443234350167e-06, + "loss": 0.6457, "step": 11151 }, { - "epoch": 2.338435730761166, - "grad_norm": 2.7660987789652647, - "learning_rate": 2.4441891987167244e-06, - "loss": 0.1054, + "epoch": 1.5746964134425303, + "grad_norm": 3.3037662419981975, + "learning_rate": 9.679919558792388e-06, + "loss": 0.5478, "step": 11152 }, { - "epoch": 2.3386454183266934, - "grad_norm": 3.5308357318584993, - "learning_rate": 2.442706423121546e-06, - "loss": 0.1393, + "epoch": 1.5748376164925162, + "grad_norm": 4.338057534805917, + "learning_rate": 9.678395890673142e-06, + "loss": 0.6632, "step": 11153 }, { - "epoch": 2.3388551058922205, - "grad_norm": 4.888909342710058, - "learning_rate": 2.4412240348507664e-06, - "loss": 0.1786, + "epoch": 1.574978819542502, + "grad_norm": 3.7126404460331313, + "learning_rate": 9.676872230027837e-06, + "loss": 0.5725, "step": 11154 }, { - "epoch": 2.339064793457748, - "grad_norm": 4.792551774103241, - "learning_rate": 2.439742033980369e-06, - "loss": 0.1774, + "epoch": 1.575120022592488, + "grad_norm": 3.606340342762098, + "learning_rate": 9.67534857689188e-06, + "loss": 0.5665, "step": 11155 }, { - "epoch": 2.3392744810232755, - "grad_norm": 4.151774811322938, - "learning_rate": 2.4382604205863027e-06, - "loss": 0.1133, + "epoch": 1.5752612256424738, + "grad_norm": 3.854393756602364, + "learning_rate": 9.673824931300684e-06, + "loss": 0.6855, "step": 11156 }, { - "epoch": 2.3394841685888026, - "grad_norm": 4.069224381708613, - "learning_rate": 2.436779194744506e-06, - "loss": 0.1703, + "epoch": 1.5754024286924597, + "grad_norm": 3.1031294234621134, + "learning_rate": 9.672301293289656e-06, + "loss": 0.5219, "step": 11157 }, { - "epoch": 2.33969385615433, - "grad_norm": 4.240548417416079, - "learning_rate": 2.435298356530895e-06, - "loss": 0.1252, + "epoch": 1.5755436317424456, + "grad_norm": 3.4147551400569562, + "learning_rate": 9.670777662894205e-06, + "loss": 0.5893, "step": 11158 }, { - "epoch": 2.339903543719857, - "grad_norm": 4.653745844938985, - "learning_rate": 2.4338179060213595e-06, - "loss": 0.1577, + "epoch": 1.5756848347924315, + "grad_norm": 3.670778798066699, + "learning_rate": 9.66925404014974e-06, + "loss": 0.561, "step": 11159 }, { - "epoch": 2.3401132312853847, - "grad_norm": 4.531056040998752, - "learning_rate": 2.4323378432917797e-06, - "loss": 0.1476, + "epoch": 1.5758260378424174, + "grad_norm": 3.6823617503628574, + "learning_rate": 9.667730425091666e-06, + "loss": 0.5661, "step": 11160 }, { - "epoch": 2.3403229188509123, - "grad_norm": 5.011904639270193, - "learning_rate": 2.4308581684180143e-06, - "loss": 0.2101, + "epoch": 1.5759672408924033, + "grad_norm": 2.663994636569955, + "learning_rate": 9.666206817755391e-06, + "loss": 0.4268, "step": 11161 }, { - "epoch": 2.3405326064164393, - "grad_norm": 4.190323218889102, - "learning_rate": 2.4293788814758944e-06, - "loss": 0.1607, + "epoch": 1.5761084439423891, + "grad_norm": 3.6624857098084296, + "learning_rate": 9.664683218176328e-06, + "loss": 0.5835, "step": 11162 }, { - "epoch": 2.340742293981967, - "grad_norm": 4.008122063183181, - "learning_rate": 2.4278999825412365e-06, - "loss": 0.1253, + "epoch": 1.576249646992375, + "grad_norm": 3.694992782382996, + "learning_rate": 9.663159626389882e-06, + "loss": 0.7569, "step": 11163 }, { - "epoch": 2.3409519815474944, - "grad_norm": 5.170535160916469, - "learning_rate": 2.426421471689836e-06, - "loss": 0.156, + "epoch": 1.576390850042361, + "grad_norm": 3.294604595579466, + "learning_rate": 9.661636042431459e-06, + "loss": 0.5882, "step": 11164 }, { - "epoch": 2.3411616691130215, - "grad_norm": 4.437653875537352, - "learning_rate": 2.424943348997474e-06, - "loss": 0.1445, + "epoch": 1.5765320530923468, + "grad_norm": 4.027183948534172, + "learning_rate": 9.660112466336471e-06, + "loss": 0.6504, "step": 11165 }, { - "epoch": 2.341371356678549, - "grad_norm": 3.975341883673828, - "learning_rate": 2.4234656145399036e-06, - "loss": 0.1078, + "epoch": 1.5766732561423327, + "grad_norm": 3.42165857973242, + "learning_rate": 9.658588898140322e-06, + "loss": 0.551, "step": 11166 }, { - "epoch": 2.3415810442440765, - "grad_norm": 3.552419209079955, - "learning_rate": 2.4219882683928586e-06, - "loss": 0.122, + "epoch": 1.5768144591923186, + "grad_norm": 3.754473194621875, + "learning_rate": 9.65706533787842e-06, + "loss": 0.5888, "step": 11167 }, { - "epoch": 2.3417907318096036, - "grad_norm": 4.642469040849319, - "learning_rate": 2.420511310632061e-06, - "loss": 0.1708, + "epoch": 1.5769556622423044, + "grad_norm": 4.009656175088606, + "learning_rate": 9.655541785586172e-06, + "loss": 0.6271, "step": 11168 }, { - "epoch": 2.342000419375131, - "grad_norm": 4.069633032479241, - "learning_rate": 2.4190347413332047e-06, - "loss": 0.1548, + "epoch": 1.5770968652922903, + "grad_norm": 3.6150160875133777, + "learning_rate": 9.65401824129898e-06, + "loss": 0.5772, "step": 11169 }, { - "epoch": 2.3422101069406582, - "grad_norm": 4.734112694723988, - "learning_rate": 2.417558560571964e-06, - "loss": 0.1283, + "epoch": 1.5772380683422762, + "grad_norm": 4.253786203302503, + "learning_rate": 9.65249470505226e-06, + "loss": 0.6682, "step": 11170 }, { - "epoch": 2.3424197945061858, - "grad_norm": 4.07517625565213, - "learning_rate": 2.4160827684240008e-06, - "loss": 0.1006, + "epoch": 1.577379271392262, + "grad_norm": 4.259862563088656, + "learning_rate": 9.65097117688141e-06, + "loss": 0.5373, "step": 11171 }, { - "epoch": 2.3426294820717133, - "grad_norm": 3.8345536193451273, - "learning_rate": 2.414607364964946e-06, - "loss": 0.1615, + "epoch": 1.577520474442248, + "grad_norm": 4.146078169472862, + "learning_rate": 9.64944765682184e-06, + "loss": 0.5498, "step": 11172 }, { - "epoch": 2.3428391696372404, - "grad_norm": 4.467830810595402, - "learning_rate": 2.4131323502704242e-06, - "loss": 0.1684, + "epoch": 1.5776616774922338, + "grad_norm": 5.016607537104329, + "learning_rate": 9.647924144908952e-06, + "loss": 0.7466, "step": 11173 }, { - "epoch": 2.343048857202768, - "grad_norm": 3.7720735765431277, - "learning_rate": 2.4116577244160267e-06, - "loss": 0.1208, + "epoch": 1.5778028805422197, + "grad_norm": 3.748765179592891, + "learning_rate": 9.646400641178157e-06, + "loss": 0.5581, "step": 11174 }, { - "epoch": 2.3432585447682954, - "grad_norm": 4.246121967852722, - "learning_rate": 2.41018348747733e-06, - "loss": 0.0985, + "epoch": 1.5779440835922056, + "grad_norm": 3.727112596077149, + "learning_rate": 9.64487714566486e-06, + "loss": 0.5806, "step": 11175 }, { - "epoch": 2.3434682323338225, - "grad_norm": 3.800634027535383, - "learning_rate": 2.4087096395298926e-06, - "loss": 0.1613, + "epoch": 1.5780852866421915, + "grad_norm": 4.187195240927972, + "learning_rate": 9.643353658404466e-06, + "loss": 0.6831, "step": 11176 }, { - "epoch": 2.34367791989935, - "grad_norm": 4.42094483434415, - "learning_rate": 2.4072361806492562e-06, - "loss": 0.1668, + "epoch": 1.5782264896921774, + "grad_norm": 3.2746960458220205, + "learning_rate": 9.641830179432375e-06, + "loss": 0.4916, "step": 11177 }, { - "epoch": 2.343887607464877, - "grad_norm": 5.520072812578669, - "learning_rate": 2.405763110910935e-06, - "loss": 0.1913, + "epoch": 1.5783676927421633, + "grad_norm": 3.758015195895359, + "learning_rate": 9.640306708783997e-06, + "loss": 0.6816, "step": 11178 }, { - "epoch": 2.3440972950304046, - "grad_norm": 4.224236033367063, - "learning_rate": 2.4042904303904223e-06, - "loss": 0.1521, + "epoch": 1.5785088957921491, + "grad_norm": 3.0461198064250086, + "learning_rate": 9.638783246494736e-06, + "loss": 0.523, "step": 11179 }, { - "epoch": 2.344306982595932, - "grad_norm": 3.3060666155429517, - "learning_rate": 2.402818139163202e-06, - "loss": 0.123, + "epoch": 1.578650098842135, + "grad_norm": 3.9453197221816376, + "learning_rate": 9.637259792599997e-06, + "loss": 0.6465, "step": 11180 }, { - "epoch": 2.3445166701614593, - "grad_norm": 3.7952607261739235, - "learning_rate": 2.4013462373047257e-06, - "loss": 0.1432, + "epoch": 1.578791301892121, + "grad_norm": 3.9675220811399545, + "learning_rate": 9.635736347135181e-06, + "loss": 0.5948, "step": 11181 }, { - "epoch": 2.344726357726987, - "grad_norm": 4.221770287683633, - "learning_rate": 2.3998747248904373e-06, - "loss": 0.1426, + "epoch": 1.5789325049421068, + "grad_norm": 3.035051689085451, + "learning_rate": 9.634212910135697e-06, + "loss": 0.4234, "step": 11182 }, { - "epoch": 2.3449360452925143, - "grad_norm": 4.120804713608238, - "learning_rate": 2.3984036019957504e-06, - "loss": 0.137, + "epoch": 1.5790737079920927, + "grad_norm": 4.378491587236108, + "learning_rate": 9.632689481636947e-06, + "loss": 0.6151, "step": 11183 }, { - "epoch": 2.3451457328580414, - "grad_norm": 4.447472762049035, - "learning_rate": 2.3969328686960604e-06, - "loss": 0.1479, + "epoch": 1.5792149110420786, + "grad_norm": 3.547151520873231, + "learning_rate": 9.631166061674338e-06, + "loss": 0.5934, "step": 11184 }, { - "epoch": 2.345355420423569, - "grad_norm": 3.8457602646333062, - "learning_rate": 2.3954625250667484e-06, - "loss": 0.137, + "epoch": 1.5793561140920644, + "grad_norm": 3.399086534804731, + "learning_rate": 9.629642650283269e-06, + "loss": 0.5356, "step": 11185 }, { - "epoch": 2.3455651079890965, - "grad_norm": 4.428127549534315, - "learning_rate": 2.393992571183177e-06, - "loss": 0.1601, + "epoch": 1.5794973171420503, + "grad_norm": 3.712493103682115, + "learning_rate": 9.62811924749914e-06, + "loss": 0.6129, "step": 11186 }, { - "epoch": 2.3457747955546235, - "grad_norm": 3.8481284171334456, - "learning_rate": 2.392523007120672e-06, - "loss": 0.156, + "epoch": 1.5796385201920362, + "grad_norm": 4.265492252758575, + "learning_rate": 9.626595853357363e-06, + "loss": 0.7135, "step": 11187 }, { - "epoch": 2.345984483120151, - "grad_norm": 3.9405610896941536, - "learning_rate": 2.3910538329545575e-06, - "loss": 0.1397, + "epoch": 1.579779723242022, + "grad_norm": 4.6190798214618685, + "learning_rate": 9.625072467893337e-06, + "loss": 0.6967, "step": 11188 }, { - "epoch": 2.346194170685678, - "grad_norm": 4.135945331579041, - "learning_rate": 2.3895850487601345e-06, - "loss": 0.1469, + "epoch": 1.579920926292008, + "grad_norm": 3.1428948457095816, + "learning_rate": 9.623549091142466e-06, + "loss": 0.4825, "step": 11189 }, { - "epoch": 2.3464038582512057, - "grad_norm": 5.304821708474476, - "learning_rate": 2.3881166546126764e-06, - "loss": 0.1515, + "epoch": 1.5800621293419939, + "grad_norm": 3.3399242049377142, + "learning_rate": 9.622025723140147e-06, + "loss": 0.5189, "step": 11190 }, { - "epoch": 2.346613545816733, - "grad_norm": 5.043851230035632, - "learning_rate": 2.3866486505874396e-06, - "loss": 0.1347, + "epoch": 1.5802033323919797, + "grad_norm": 4.0164075983890255, + "learning_rate": 9.620502363921791e-06, + "loss": 0.6711, "step": 11191 }, { - "epoch": 2.3468232333822603, - "grad_norm": 4.604645548923234, - "learning_rate": 2.3851810367596673e-06, - "loss": 0.1493, + "epoch": 1.5803445354419656, + "grad_norm": 3.818673653770434, + "learning_rate": 9.618979013522796e-06, + "loss": 0.7537, "step": 11192 }, { - "epoch": 2.347032920947788, - "grad_norm": 4.567374445157383, - "learning_rate": 2.3837138132045713e-06, - "loss": 0.1587, + "epoch": 1.5804857384919515, + "grad_norm": 4.165124339632033, + "learning_rate": 9.617455671978565e-06, + "loss": 0.7325, "step": 11193 }, { - "epoch": 2.3472426085133153, - "grad_norm": 3.9930927157004685, - "learning_rate": 2.3822469799973547e-06, - "loss": 0.1638, + "epoch": 1.5806269415419374, + "grad_norm": 3.270997018985407, + "learning_rate": 9.615932339324497e-06, + "loss": 0.5079, "step": 11194 }, { - "epoch": 2.3474522960788424, - "grad_norm": 4.196364133885519, - "learning_rate": 2.380780537213192e-06, - "loss": 0.1076, + "epoch": 1.5807681445919233, + "grad_norm": 3.245054925774382, + "learning_rate": 9.614409015595994e-06, + "loss": 0.5557, "step": 11195 }, { - "epoch": 2.34766198364437, - "grad_norm": 5.220985055518679, - "learning_rate": 2.3793144849272396e-06, - "loss": 0.1455, + "epoch": 1.5809093476419092, + "grad_norm": 2.9501316070334944, + "learning_rate": 9.612885700828462e-06, + "loss": 0.4902, "step": 11196 }, { - "epoch": 2.347871671209897, - "grad_norm": 4.994038567378567, - "learning_rate": 2.377848823214638e-06, - "loss": 0.1777, + "epoch": 1.581050550691895, + "grad_norm": 3.8085980136800974, + "learning_rate": 9.611362395057298e-06, + "loss": 0.6442, "step": 11197 }, { - "epoch": 2.3480813587754246, - "grad_norm": 4.550143944029153, - "learning_rate": 2.3763835521505053e-06, - "loss": 0.132, + "epoch": 1.581191753741881, + "grad_norm": 4.012298162017754, + "learning_rate": 9.609839098317902e-06, + "loss": 0.5711, "step": 11198 }, { - "epoch": 2.348291046340952, - "grad_norm": 3.0942931355476104, - "learning_rate": 2.3749186718099392e-06, - "loss": 0.1384, + "epoch": 1.5813329567918668, + "grad_norm": 3.7920180602476417, + "learning_rate": 9.60831581064568e-06, + "loss": 0.5392, "step": 11199 }, { - "epoch": 2.348500733906479, - "grad_norm": 4.216851733223235, - "learning_rate": 2.3734541822680133e-06, - "loss": 0.1654, + "epoch": 1.5814741598418527, + "grad_norm": 3.9036343173034123, + "learning_rate": 9.606792532076028e-06, + "loss": 0.6025, "step": 11200 }, { - "epoch": 2.3487104214720067, - "grad_norm": 4.4839489504743035, - "learning_rate": 2.37199008359979e-06, - "loss": 0.1558, + "epoch": 1.5816153628918386, + "grad_norm": 3.7275581323273106, + "learning_rate": 9.60526926264435e-06, + "loss": 0.5693, "step": 11201 }, { - "epoch": 2.3489201090375342, - "grad_norm": 3.5548296669417856, - "learning_rate": 2.370526375880303e-06, - "loss": 0.1307, + "epoch": 1.5817565659418245, + "grad_norm": 3.0866456604471995, + "learning_rate": 9.603746002386043e-06, + "loss": 0.522, "step": 11202 }, { - "epoch": 2.3491297966030613, - "grad_norm": 3.8556444621193, - "learning_rate": 2.3690630591845755e-06, - "loss": 0.1192, + "epoch": 1.5818977689918103, + "grad_norm": 3.7452058794964427, + "learning_rate": 9.602222751336506e-06, + "loss": 0.607, "step": 11203 }, { - "epoch": 2.349339484168589, - "grad_norm": 4.683060179590733, - "learning_rate": 2.3676001335876e-06, - "loss": 0.1684, + "epoch": 1.5820389720417962, + "grad_norm": 4.289463779249865, + "learning_rate": 9.60069950953114e-06, + "loss": 0.7462, "step": 11204 }, { - "epoch": 2.3495491717341164, - "grad_norm": 4.433197954025936, - "learning_rate": 2.366137599164353e-06, - "loss": 0.1745, + "epoch": 1.582180175091782, + "grad_norm": 3.0533154656328336, + "learning_rate": 9.599176277005346e-06, + "loss": 0.486, "step": 11205 }, { - "epoch": 2.3497588592996435, - "grad_norm": 4.3634272325132395, - "learning_rate": 2.3646754559897944e-06, - "loss": 0.163, + "epoch": 1.582321378141768, + "grad_norm": 3.3330127820416524, + "learning_rate": 9.597653053794521e-06, + "loss": 0.5786, "step": 11206 }, { - "epoch": 2.349968546865171, - "grad_norm": 4.302417164974167, - "learning_rate": 2.363213704138867e-06, - "loss": 0.1817, + "epoch": 1.5824625811917539, + "grad_norm": 3.544312837164597, + "learning_rate": 9.596129839934066e-06, + "loss": 0.6925, "step": 11207 }, { - "epoch": 2.350178234430698, - "grad_norm": 4.821051137584733, - "learning_rate": 2.3617523436864774e-06, - "loss": 0.1674, + "epoch": 1.5826037842417398, + "grad_norm": 3.226260401780507, + "learning_rate": 9.59460663545938e-06, + "loss": 0.5123, "step": 11208 }, { - "epoch": 2.3503879219962256, - "grad_norm": 4.574793610296586, - "learning_rate": 2.3602913747075274e-06, - "loss": 0.1422, + "epoch": 1.5827449872917256, + "grad_norm": 3.335073771056366, + "learning_rate": 9.593083440405863e-06, + "loss": 0.5729, "step": 11209 }, { - "epoch": 2.350597609561753, - "grad_norm": 4.706867981686905, - "learning_rate": 2.3588307972768976e-06, - "loss": 0.1545, + "epoch": 1.5828861903417115, + "grad_norm": 4.17714860613498, + "learning_rate": 9.591560254808909e-06, + "loss": 0.6956, "step": 11210 }, { - "epoch": 2.35080729712728, - "grad_norm": 3.9910241863390024, - "learning_rate": 2.357370611469443e-06, - "loss": 0.1336, + "epoch": 1.5830273933916974, + "grad_norm": 3.277945067769901, + "learning_rate": 9.590037078703919e-06, + "loss": 0.5296, "step": 11211 }, { - "epoch": 2.3510169846928077, - "grad_norm": 5.7962172419756826, - "learning_rate": 2.3559108173599974e-06, - "loss": 0.1698, + "epoch": 1.5831685964416833, + "grad_norm": 3.522995047203345, + "learning_rate": 9.588513912126286e-06, + "loss": 0.5386, "step": 11212 }, { - "epoch": 2.3512266722583353, - "grad_norm": 4.603743608511823, - "learning_rate": 2.354451415023382e-06, - "loss": 0.1416, + "epoch": 1.5833097994916692, + "grad_norm": 3.2949595741317563, + "learning_rate": 9.586990755111416e-06, + "loss": 0.525, "step": 11213 }, { - "epoch": 2.3514363598238623, - "grad_norm": 6.474164427921448, - "learning_rate": 2.352992404534391e-06, - "loss": 0.1804, + "epoch": 1.583451002541655, + "grad_norm": 3.0444153711657393, + "learning_rate": 9.585467607694702e-06, + "loss": 0.5005, "step": 11214 }, { - "epoch": 2.35164604738939, - "grad_norm": 5.325426079370344, - "learning_rate": 2.3515337859678032e-06, - "loss": 0.1954, + "epoch": 1.583592205591641, + "grad_norm": 3.630987814798, + "learning_rate": 9.583944469911541e-06, + "loss": 0.5716, "step": 11215 }, { - "epoch": 2.351855734954917, - "grad_norm": 4.0750923977894065, - "learning_rate": 2.350075559398376e-06, - "loss": 0.1405, + "epoch": 1.5837334086416268, + "grad_norm": 3.4885849727715206, + "learning_rate": 9.58242134179733e-06, + "loss": 0.6652, "step": 11216 }, { - "epoch": 2.3520654225204445, - "grad_norm": 3.957539865346227, - "learning_rate": 2.3486177249008413e-06, - "loss": 0.1328, + "epoch": 1.5838746116916127, + "grad_norm": 3.919528878245229, + "learning_rate": 9.580898223387468e-06, + "loss": 0.5567, "step": 11217 }, { - "epoch": 2.352275110085972, - "grad_norm": 5.002165358277466, - "learning_rate": 2.3471602825499186e-06, - "loss": 0.156, + "epoch": 1.5840158147415986, + "grad_norm": 2.9326633353070504, + "learning_rate": 9.579375114717352e-06, + "loss": 0.4851, "step": 11218 }, { - "epoch": 2.352484797651499, - "grad_norm": 4.52823497995379, - "learning_rate": 2.3457032324203076e-06, - "loss": 0.1311, + "epoch": 1.5841570177915842, + "grad_norm": 3.623847998141257, + "learning_rate": 9.577852015822376e-06, + "loss": 0.5845, "step": 11219 }, { - "epoch": 2.3526944852170266, - "grad_norm": 4.697879679746434, - "learning_rate": 2.3442465745866814e-06, - "loss": 0.1604, + "epoch": 1.5842982208415701, + "grad_norm": 3.4264844825491347, + "learning_rate": 9.576328926737936e-06, + "loss": 0.5093, "step": 11220 }, { - "epoch": 2.352904172782554, - "grad_norm": 4.166875455636941, - "learning_rate": 2.3427903091236936e-06, - "loss": 0.1494, + "epoch": 1.584439423891556, + "grad_norm": 3.67255249128012, + "learning_rate": 9.574805847499426e-06, + "loss": 0.5804, "step": 11221 }, { - "epoch": 2.3531138603480812, - "grad_norm": 3.0467560653214933, - "learning_rate": 2.341334436105984e-06, - "loss": 0.11, + "epoch": 1.584580626941542, + "grad_norm": 3.6172447286762335, + "learning_rate": 9.573282778142246e-06, + "loss": 0.5891, "step": 11222 }, { - "epoch": 2.3533235479136088, - "grad_norm": 4.069252138371243, - "learning_rate": 2.3398789556081703e-06, - "loss": 0.1403, + "epoch": 1.5847218299915278, + "grad_norm": 3.9521573932919942, + "learning_rate": 9.571759718701792e-06, + "loss": 0.6431, "step": 11223 }, { - "epoch": 2.3535332354791363, - "grad_norm": 4.864315720825959, - "learning_rate": 2.3384238677048455e-06, - "loss": 0.1382, + "epoch": 1.5848630330415137, + "grad_norm": 4.4825441058194375, + "learning_rate": 9.570236669213454e-06, + "loss": 0.6971, "step": 11224 }, { - "epoch": 2.3537429230446634, - "grad_norm": 3.7488576398269746, - "learning_rate": 2.3369691724705834e-06, - "loss": 0.1221, + "epoch": 1.5850042360914995, + "grad_norm": 3.854757549437913, + "learning_rate": 9.568713629712632e-06, + "loss": 0.5865, "step": 11225 }, { - "epoch": 2.353952610610191, - "grad_norm": 5.645603803680687, - "learning_rate": 2.3355148699799447e-06, - "loss": 0.1716, + "epoch": 1.5851454391414854, + "grad_norm": 3.060590796833434, + "learning_rate": 9.567190600234722e-06, + "loss": 0.4982, "step": 11226 }, { - "epoch": 2.3541622981757184, - "grad_norm": 4.515834626835535, - "learning_rate": 2.3340609603074595e-06, - "loss": 0.1656, + "epoch": 1.5852866421914713, + "grad_norm": 2.9079771007440933, + "learning_rate": 9.565667580815114e-06, + "loss": 0.4857, "step": 11227 }, { - "epoch": 2.3543719857412455, - "grad_norm": 3.9994883561642482, - "learning_rate": 2.332607443527649e-06, - "loss": 0.144, + "epoch": 1.5854278452414572, + "grad_norm": 2.9279652359640287, + "learning_rate": 9.564144571489202e-06, + "loss": 0.4696, "step": 11228 }, { - "epoch": 2.354581673306773, - "grad_norm": 4.501584322690949, - "learning_rate": 2.3311543197150044e-06, - "loss": 0.1381, + "epoch": 1.585569048291443, + "grad_norm": 3.4162371607903106, + "learning_rate": 9.562621572292381e-06, + "loss": 0.4991, "step": 11229 }, { - "epoch": 2.3547913608723, - "grad_norm": 4.768313099543709, - "learning_rate": 2.329701588944e-06, - "loss": 0.145, + "epoch": 1.585710251341429, + "grad_norm": 3.542799640516143, + "learning_rate": 9.561098583260047e-06, + "loss": 0.5903, "step": 11230 }, { - "epoch": 2.3550010484378276, - "grad_norm": 4.840034553262808, - "learning_rate": 2.3282492512890954e-06, - "loss": 0.1607, + "epoch": 1.5858514543914148, + "grad_norm": 3.5202184455330525, + "learning_rate": 9.559575604427594e-06, + "loss": 0.5898, "step": 11231 }, { - "epoch": 2.355210736003355, - "grad_norm": 4.142409711055538, - "learning_rate": 2.326797306824723e-06, - "loss": 0.1531, + "epoch": 1.5859926574414007, + "grad_norm": 3.476806909504271, + "learning_rate": 9.558052635830413e-06, + "loss": 0.5919, "step": 11232 }, { - "epoch": 2.3554204235688823, - "grad_norm": 3.6331544244758955, - "learning_rate": 2.3253457556252933e-06, - "loss": 0.095, + "epoch": 1.5861338604913866, + "grad_norm": 3.615478786416845, + "learning_rate": 9.556529677503896e-06, + "loss": 0.6464, "step": 11233 }, { - "epoch": 2.35563011113441, - "grad_norm": 3.8257343232881804, - "learning_rate": 2.3238945977652063e-06, - "loss": 0.1574, + "epoch": 1.5862750635413725, + "grad_norm": 3.59967894584123, + "learning_rate": 9.555006729483438e-06, + "loss": 0.5725, "step": 11234 }, { - "epoch": 2.355839798699937, - "grad_norm": 3.918103285416472, - "learning_rate": 2.322443833318836e-06, - "loss": 0.1478, + "epoch": 1.5864162665913584, + "grad_norm": 3.640724900589293, + "learning_rate": 9.553483791804435e-06, + "loss": 0.703, "step": 11235 }, { - "epoch": 2.3560494862654644, - "grad_norm": 4.350963230021017, - "learning_rate": 2.320993462360536e-06, - "loss": 0.17, + "epoch": 1.5865574696413443, + "grad_norm": 3.5607970765438646, + "learning_rate": 9.551960864502275e-06, + "loss": 0.5689, "step": 11236 }, { - "epoch": 2.356259173830992, - "grad_norm": 5.2636941171505285, - "learning_rate": 2.319543484964636e-06, - "loss": 0.1694, + "epoch": 1.5866986726913301, + "grad_norm": 3.898890531768452, + "learning_rate": 9.550437947612349e-06, + "loss": 0.6552, "step": 11237 }, { - "epoch": 2.356468861396519, - "grad_norm": 4.685358765623728, - "learning_rate": 2.3180939012054573e-06, - "loss": 0.1665, + "epoch": 1.586839875741316, + "grad_norm": 2.670820209539343, + "learning_rate": 9.548915041170049e-06, + "loss": 0.4229, "step": 11238 }, { - "epoch": 2.3566785489620465, - "grad_norm": 5.163522010701617, - "learning_rate": 2.316644711157285e-06, - "loss": 0.1799, + "epoch": 1.586981078791302, + "grad_norm": 2.964087986383132, + "learning_rate": 9.54739214521077e-06, + "loss": 0.4412, "step": 11239 }, { - "epoch": 2.356888236527574, - "grad_norm": 4.394498321197905, - "learning_rate": 2.3151959148944025e-06, - "loss": 0.1558, + "epoch": 1.5871222818412878, + "grad_norm": 3.7599311535691875, + "learning_rate": 9.545869259769904e-06, + "loss": 0.6676, "step": 11240 }, { - "epoch": 2.357097924093101, - "grad_norm": 4.408834672970853, - "learning_rate": 2.3137475124910567e-06, - "loss": 0.1504, + "epoch": 1.5872634848912737, + "grad_norm": 3.676183150198646, + "learning_rate": 9.544346384882837e-06, + "loss": 0.6329, "step": 11241 }, { - "epoch": 2.3573076116586287, - "grad_norm": 3.5871876794783613, - "learning_rate": 2.31229950402148e-06, - "loss": 0.1275, + "epoch": 1.5874046879412596, + "grad_norm": 3.3132097751079783, + "learning_rate": 9.542823520584968e-06, + "loss": 0.5407, "step": 11242 }, { - "epoch": 2.357517299224156, - "grad_norm": 4.579123733696252, - "learning_rate": 2.3108518895598863e-06, - "loss": 0.1319, + "epoch": 1.5875458909912454, + "grad_norm": 3.534256889945538, + "learning_rate": 9.541300666911682e-06, + "loss": 0.5544, "step": 11243 }, { - "epoch": 2.3577269867896833, - "grad_norm": 4.871814528670959, - "learning_rate": 2.309404669180473e-06, - "loss": 0.1557, + "epoch": 1.5876870940412313, + "grad_norm": 4.137219170842502, + "learning_rate": 9.539777823898368e-06, + "loss": 0.5389, "step": 11244 }, { - "epoch": 2.357936674355211, - "grad_norm": 5.278381770408729, - "learning_rate": 2.307957842957409e-06, - "loss": 0.1701, + "epoch": 1.5878282970912172, + "grad_norm": 3.853430190032524, + "learning_rate": 9.538254991580421e-06, + "loss": 0.6068, "step": 11245 }, { - "epoch": 2.3581463619207383, - "grad_norm": 4.1075139751892396, - "learning_rate": 2.3065114109648446e-06, - "loss": 0.14, + "epoch": 1.587969500141203, + "grad_norm": 3.4239930765006727, + "learning_rate": 9.536732169993225e-06, + "loss": 0.5557, "step": 11246 }, { - "epoch": 2.3583560494862654, - "grad_norm": 4.896966515416556, - "learning_rate": 2.3050653732769156e-06, - "loss": 0.1332, + "epoch": 1.588110703191189, + "grad_norm": 3.5258178039665267, + "learning_rate": 9.535209359172176e-06, + "loss": 0.5205, "step": 11247 }, { - "epoch": 2.358565737051793, - "grad_norm": 4.665470239518378, - "learning_rate": 2.303619729967733e-06, - "loss": 0.1679, + "epoch": 1.5882519062411748, + "grad_norm": 3.3719082604687114, + "learning_rate": 9.53368655915266e-06, + "loss": 0.6185, "step": 11248 }, { - "epoch": 2.35877542461732, - "grad_norm": 4.364236556757041, - "learning_rate": 2.3021744811113842e-06, - "loss": 0.1875, + "epoch": 1.5883931092911607, + "grad_norm": 3.3092244330069644, + "learning_rate": 9.532163769970068e-06, + "loss": 0.5054, "step": 11249 }, { - "epoch": 2.3589851121828476, - "grad_norm": 4.6961183711298, - "learning_rate": 2.300729626781949e-06, - "loss": 0.1535, + "epoch": 1.5885343123411466, + "grad_norm": 3.4176099050286464, + "learning_rate": 9.530640991659785e-06, + "loss": 0.5105, "step": 11250 }, { - "epoch": 2.359194799748375, - "grad_norm": 4.312771944096224, - "learning_rate": 2.29928516705347e-06, - "loss": 0.121, + "epoch": 1.5886755153911325, + "grad_norm": 3.0915700140611584, + "learning_rate": 9.529118224257205e-06, + "loss": 0.551, "step": 11251 }, { - "epoch": 2.359404487313902, - "grad_norm": 4.488213940548144, - "learning_rate": 2.2978411019999857e-06, - "loss": 0.1395, + "epoch": 1.5888167184411184, + "grad_norm": 4.156022780626483, + "learning_rate": 9.527595467797716e-06, + "loss": 0.6111, "step": 11252 }, { - "epoch": 2.3596141748794297, - "grad_norm": 4.599365551675834, - "learning_rate": 2.296397431695504e-06, - "loss": 0.1538, + "epoch": 1.588957921491104, + "grad_norm": 3.5468453458356266, + "learning_rate": 9.526072722316702e-06, + "loss": 0.4939, "step": 11253 }, { - "epoch": 2.359823862444957, - "grad_norm": 4.048907471489787, - "learning_rate": 2.294954156214012e-06, - "loss": 0.1509, + "epoch": 1.58909912454109, + "grad_norm": 4.193239879000057, + "learning_rate": 9.524549987849555e-06, + "loss": 0.5581, "step": 11254 }, { - "epoch": 2.3600335500104843, - "grad_norm": 5.430306725560685, - "learning_rate": 2.293511275629483e-06, - "loss": 0.1932, + "epoch": 1.5892403275910758, + "grad_norm": 5.497588720526756, + "learning_rate": 9.523027264431657e-06, + "loss": 0.7577, "step": 11255 }, { - "epoch": 2.360243237576012, - "grad_norm": 3.900783582179952, - "learning_rate": 2.29206879001587e-06, - "loss": 0.1101, + "epoch": 1.5893815306410617, + "grad_norm": 3.0229178836483577, + "learning_rate": 9.521504552098402e-06, + "loss": 0.5255, "step": 11256 }, { - "epoch": 2.360452925141539, - "grad_norm": 5.1910383344913225, - "learning_rate": 2.2906266994471005e-06, - "loss": 0.1816, + "epoch": 1.5895227336910476, + "grad_norm": 3.4180075130263288, + "learning_rate": 9.519981850885175e-06, + "loss": 0.5485, "step": 11257 }, { - "epoch": 2.3606626127070665, - "grad_norm": 5.132216885670787, - "learning_rate": 2.289185003997081e-06, - "loss": 0.1974, + "epoch": 1.5896639367410335, + "grad_norm": 3.6110206683336545, + "learning_rate": 9.518459160827359e-06, + "loss": 0.5432, "step": 11258 }, { - "epoch": 2.360872300272594, - "grad_norm": 4.284468269150263, - "learning_rate": 2.2877437037397066e-06, - "loss": 0.1422, + "epoch": 1.5898051397910193, + "grad_norm": 3.2566560559147923, + "learning_rate": 9.516936481960346e-06, + "loss": 0.4577, "step": 11259 }, { - "epoch": 2.361081987838121, - "grad_norm": 4.750823194590404, - "learning_rate": 2.2863027987488405e-06, - "loss": 0.1851, + "epoch": 1.5899463428410052, + "grad_norm": 4.519051168853164, + "learning_rate": 9.515413814319524e-06, + "loss": 0.8019, "step": 11260 }, { - "epoch": 2.3612916754036486, - "grad_norm": 4.335870240201018, - "learning_rate": 2.2848622890983387e-06, - "loss": 0.1262, + "epoch": 1.590087545890991, + "grad_norm": 2.4722158494120205, + "learning_rate": 9.513891157940275e-06, + "loss": 0.3847, "step": 11261 }, { - "epoch": 2.361501362969176, - "grad_norm": 4.441702641374946, - "learning_rate": 2.2834221748620244e-06, - "loss": 0.1463, + "epoch": 1.590228748940977, + "grad_norm": 4.504250911409081, + "learning_rate": 9.512368512857983e-06, + "loss": 0.7845, "step": 11262 }, { - "epoch": 2.361711050534703, - "grad_norm": 3.829497424626141, - "learning_rate": 2.281982456113705e-06, - "loss": 0.1641, + "epoch": 1.5903699519909629, + "grad_norm": 3.269899112068281, + "learning_rate": 9.510845879108039e-06, + "loss": 0.5187, "step": 11263 }, { - "epoch": 2.3619207381002307, - "grad_norm": 5.0208891224968575, - "learning_rate": 2.280543132927172e-06, - "loss": 0.202, + "epoch": 1.5905111550409488, + "grad_norm": 3.0655292570020545, + "learning_rate": 9.50932325672582e-06, + "loss": 0.5643, "step": 11264 }, { - "epoch": 2.3621304256657583, - "grad_norm": 4.47880916261504, - "learning_rate": 2.279104205376197e-06, - "loss": 0.149, + "epoch": 1.5906523580909346, + "grad_norm": 3.405556567171546, + "learning_rate": 9.50780064574672e-06, + "loss": 0.6112, "step": 11265 }, { - "epoch": 2.3623401132312853, - "grad_norm": 4.964393521400657, - "learning_rate": 2.277665673534516e-06, - "loss": 0.1506, + "epoch": 1.5907935611409205, + "grad_norm": 3.8013483328947495, + "learning_rate": 9.506278046206122e-06, + "loss": 0.5363, "step": 11266 }, { - "epoch": 2.362549800796813, - "grad_norm": 5.27619897170765, - "learning_rate": 2.2762275374758645e-06, - "loss": 0.2112, + "epoch": 1.5909347641909064, + "grad_norm": 3.3801577086628796, + "learning_rate": 9.504755458139406e-06, + "loss": 0.5288, "step": 11267 }, { - "epoch": 2.36275948836234, - "grad_norm": 4.231354239254277, - "learning_rate": 2.2747897972739495e-06, - "loss": 0.1351, + "epoch": 1.5910759672408923, + "grad_norm": 3.3056317004240108, + "learning_rate": 9.50323288158196e-06, + "loss": 0.5937, "step": 11268 }, { - "epoch": 2.3629691759278675, - "grad_norm": 3.6769278303354516, - "learning_rate": 2.2733524530024576e-06, - "loss": 0.1308, + "epoch": 1.5912171702908782, + "grad_norm": 3.237177588336155, + "learning_rate": 9.50171031656917e-06, + "loss": 0.4068, "step": 11269 }, { - "epoch": 2.363178863493395, - "grad_norm": 3.721202586159344, - "learning_rate": 2.2719155047350494e-06, - "loss": 0.1404, + "epoch": 1.591358373340864, + "grad_norm": 3.112666842772039, + "learning_rate": 9.500187763136416e-06, + "loss": 0.4845, "step": 11270 }, { - "epoch": 2.363388551058922, - "grad_norm": 4.362660945352736, - "learning_rate": 2.2704789525453787e-06, - "loss": 0.1441, + "epoch": 1.59149957639085, + "grad_norm": 3.8532064532825747, + "learning_rate": 9.498665221319083e-06, + "loss": 0.6531, "step": 11271 }, { - "epoch": 2.3635982386244496, - "grad_norm": 4.048020554720356, - "learning_rate": 2.269042796507065e-06, - "loss": 0.1562, + "epoch": 1.5916407794408358, + "grad_norm": 3.4707956218516527, + "learning_rate": 9.49714269115255e-06, + "loss": 0.575, "step": 11272 }, { - "epoch": 2.3638079261899767, - "grad_norm": 3.9024374849643366, - "learning_rate": 2.2676070366937198e-06, - "loss": 0.1307, + "epoch": 1.5917819824908217, + "grad_norm": 2.6317156917970794, + "learning_rate": 9.495620172672205e-06, + "loss": 0.4932, "step": 11273 }, { - "epoch": 2.3640176137555042, - "grad_norm": 4.294012118742287, - "learning_rate": 2.266171673178924e-06, - "loss": 0.1134, + "epoch": 1.5919231855408076, + "grad_norm": 3.196866364480932, + "learning_rate": 9.494097665913432e-06, + "loss": 0.584, "step": 11274 }, { - "epoch": 2.3642273013210318, - "grad_norm": 3.539284346271426, - "learning_rate": 2.2647367060362425e-06, - "loss": 0.1316, + "epoch": 1.5920643885907935, + "grad_norm": 3.954481929356727, + "learning_rate": 9.492575170911609e-06, + "loss": 0.6039, "step": 11275 }, { - "epoch": 2.364436988886559, - "grad_norm": 4.67855629688292, - "learning_rate": 2.263302135339219e-06, - "loss": 0.1788, + "epoch": 1.5922055916407793, + "grad_norm": 4.709897176294105, + "learning_rate": 9.491052687702118e-06, + "loss": 0.6215, "step": 11276 }, { - "epoch": 2.3646466764520864, - "grad_norm": 3.8126406372524966, - "learning_rate": 2.2618679611613835e-06, - "loss": 0.1435, + "epoch": 1.5923467946907652, + "grad_norm": 3.5664540669062403, + "learning_rate": 9.489530216320348e-06, + "loss": 0.6339, "step": 11277 }, { - "epoch": 2.364856364017614, - "grad_norm": 4.489008585505364, - "learning_rate": 2.2604341835762365e-06, - "loss": 0.14, + "epoch": 1.5924879977407511, + "grad_norm": 3.1019887139590883, + "learning_rate": 9.488007756801672e-06, + "loss": 0.524, "step": 11278 }, { - "epoch": 2.365066051583141, - "grad_norm": 4.168373813091158, - "learning_rate": 2.2590008026572574e-06, - "loss": 0.1752, + "epoch": 1.592629200790737, + "grad_norm": 3.2178717963217918, + "learning_rate": 9.486485309181475e-06, + "loss": 0.5405, "step": 11279 }, { - "epoch": 2.3652757391486685, - "grad_norm": 3.59226251558483, - "learning_rate": 2.257567818477917e-06, - "loss": 0.1255, + "epoch": 1.5927704038407229, + "grad_norm": 3.2189293881127554, + "learning_rate": 9.484962873495137e-06, + "loss": 0.5748, "step": 11280 }, { - "epoch": 2.365485426714196, - "grad_norm": 3.7222066569754095, - "learning_rate": 2.2561352311116515e-06, - "loss": 0.1146, + "epoch": 1.5929116068907088, + "grad_norm": 3.5072974984185397, + "learning_rate": 9.483440449778038e-06, + "loss": 0.5385, "step": 11281 }, { - "epoch": 2.365695114279723, - "grad_norm": 3.4824075514027606, - "learning_rate": 2.2547030406318893e-06, - "loss": 0.119, + "epoch": 1.5930528099406946, + "grad_norm": 3.803119612856957, + "learning_rate": 9.481918038065561e-06, + "loss": 0.49, "step": 11282 }, { - "epoch": 2.3659048018452506, - "grad_norm": 5.130048207605561, - "learning_rate": 2.2532712471120265e-06, - "loss": 0.154, + "epoch": 1.5931940129906805, + "grad_norm": 3.115319673748481, + "learning_rate": 9.480395638393087e-06, + "loss": 0.5605, "step": 11283 }, { - "epoch": 2.366114489410778, - "grad_norm": 3.908958754012853, - "learning_rate": 2.2518398506254514e-06, - "loss": 0.148, + "epoch": 1.5933352160406664, + "grad_norm": 3.3315665959347904, + "learning_rate": 9.478873250795991e-06, + "loss": 0.5677, "step": 11284 }, { - "epoch": 2.3663241769763053, - "grad_norm": 3.5358698543550147, - "learning_rate": 2.250408851245519e-06, - "loss": 0.1138, + "epoch": 1.5934764190906523, + "grad_norm": 4.457979646366254, + "learning_rate": 9.477350875309656e-06, + "loss": 0.7286, "step": 11285 }, { - "epoch": 2.366533864541833, - "grad_norm": 4.2586166159511585, - "learning_rate": 2.248978249045578e-06, - "loss": 0.1298, + "epoch": 1.5936176221406382, + "grad_norm": 4.1469190795336415, + "learning_rate": 9.475828511969466e-06, + "loss": 0.7007, "step": 11286 }, { - "epoch": 2.36674355210736, - "grad_norm": 3.5956355255098633, - "learning_rate": 2.247548044098944e-06, - "loss": 0.1169, + "epoch": 1.593758825190624, + "grad_norm": 4.620226721679112, + "learning_rate": 9.47430616081079e-06, + "loss": 0.7525, "step": 11287 }, { - "epoch": 2.3669532396728874, - "grad_norm": 5.123813424369603, - "learning_rate": 2.2461182364789167e-06, - "loss": 0.1749, + "epoch": 1.59390002824061, + "grad_norm": 3.5736974815355422, + "learning_rate": 9.472783821869015e-06, + "loss": 0.5487, "step": 11288 }, { - "epoch": 2.367162927238415, - "grad_norm": 3.8338985477232863, - "learning_rate": 2.2446888262587785e-06, - "loss": 0.1346, + "epoch": 1.5940412312905958, + "grad_norm": 3.5230912502195846, + "learning_rate": 9.471261495179512e-06, + "loss": 0.5829, "step": 11289 }, { - "epoch": 2.367372614803942, - "grad_norm": 6.150829654304728, - "learning_rate": 2.2432598135117934e-06, - "loss": 0.1625, + "epoch": 1.5941824343405817, + "grad_norm": 3.715534105824283, + "learning_rate": 9.469739180777666e-06, + "loss": 0.5941, "step": 11290 }, { - "epoch": 2.3675823023694695, - "grad_norm": 4.407254728450832, - "learning_rate": 2.2418311983111916e-06, - "loss": 0.1243, + "epoch": 1.5943236373905676, + "grad_norm": 4.207111218858554, + "learning_rate": 9.468216878698853e-06, + "loss": 0.5703, "step": 11291 }, { - "epoch": 2.3677919899349966, - "grad_norm": 3.8861953760367918, - "learning_rate": 2.240402980730196e-06, - "loss": 0.1438, + "epoch": 1.5944648404405535, + "grad_norm": 4.503600661328856, + "learning_rate": 9.466694588978448e-06, + "loss": 0.6384, "step": 11292 }, { - "epoch": 2.368001677500524, - "grad_norm": 4.873657041011035, - "learning_rate": 2.23897516084201e-06, - "loss": 0.1774, + "epoch": 1.5946060434905394, + "grad_norm": 4.424046018806635, + "learning_rate": 9.46517231165183e-06, + "loss": 0.7301, "step": 11293 }, { - "epoch": 2.3682113650660517, - "grad_norm": 5.103229242582608, - "learning_rate": 2.2375477387198064e-06, - "loss": 0.134, + "epoch": 1.5947472465405252, + "grad_norm": 4.249249333654446, + "learning_rate": 9.463650046754383e-06, + "loss": 0.6142, "step": 11294 }, { - "epoch": 2.3684210526315788, - "grad_norm": 4.760257857212763, - "learning_rate": 2.2361207144367412e-06, - "loss": 0.1749, + "epoch": 1.5948884495905111, + "grad_norm": 3.4305991884933817, + "learning_rate": 9.462127794321471e-06, + "loss": 0.6643, "step": 11295 }, { - "epoch": 2.3686307401971063, - "grad_norm": 3.2985139816363027, - "learning_rate": 2.2346940880659594e-06, - "loss": 0.1215, + "epoch": 1.595029652640497, + "grad_norm": 4.318204878908951, + "learning_rate": 9.460605554388479e-06, + "loss": 0.8148, "step": 11296 }, { - "epoch": 2.368840427762634, - "grad_norm": 4.871932870780834, - "learning_rate": 2.2332678596805713e-06, - "loss": 0.1469, + "epoch": 1.595170855690483, + "grad_norm": 4.91535074562223, + "learning_rate": 9.45908332699078e-06, + "loss": 0.7256, "step": 11297 }, { - "epoch": 2.369050115328161, - "grad_norm": 4.815596081549712, - "learning_rate": 2.231842029353678e-06, - "loss": 0.1399, + "epoch": 1.5953120587404688, + "grad_norm": 3.469379108464661, + "learning_rate": 9.45756111216375e-06, + "loss": 0.6175, "step": 11298 }, { - "epoch": 2.3692598028936884, - "grad_norm": 4.761103157052505, - "learning_rate": 2.2304165971583535e-06, - "loss": 0.1538, + "epoch": 1.5954532617904547, + "grad_norm": 3.7556653844580588, + "learning_rate": 9.456038909942766e-06, + "loss": 0.7179, "step": 11299 }, { - "epoch": 2.369469490459216, - "grad_norm": 4.404011998087831, - "learning_rate": 2.2289915631676517e-06, - "loss": 0.1286, + "epoch": 1.5955944648404405, + "grad_norm": 3.409670120788086, + "learning_rate": 9.454516720363203e-06, + "loss": 0.5579, "step": 11300 }, { - "epoch": 2.369679178024743, - "grad_norm": 4.4000847308442745, - "learning_rate": 2.2275669274546107e-06, - "loss": 0.1421, + "epoch": 1.5957356678904264, + "grad_norm": 3.7682275718258795, + "learning_rate": 9.452994543460435e-06, + "loss": 0.6658, "step": 11301 }, { - "epoch": 2.3698888655902706, - "grad_norm": 4.448380977190623, - "learning_rate": 2.2261426900922476e-06, - "loss": 0.1549, + "epoch": 1.5958768709404123, + "grad_norm": 3.678026855501289, + "learning_rate": 9.45147237926984e-06, + "loss": 0.5511, "step": 11302 }, { - "epoch": 2.370098553155798, - "grad_norm": 5.318164023679161, - "learning_rate": 2.2247188511535556e-06, - "loss": 0.1947, + "epoch": 1.5960180739903982, + "grad_norm": 4.565926952994871, + "learning_rate": 9.449950227826792e-06, + "loss": 0.7009, "step": 11303 }, { - "epoch": 2.370308240721325, - "grad_norm": 4.039420609352076, - "learning_rate": 2.2232954107115047e-06, - "loss": 0.1657, + "epoch": 1.596159277040384, + "grad_norm": 3.4482186951381606, + "learning_rate": 9.44842808916666e-06, + "loss": 0.6643, "step": 11304 }, { - "epoch": 2.3705179282868527, - "grad_norm": 3.8977114846128815, - "learning_rate": 2.2218723688390543e-06, - "loss": 0.1346, + "epoch": 1.59630048009037, + "grad_norm": 3.5742434835774683, + "learning_rate": 9.446905963324821e-06, + "loss": 0.4886, "step": 11305 }, { - "epoch": 2.37072761585238, - "grad_norm": 4.013103170084518, - "learning_rate": 2.220449725609133e-06, - "loss": 0.1219, + "epoch": 1.5964416831403558, + "grad_norm": 3.084608618110162, + "learning_rate": 9.445383850336648e-06, + "loss": 0.4382, "step": 11306 }, { - "epoch": 2.3709373034179073, - "grad_norm": 4.46814215352868, - "learning_rate": 2.21902748109466e-06, - "loss": 0.1515, + "epoch": 1.5965828861903417, + "grad_norm": 3.115246318805226, + "learning_rate": 9.443861750237515e-06, + "loss": 0.4847, "step": 11307 }, { - "epoch": 2.371146990983435, - "grad_norm": 4.465558093998286, - "learning_rate": 2.217605635368523e-06, - "loss": 0.1346, + "epoch": 1.5967240892403276, + "grad_norm": 4.418877262295845, + "learning_rate": 9.442339663062795e-06, + "loss": 0.7691, "step": 11308 }, { - "epoch": 2.371356678548962, - "grad_norm": 3.177579802145799, - "learning_rate": 2.216184188503592e-06, - "loss": 0.1074, + "epoch": 1.5968652922903135, + "grad_norm": 3.8277079376677214, + "learning_rate": 9.44081758884786e-06, + "loss": 0.6911, "step": 11309 }, { - "epoch": 2.3715663661144895, - "grad_norm": 5.203711256515378, - "learning_rate": 2.2147631405727244e-06, - "loss": 0.1604, + "epoch": 1.5970064953402994, + "grad_norm": 3.758596591826772, + "learning_rate": 9.439295527628083e-06, + "loss": 0.6841, "step": 11310 }, { - "epoch": 2.3717760536800165, - "grad_norm": 5.374258829077727, - "learning_rate": 2.21334249164875e-06, - "loss": 0.1637, + "epoch": 1.5971476983902853, + "grad_norm": 3.740000724715808, + "learning_rate": 9.437773479438838e-06, + "loss": 0.558, "step": 11311 }, { - "epoch": 2.371985741245544, - "grad_norm": 4.0002657928439564, - "learning_rate": 2.211922241804474e-06, - "loss": 0.1753, + "epoch": 1.5972889014402711, + "grad_norm": 3.4617649812541225, + "learning_rate": 9.436251444315492e-06, + "loss": 0.5579, "step": 11312 }, { - "epoch": 2.3721954288110716, - "grad_norm": 3.7921261806721183, - "learning_rate": 2.210502391112691e-06, - "loss": 0.1189, + "epoch": 1.597430104490257, + "grad_norm": 3.72300367802169, + "learning_rate": 9.43472942229342e-06, + "loss": 0.6297, "step": 11313 }, { - "epoch": 2.3724051163765987, - "grad_norm": 3.4986333616972134, - "learning_rate": 2.2090829396461745e-06, - "loss": 0.1361, + "epoch": 1.597571307540243, + "grad_norm": 3.944197716187972, + "learning_rate": 9.433207413407991e-06, + "loss": 0.5866, "step": 11314 }, { - "epoch": 2.372614803942126, - "grad_norm": 4.660136249176361, - "learning_rate": 2.207663887477669e-06, - "loss": 0.1445, + "epoch": 1.5977125105902288, + "grad_norm": 3.6508968740511154, + "learning_rate": 9.431685417694576e-06, + "loss": 0.6011, "step": 11315 }, { - "epoch": 2.3728244915076537, - "grad_norm": 3.896538656855729, - "learning_rate": 2.2062452346799025e-06, - "loss": 0.1369, + "epoch": 1.5978537136402147, + "grad_norm": 3.170941060307321, + "learning_rate": 9.430163435188549e-06, + "loss": 0.6122, "step": 11316 }, { - "epoch": 2.373034179073181, - "grad_norm": 4.475671952457565, - "learning_rate": 2.204826981325587e-06, - "loss": 0.175, + "epoch": 1.5979949166902006, + "grad_norm": 3.6737008594756433, + "learning_rate": 9.428641465925277e-06, + "loss": 0.6578, "step": 11317 }, { - "epoch": 2.3732438666387083, - "grad_norm": 3.70276803597874, - "learning_rate": 2.2034091274874058e-06, - "loss": 0.0964, + "epoch": 1.5981361197401864, + "grad_norm": 3.44023474313468, + "learning_rate": 9.42711950994013e-06, + "loss": 0.5273, "step": 11318 }, { - "epoch": 2.373453554204236, - "grad_norm": 4.256423600081551, - "learning_rate": 2.2019916732380332e-06, - "loss": 0.1221, + "epoch": 1.5982773227901723, + "grad_norm": 3.093778540179183, + "learning_rate": 9.425597567268477e-06, + "loss": 0.5018, "step": 11319 }, { - "epoch": 2.373663241769763, - "grad_norm": 3.885684905381357, - "learning_rate": 2.200574618650112e-06, - "loss": 0.124, + "epoch": 1.5984185258401582, + "grad_norm": 3.2711134313869255, + "learning_rate": 9.424075637945692e-06, + "loss": 0.5057, "step": 11320 }, { - "epoch": 2.3738729293352905, - "grad_norm": 4.16887216957322, - "learning_rate": 2.1991579637962657e-06, - "loss": 0.1557, + "epoch": 1.598559728890144, + "grad_norm": 3.682204619364394, + "learning_rate": 9.422553722007139e-06, + "loss": 0.6009, "step": 11321 }, { - "epoch": 2.374082616900818, - "grad_norm": 3.5837698356434484, - "learning_rate": 2.1977417087491048e-06, - "loss": 0.1222, + "epoch": 1.59870093194013, + "grad_norm": 3.7237461847377795, + "learning_rate": 9.421031819488188e-06, + "loss": 0.572, "step": 11322 }, { - "epoch": 2.374292304466345, - "grad_norm": 4.848672981230273, - "learning_rate": 2.1963258535812147e-06, - "loss": 0.1543, + "epoch": 1.5988421349901158, + "grad_norm": 3.001718263432985, + "learning_rate": 9.419509930424206e-06, + "loss": 0.5149, "step": 11323 }, { - "epoch": 2.3745019920318726, - "grad_norm": 4.235418771268135, - "learning_rate": 2.194910398365161e-06, - "loss": 0.1363, + "epoch": 1.5989833380401017, + "grad_norm": 3.4167324887857506, + "learning_rate": 9.417988054850561e-06, + "loss": 0.5746, "step": 11324 }, { - "epoch": 2.3747116795973997, - "grad_norm": 4.435760444507633, - "learning_rate": 2.193495343173483e-06, - "loss": 0.1165, + "epoch": 1.5991245410900876, + "grad_norm": 4.187941240630047, + "learning_rate": 9.416466192802626e-06, + "loss": 0.6145, "step": 11325 }, { - "epoch": 2.3749213671629272, - "grad_norm": 4.025518248985951, - "learning_rate": 2.1920806880787114e-06, - "loss": 0.1458, + "epoch": 1.5992657441400735, + "grad_norm": 3.841395202067567, + "learning_rate": 9.414944344315765e-06, + "loss": 0.5932, "step": 11326 }, { - "epoch": 2.3751310547284548, - "grad_norm": 4.485366469424289, - "learning_rate": 2.1906664331533466e-06, - "loss": 0.1266, + "epoch": 1.5994069471900594, + "grad_norm": 3.8614867760777054, + "learning_rate": 9.41342250942534e-06, + "loss": 0.7972, "step": 11327 }, { - "epoch": 2.375340742293982, - "grad_norm": 3.8943925683870435, - "learning_rate": 2.189252578469869e-06, - "loss": 0.1374, + "epoch": 1.5995481502400453, + "grad_norm": 3.337319612051983, + "learning_rate": 9.411900688166731e-06, + "loss": 0.6081, "step": 11328 }, { - "epoch": 2.3755504298595094, - "grad_norm": 3.248312120250793, - "learning_rate": 2.187839124100747e-06, - "loss": 0.1048, + "epoch": 1.5996893532900311, + "grad_norm": 4.3641814300421435, + "learning_rate": 9.410378880575288e-06, + "loss": 0.5812, "step": 11329 }, { - "epoch": 2.3757601174250365, - "grad_norm": 7.013154711627669, - "learning_rate": 2.186426070118416e-06, - "loss": 0.1974, + "epoch": 1.599830556340017, + "grad_norm": 3.835037756548726, + "learning_rate": 9.40885708668639e-06, + "loss": 0.4974, "step": 11330 }, { - "epoch": 2.375969804990564, - "grad_norm": 4.5600487610600355, - "learning_rate": 2.185013416595303e-06, - "loss": 0.1457, + "epoch": 1.599971759390003, + "grad_norm": 4.222625326106407, + "learning_rate": 9.407335306535396e-06, + "loss": 0.6487, "step": 11331 }, { - "epoch": 2.3761794925560915, - "grad_norm": 4.118171270966639, - "learning_rate": 2.183601163603808e-06, - "loss": 0.1589, + "epoch": 1.6001129624399888, + "grad_norm": 3.632956254439459, + "learning_rate": 9.40581354015767e-06, + "loss": 0.5592, "step": 11332 }, { - "epoch": 2.3763891801216186, - "grad_norm": 5.5203675605358615, - "learning_rate": 2.182189311216307e-06, - "loss": 0.2032, + "epoch": 1.6002541654899747, + "grad_norm": 3.8621024996093722, + "learning_rate": 9.404291787588586e-06, + "loss": 0.624, "step": 11333 }, { - "epoch": 2.376598867687146, - "grad_norm": 4.584080484818716, - "learning_rate": 2.180777859505163e-06, - "loss": 0.1354, + "epoch": 1.6003953685399606, + "grad_norm": 3.718051762722175, + "learning_rate": 9.402770048863502e-06, + "loss": 0.5687, "step": 11334 }, { - "epoch": 2.3768085552526736, - "grad_norm": 4.484336487965244, - "learning_rate": 2.1793668085427187e-06, - "loss": 0.1821, + "epoch": 1.6005365715899464, + "grad_norm": 3.6064803951845836, + "learning_rate": 9.401248324017784e-06, + "loss": 0.6267, "step": 11335 }, { - "epoch": 2.3770182428182007, - "grad_norm": 3.824433790449907, - "learning_rate": 2.1779561584012888e-06, - "loss": 0.143, + "epoch": 1.6006777746399323, + "grad_norm": 2.9429865359547476, + "learning_rate": 9.399726613086794e-06, + "loss": 0.3927, "step": 11336 }, { - "epoch": 2.3772279303837283, - "grad_norm": 4.082295600417207, - "learning_rate": 2.1765459091531705e-06, - "loss": 0.1549, + "epoch": 1.6008189776899182, + "grad_norm": 4.2227226964052855, + "learning_rate": 9.398204916105906e-06, + "loss": 0.6092, "step": 11337 }, { - "epoch": 2.377437617949256, - "grad_norm": 5.1659436411971384, - "learning_rate": 2.175136060870646e-06, - "loss": 0.1764, + "epoch": 1.600960180739904, + "grad_norm": 3.36063975570979, + "learning_rate": 9.396683233110468e-06, + "loss": 0.4865, "step": 11338 }, { - "epoch": 2.377647305514783, - "grad_norm": 5.8472588511752726, - "learning_rate": 2.173726613625967e-06, - "loss": 0.1607, + "epoch": 1.60110138378989, + "grad_norm": 4.213171044545515, + "learning_rate": 9.395161564135853e-06, + "loss": 0.6979, "step": 11339 }, { - "epoch": 2.3778569930803104, - "grad_norm": 5.395234774935025, - "learning_rate": 2.1723175674913755e-06, - "loss": 0.1309, + "epoch": 1.6012425868398759, + "grad_norm": 3.635254239973643, + "learning_rate": 9.393639909217423e-06, + "loss": 0.622, "step": 11340 }, { - "epoch": 2.378066680645838, - "grad_norm": 3.802500942698872, - "learning_rate": 2.1709089225390854e-06, - "loss": 0.1156, + "epoch": 1.6013837898898617, + "grad_norm": 3.1645457720667025, + "learning_rate": 9.392118268390538e-06, + "loss": 0.4899, "step": 11341 }, { - "epoch": 2.378276368211365, - "grad_norm": 3.1416349046092207, - "learning_rate": 2.16950067884129e-06, - "loss": 0.0874, + "epoch": 1.6015249929398476, + "grad_norm": 3.9798424444459815, + "learning_rate": 9.390596641690563e-06, + "loss": 0.6185, "step": 11342 }, { - "epoch": 2.3784860557768925, - "grad_norm": 3.7829500043774127, - "learning_rate": 2.1680928364701646e-06, - "loss": 0.1447, + "epoch": 1.6016661959898335, + "grad_norm": 3.2134414640467743, + "learning_rate": 9.38907502915286e-06, + "loss": 0.5834, "step": 11343 }, { - "epoch": 2.3786957433424196, - "grad_norm": 4.429311859701148, - "learning_rate": 2.1666853954978695e-06, - "loss": 0.153, + "epoch": 1.6018073990398194, + "grad_norm": 3.607365884488, + "learning_rate": 9.387553430812786e-06, + "loss": 0.5146, "step": 11344 }, { - "epoch": 2.378905430907947, - "grad_norm": 3.449635741097212, - "learning_rate": 2.1652783559965328e-06, - "loss": 0.1229, + "epoch": 1.6019486020898053, + "grad_norm": 3.391152273275, + "learning_rate": 9.386031846705712e-06, + "loss": 0.57, "step": 11345 }, { - "epoch": 2.3791151184734747, - "grad_norm": 4.167732533730095, - "learning_rate": 2.1638717180382662e-06, - "loss": 0.1148, + "epoch": 1.6020898051397912, + "grad_norm": 4.158000755099663, + "learning_rate": 9.384510276866988e-06, + "loss": 0.7712, "step": 11346 }, { - "epoch": 2.3793248060390018, - "grad_norm": 4.230588248415238, - "learning_rate": 2.162465481695165e-06, - "loss": 0.1476, + "epoch": 1.602231008189777, + "grad_norm": 3.572838696066777, + "learning_rate": 9.382988721331981e-06, + "loss": 0.5174, "step": 11347 }, { - "epoch": 2.3795344936045293, - "grad_norm": 4.543630005415105, - "learning_rate": 2.1610596470393076e-06, - "loss": 0.1189, + "epoch": 1.602372211239763, + "grad_norm": 3.156490738543254, + "learning_rate": 9.381467180136049e-06, + "loss": 0.4081, "step": 11348 }, { - "epoch": 2.3797441811700564, - "grad_norm": 4.115427992809845, - "learning_rate": 2.1596542141427335e-06, - "loss": 0.1374, + "epoch": 1.6025134142897488, + "grad_norm": 3.6446906427129817, + "learning_rate": 9.379945653314553e-06, + "loss": 0.5105, "step": 11349 }, { - "epoch": 2.379953868735584, - "grad_norm": 3.18391371695857, - "learning_rate": 2.15824918307748e-06, - "loss": 0.1039, + "epoch": 1.6026546173397347, + "grad_norm": 3.2513041554379236, + "learning_rate": 9.37842414090285e-06, + "loss": 0.5657, "step": 11350 }, { - "epoch": 2.3801635563011114, - "grad_norm": 5.090937041776184, - "learning_rate": 2.156844553915558e-06, - "loss": 0.1573, + "epoch": 1.6027958203897206, + "grad_norm": 4.003797374091856, + "learning_rate": 9.376902642936303e-06, + "loss": 0.6413, "step": 11351 }, { - "epoch": 2.3803732438666385, - "grad_norm": 6.802472181111065, - "learning_rate": 2.155440326728957e-06, - "loss": 0.2057, + "epoch": 1.6029370234397065, + "grad_norm": 3.3978747099673456, + "learning_rate": 9.375381159450271e-06, + "loss": 0.6529, "step": 11352 }, { - "epoch": 2.380582931432166, - "grad_norm": 5.572264663303104, - "learning_rate": 2.1540365015896427e-06, - "loss": 0.1783, + "epoch": 1.6030782264896923, + "grad_norm": 3.8537107665402095, + "learning_rate": 9.373859690480113e-06, + "loss": 0.6417, "step": 11353 }, { - "epoch": 2.3807926189976936, - "grad_norm": 4.984515869009512, - "learning_rate": 2.152633078569567e-06, - "loss": 0.1828, + "epoch": 1.6032194295396782, + "grad_norm": 4.361763831636235, + "learning_rate": 9.372338236061183e-06, + "loss": 0.6116, "step": 11354 }, { - "epoch": 2.3810023065632206, - "grad_norm": 4.415013843638168, - "learning_rate": 2.151230057740654e-06, - "loss": 0.1307, + "epoch": 1.603360632589664, + "grad_norm": 3.0522082725796325, + "learning_rate": 9.370816796228838e-06, + "loss": 0.5423, "step": 11355 }, { - "epoch": 2.381211994128748, - "grad_norm": 3.950344556764105, - "learning_rate": 2.1498274391748177e-06, - "loss": 0.1437, + "epoch": 1.6035018356396498, + "grad_norm": 3.2973094462014156, + "learning_rate": 9.369295371018442e-06, + "loss": 0.5917, "step": 11356 }, { - "epoch": 2.3814216816942757, - "grad_norm": 4.97114052206461, - "learning_rate": 2.1484252229439397e-06, - "loss": 0.1458, + "epoch": 1.6036430386896356, + "grad_norm": 3.455909044713918, + "learning_rate": 9.36777396046535e-06, + "loss": 0.5263, "step": 11357 }, { - "epoch": 2.381631369259803, - "grad_norm": 4.13293003913876, - "learning_rate": 2.147023409119884e-06, - "loss": 0.1211, + "epoch": 1.6037842417396215, + "grad_norm": 3.7412227522211428, + "learning_rate": 9.366252564604914e-06, + "loss": 0.5576, "step": 11358 }, { - "epoch": 2.3818410568253303, - "grad_norm": 4.21688639233053, - "learning_rate": 2.1456219977744985e-06, - "loss": 0.1561, + "epoch": 1.6039254447896074, + "grad_norm": 4.081817331951791, + "learning_rate": 9.364731183472497e-06, + "loss": 0.689, "step": 11359 }, { - "epoch": 2.382050744390858, - "grad_norm": 5.493665500049063, - "learning_rate": 2.144220988979612e-06, - "loss": 0.1525, + "epoch": 1.6040666478395933, + "grad_norm": 4.124961631371808, + "learning_rate": 9.363209817103455e-06, + "loss": 0.7466, "step": 11360 }, { - "epoch": 2.382260431956385, - "grad_norm": 3.3391852782402243, - "learning_rate": 2.1428203828070247e-06, - "loss": 0.107, + "epoch": 1.6042078508895792, + "grad_norm": 3.825424559044004, + "learning_rate": 9.361688465533139e-06, + "loss": 0.6208, "step": 11361 }, { - "epoch": 2.3824701195219125, - "grad_norm": 4.338641993242093, - "learning_rate": 2.141420179328517e-06, - "loss": 0.1473, + "epoch": 1.604349053939565, + "grad_norm": 3.9542263475677992, + "learning_rate": 9.360167128796913e-06, + "loss": 0.5741, "step": 11362 }, { - "epoch": 2.3826798070874395, - "grad_norm": 3.9598423837772954, - "learning_rate": 2.1400203786158593e-06, - "loss": 0.149, + "epoch": 1.604490256989551, + "grad_norm": 4.432923820622169, + "learning_rate": 9.35864580693012e-06, + "loss": 0.7222, "step": 11363 }, { - "epoch": 2.382889494652967, - "grad_norm": 4.159956637258988, - "learning_rate": 2.1386209807407854e-06, - "loss": 0.1229, + "epoch": 1.6046314600395368, + "grad_norm": 3.8668004930869726, + "learning_rate": 9.357124499968124e-06, + "loss": 0.5995, "step": 11364 }, { - "epoch": 2.3830991822184946, - "grad_norm": 3.997759874769324, - "learning_rate": 2.137221985775024e-06, - "loss": 0.1179, + "epoch": 1.6047726630895227, + "grad_norm": 3.1745374549704177, + "learning_rate": 9.355603207946277e-06, + "loss": 0.5082, "step": 11365 }, { - "epoch": 2.3833088697840217, - "grad_norm": 4.401835478252226, - "learning_rate": 2.135823393790274e-06, - "loss": 0.1627, + "epoch": 1.6049138661395086, + "grad_norm": 3.8753301613352633, + "learning_rate": 9.354081930899935e-06, + "loss": 0.5369, "step": 11366 }, { - "epoch": 2.383518557349549, - "grad_norm": 4.260673265776229, - "learning_rate": 2.1344252048582115e-06, - "loss": 0.1655, + "epoch": 1.6050550691894945, + "grad_norm": 3.4399737349204003, + "learning_rate": 9.352560668864445e-06, + "loss": 0.5899, "step": 11367 }, { - "epoch": 2.3837282449150763, - "grad_norm": 3.782840508042572, - "learning_rate": 2.1330274190504995e-06, - "loss": 0.1469, + "epoch": 1.6051962722394804, + "grad_norm": 4.171357801868226, + "learning_rate": 9.351039421875169e-06, + "loss": 0.6639, "step": 11368 }, { - "epoch": 2.383937932480604, - "grad_norm": 4.261506147440046, - "learning_rate": 2.131630036438783e-06, - "loss": 0.1178, + "epoch": 1.6053374752894662, + "grad_norm": 4.040160376176862, + "learning_rate": 9.349518189967455e-06, + "loss": 0.6296, "step": 11369 }, { - "epoch": 2.3841476200461313, - "grad_norm": 4.673290365816312, - "learning_rate": 2.130233057094668e-06, - "loss": 0.1854, + "epoch": 1.6054786783394521, + "grad_norm": 3.2155511270363504, + "learning_rate": 9.347996973176661e-06, + "loss": 0.6098, "step": 11370 }, { - "epoch": 2.3843573076116584, - "grad_norm": 5.916636588351488, - "learning_rate": 2.128836481089758e-06, - "loss": 0.1211, + "epoch": 1.605619881389438, + "grad_norm": 4.399564604852058, + "learning_rate": 9.346475771538135e-06, + "loss": 0.8228, "step": 11371 }, { - "epoch": 2.384566995177186, - "grad_norm": 4.73529721946947, - "learning_rate": 2.127440308495633e-06, - "loss": 0.1535, + "epoch": 1.605761084439424, + "grad_norm": 3.805017827852861, + "learning_rate": 9.344954585087226e-06, + "loss": 0.6104, "step": 11372 }, { - "epoch": 2.3847766827427135, - "grad_norm": 3.674921725905945, - "learning_rate": 2.126044539383847e-06, - "loss": 0.1276, + "epoch": 1.6059022874894098, + "grad_norm": 3.499509402164501, + "learning_rate": 9.343433413859291e-06, + "loss": 0.5769, "step": 11373 }, { - "epoch": 2.3849863703082406, - "grad_norm": 4.876113723118431, - "learning_rate": 2.1246491738259324e-06, - "loss": 0.1861, + "epoch": 1.6060434905393957, + "grad_norm": 3.4205755272621223, + "learning_rate": 9.34191225788968e-06, + "loss": 0.5757, "step": 11374 }, { - "epoch": 2.385196057873768, - "grad_norm": 4.106452377394905, - "learning_rate": 2.1232542118934085e-06, - "loss": 0.153, + "epoch": 1.6061846935893815, + "grad_norm": 3.279612718323163, + "learning_rate": 9.340391117213742e-06, + "loss": 0.5788, "step": 11375 }, { - "epoch": 2.3854057454392956, - "grad_norm": 5.321737130549812, - "learning_rate": 2.121859653657765e-06, - "loss": 0.1792, + "epoch": 1.6063258966393674, + "grad_norm": 3.809012473573495, + "learning_rate": 9.338869991866833e-06, + "loss": 0.6492, "step": 11376 }, { - "epoch": 2.3856154330048227, - "grad_norm": 4.447496872863494, - "learning_rate": 2.1204654991904818e-06, - "loss": 0.1551, + "epoch": 1.6064670996893533, + "grad_norm": 3.6212294217801646, + "learning_rate": 9.3373488818843e-06, + "loss": 0.6535, "step": 11377 }, { - "epoch": 2.3858251205703502, - "grad_norm": 4.624052651765212, - "learning_rate": 2.1190717485630073e-06, - "loss": 0.1714, + "epoch": 1.6066083027393392, + "grad_norm": 3.1915815566669785, + "learning_rate": 9.335827787301492e-06, + "loss": 0.5438, "step": 11378 }, { - "epoch": 2.3860348081358778, - "grad_norm": 5.343328107944146, - "learning_rate": 2.1176784018467733e-06, - "loss": 0.17, + "epoch": 1.606749505789325, + "grad_norm": 4.033966809319184, + "learning_rate": 9.33430670815376e-06, + "loss": 0.6405, "step": 11379 }, { - "epoch": 2.386244495701405, - "grad_norm": 4.307459477668443, - "learning_rate": 2.116285459113191e-06, - "loss": 0.1401, + "epoch": 1.606890708839311, + "grad_norm": 3.1291980312090106, + "learning_rate": 9.332785644476452e-06, + "loss": 0.4917, "step": 11380 }, { - "epoch": 2.3864541832669324, - "grad_norm": 4.082646392258286, - "learning_rate": 2.1148929204336555e-06, - "loss": 0.1741, + "epoch": 1.6070319118892968, + "grad_norm": 3.249701665199538, + "learning_rate": 9.331264596304916e-06, + "loss": 0.5861, "step": 11381 }, { - "epoch": 2.3866638708324595, - "grad_norm": 4.264783186472907, - "learning_rate": 2.1135007858795344e-06, - "loss": 0.1255, + "epoch": 1.6071731149392827, + "grad_norm": 3.447666218549116, + "learning_rate": 9.329743563674505e-06, + "loss": 0.583, "step": 11382 }, { - "epoch": 2.386873558397987, - "grad_norm": 4.501729745039637, - "learning_rate": 2.1121090555221725e-06, - "loss": 0.1439, + "epoch": 1.6073143179892686, + "grad_norm": 3.409616850094076, + "learning_rate": 9.328222546620561e-06, + "loss": 0.4972, "step": 11383 }, { - "epoch": 2.3870832459635145, - "grad_norm": 3.5376355657527356, - "learning_rate": 2.110717729432906e-06, - "loss": 0.1314, + "epoch": 1.6074555210392545, + "grad_norm": 3.598417721879181, + "learning_rate": 9.326701545178434e-06, + "loss": 0.4803, "step": 11384 }, { - "epoch": 2.3872929335290416, - "grad_norm": 4.4515916047713056, - "learning_rate": 2.1093268076830366e-06, - "loss": 0.1441, + "epoch": 1.6075967240892404, + "grad_norm": 3.4418914789527477, + "learning_rate": 9.325180559383474e-06, + "loss": 0.5268, "step": 11385 }, { - "epoch": 2.387502621094569, - "grad_norm": 5.06269533628943, - "learning_rate": 2.107936290343856e-06, - "loss": 0.1307, + "epoch": 1.6077379271392263, + "grad_norm": 3.69581986894758, + "learning_rate": 9.323659589271028e-06, + "loss": 0.5857, "step": 11386 }, { - "epoch": 2.3877123086600966, - "grad_norm": 4.25874185087666, - "learning_rate": 2.106546177486629e-06, - "loss": 0.1351, + "epoch": 1.6078791301892121, + "grad_norm": 2.8864170161221487, + "learning_rate": 9.322138634876441e-06, + "loss": 0.4341, "step": 11387 }, { - "epoch": 2.3879219962256237, - "grad_norm": 5.065248224682678, - "learning_rate": 2.105156469182599e-06, - "loss": 0.1561, + "epoch": 1.608020333239198, + "grad_norm": 4.247618024445971, + "learning_rate": 9.320617696235058e-06, + "loss": 0.6131, "step": 11388 }, { - "epoch": 2.3881316837911513, - "grad_norm": 5.028934161245213, - "learning_rate": 2.1037671655029947e-06, - "loss": 0.1482, + "epoch": 1.608161536289184, + "grad_norm": 2.9540149238773794, + "learning_rate": 9.319096773382226e-06, + "loss": 0.4708, "step": 11389 }, { - "epoch": 2.3883413713566783, - "grad_norm": 3.787209473207291, - "learning_rate": 2.1023782665190174e-06, - "loss": 0.1209, + "epoch": 1.6083027393391696, + "grad_norm": 3.049907652025075, + "learning_rate": 9.317575866353293e-06, + "loss": 0.4741, "step": 11390 }, { - "epoch": 2.388551058922206, - "grad_norm": 3.940795359785619, - "learning_rate": 2.1009897723018504e-06, - "loss": 0.1384, + "epoch": 1.6084439423891554, + "grad_norm": 3.301496593915166, + "learning_rate": 9.316054975183599e-06, + "loss": 0.5341, "step": 11391 }, { - "epoch": 2.3887607464877334, - "grad_norm": 4.1603141896842875, - "learning_rate": 2.0996016829226575e-06, - "loss": 0.1617, + "epoch": 1.6085851454391413, + "grad_norm": 3.480463194768759, + "learning_rate": 9.314534099908492e-06, + "loss": 0.5381, "step": 11392 }, { - "epoch": 2.3889704340532605, - "grad_norm": 4.1160318716928135, - "learning_rate": 2.098213998452584e-06, - "loss": 0.1543, + "epoch": 1.6087263484891272, + "grad_norm": 3.134143740734321, + "learning_rate": 9.31301324056332e-06, + "loss": 0.5905, "step": 11393 }, { - "epoch": 2.389180121618788, - "grad_norm": 4.73458972853516, - "learning_rate": 2.0968267189627467e-06, - "loss": 0.1601, + "epoch": 1.608867551539113, + "grad_norm": 3.231908372735029, + "learning_rate": 9.31149239718342e-06, + "loss": 0.5409, "step": 11394 }, { - "epoch": 2.3893898091843155, - "grad_norm": 4.622585266131426, - "learning_rate": 2.0954398445242465e-06, - "loss": 0.1662, + "epoch": 1.609008754589099, + "grad_norm": 4.84584740111127, + "learning_rate": 9.309971569804142e-06, + "loss": 0.6683, "step": 11395 }, { - "epoch": 2.3895994967498426, - "grad_norm": 4.5333241776694475, - "learning_rate": 2.094053375208165e-06, - "loss": 0.1284, + "epoch": 1.6091499576390849, + "grad_norm": 4.040890264827595, + "learning_rate": 9.308450758460828e-06, + "loss": 0.7536, "step": 11396 }, { - "epoch": 2.38980918431537, - "grad_norm": 4.900293764428171, - "learning_rate": 2.092667311085559e-06, - "loss": 0.1406, + "epoch": 1.6092911606890707, + "grad_norm": 3.754019868984418, + "learning_rate": 9.306929963188818e-06, + "loss": 0.6148, "step": 11397 }, { - "epoch": 2.3900188718808977, - "grad_norm": 3.5778838554273467, - "learning_rate": 2.091281652227469e-06, - "loss": 0.1105, + "epoch": 1.6094323637390566, + "grad_norm": 3.494932812279176, + "learning_rate": 9.305409184023455e-06, + "loss": 0.5682, "step": 11398 }, { - "epoch": 2.3902285594464248, - "grad_norm": 4.248576431372044, - "learning_rate": 2.0898963987049126e-06, - "loss": 0.13, + "epoch": 1.6095735667890425, + "grad_norm": 2.9425859888938763, + "learning_rate": 9.303888421000082e-06, + "loss": 0.4495, "step": 11399 }, { - "epoch": 2.3904382470119523, - "grad_norm": 4.186385611051059, - "learning_rate": 2.088511550588882e-06, - "loss": 0.1509, + "epoch": 1.6097147698390284, + "grad_norm": 4.422906114266241, + "learning_rate": 9.302367674154043e-06, + "loss": 0.7421, "step": 11400 }, { - "epoch": 2.3906479345774794, - "grad_norm": 4.162849241691574, - "learning_rate": 2.0871271079503562e-06, - "loss": 0.1305, + "epoch": 1.6098559728890143, + "grad_norm": 3.8083903789148814, + "learning_rate": 9.300846943520678e-06, + "loss": 0.4867, "step": 11401 }, { - "epoch": 2.390857622143007, - "grad_norm": 3.673584043444878, - "learning_rate": 2.085743070860293e-06, - "loss": 0.1118, + "epoch": 1.6099971759390002, + "grad_norm": 3.2633686037987117, + "learning_rate": 9.299326229135326e-06, + "loss": 0.5135, "step": 11402 }, { - "epoch": 2.3910673097085344, - "grad_norm": 6.927093421897617, - "learning_rate": 2.084359439389624e-06, - "loss": 0.136, + "epoch": 1.610138378988986, + "grad_norm": 3.591668166439758, + "learning_rate": 9.297805531033333e-06, + "loss": 0.6042, "step": 11403 }, { - "epoch": 2.3912769972740615, - "grad_norm": 3.5282438111794847, - "learning_rate": 2.08297621360926e-06, - "loss": 0.1164, + "epoch": 1.610279582038972, + "grad_norm": 3.604416964885435, + "learning_rate": 9.296284849250038e-06, + "loss": 0.5568, "step": 11404 }, { - "epoch": 2.391486684839589, - "grad_norm": 4.342567716477776, - "learning_rate": 2.0815933935900967e-06, - "loss": 0.1507, + "epoch": 1.6104207850889578, + "grad_norm": 3.8535030340212693, + "learning_rate": 9.294764183820775e-06, + "loss": 0.5374, "step": 11405 }, { - "epoch": 2.3916963724051166, - "grad_norm": 4.346386007382226, - "learning_rate": 2.08021097940301e-06, - "loss": 0.111, + "epoch": 1.6105619881389437, + "grad_norm": 3.2605163307514546, + "learning_rate": 9.293243534780887e-06, + "loss": 0.6078, "step": 11406 }, { - "epoch": 2.3919060599706436, - "grad_norm": 4.4997643715996976, - "learning_rate": 2.0788289711188426e-06, - "loss": 0.1312, + "epoch": 1.6107031911889296, + "grad_norm": 3.34578317061908, + "learning_rate": 9.291722902165717e-06, + "loss": 0.5268, "step": 11407 }, { - "epoch": 2.392115747536171, - "grad_norm": 4.546432927933047, - "learning_rate": 2.0774473688084282e-06, - "loss": 0.1278, + "epoch": 1.6108443942389155, + "grad_norm": 3.0058356451552197, + "learning_rate": 9.290202286010602e-06, + "loss": 0.4501, "step": 11408 }, { - "epoch": 2.3923254351016983, - "grad_norm": 3.8990030364645683, - "learning_rate": 2.0760661725425814e-06, - "loss": 0.1277, + "epoch": 1.6109855972889013, + "grad_norm": 3.247636865015121, + "learning_rate": 9.288681686350876e-06, + "loss": 0.4609, "step": 11409 }, { - "epoch": 2.392535122667226, - "grad_norm": 4.100017993630564, - "learning_rate": 2.0746853823920856e-06, - "loss": 0.1711, + "epoch": 1.6111268003388872, + "grad_norm": 3.6188566002023905, + "learning_rate": 9.28716110322188e-06, + "loss": 0.6016, "step": 11410 }, { - "epoch": 2.3927448102327533, - "grad_norm": 4.328232125327681, - "learning_rate": 2.0733049984277066e-06, - "loss": 0.1321, + "epoch": 1.611268003388873, + "grad_norm": 3.7784196370461354, + "learning_rate": 9.285640536658955e-06, + "loss": 0.6666, "step": 11411 }, { - "epoch": 2.3929544977982804, - "grad_norm": 4.840124516390636, - "learning_rate": 2.071925020720198e-06, - "loss": 0.1613, + "epoch": 1.611409206438859, + "grad_norm": 3.179085127501755, + "learning_rate": 9.284119986697433e-06, + "loss": 0.5825, "step": 11412 }, { - "epoch": 2.393164185363808, - "grad_norm": 4.519081450246461, - "learning_rate": 2.070545449340279e-06, - "loss": 0.1172, + "epoch": 1.6115504094888449, + "grad_norm": 3.433480749698441, + "learning_rate": 9.282599453372658e-06, + "loss": 0.5404, "step": 11413 }, { - "epoch": 2.3933738729293355, - "grad_norm": 3.3685284372121838, - "learning_rate": 2.0691662843586622e-06, - "loss": 0.1151, + "epoch": 1.6116916125388308, + "grad_norm": 3.7289997230840375, + "learning_rate": 9.281078936719958e-06, + "loss": 0.616, "step": 11414 }, { - "epoch": 2.3935835604948625, - "grad_norm": 5.652389583849986, - "learning_rate": 2.067787525846028e-06, - "loss": 0.197, + "epoch": 1.6118328155888166, + "grad_norm": 3.549414000986242, + "learning_rate": 9.279558436774672e-06, + "loss": 0.5333, "step": 11415 }, { - "epoch": 2.39379324806039, - "grad_norm": 4.134359512670427, - "learning_rate": 2.0664091738730385e-06, - "loss": 0.1316, + "epoch": 1.6119740186388025, + "grad_norm": 3.3018646040711634, + "learning_rate": 9.278037953572138e-06, + "loss": 0.6019, "step": 11416 }, { - "epoch": 2.3940029356259176, - "grad_norm": 3.242631990343553, - "learning_rate": 2.065031228510339e-06, - "loss": 0.1207, + "epoch": 1.6121152216887884, + "grad_norm": 3.6310317938466303, + "learning_rate": 9.27651748714769e-06, + "loss": 0.5761, "step": 11417 }, { - "epoch": 2.3942126231914447, - "grad_norm": 3.7407478336153672, - "learning_rate": 2.0636536898285544e-06, - "loss": 0.1295, + "epoch": 1.6122564247387743, + "grad_norm": 3.0430218341922384, + "learning_rate": 9.274997037536663e-06, + "loss": 0.4497, "step": 11418 }, { - "epoch": 2.394422310756972, - "grad_norm": 4.278124123850817, - "learning_rate": 2.062276557898283e-06, - "loss": 0.184, + "epoch": 1.6123976277887602, + "grad_norm": 3.6105981359621397, + "learning_rate": 9.273476604774392e-06, + "loss": 0.5825, "step": 11419 }, { - "epoch": 2.3946319983224993, - "grad_norm": 6.019886705361297, - "learning_rate": 2.0608998327901033e-06, - "loss": 0.1577, + "epoch": 1.612538830838746, + "grad_norm": 3.3723956007794773, + "learning_rate": 9.271956188896211e-06, + "loss": 0.5061, "step": 11420 }, { - "epoch": 2.394841685888027, - "grad_norm": 4.269722429717349, - "learning_rate": 2.0595235145745783e-06, - "loss": 0.1244, + "epoch": 1.612680033888732, + "grad_norm": 3.712957273039877, + "learning_rate": 9.270435789937456e-06, + "loss": 0.5988, "step": 11421 }, { - "epoch": 2.3950513734535543, - "grad_norm": 4.618525136019961, - "learning_rate": 2.0581476033222425e-06, - "loss": 0.1656, + "epoch": 1.6128212369387178, + "grad_norm": 3.264288332216479, + "learning_rate": 9.268915407933457e-06, + "loss": 0.5252, "step": 11422 }, { - "epoch": 2.3952610610190814, - "grad_norm": 4.267773007934097, - "learning_rate": 2.05677209910362e-06, - "loss": 0.1314, + "epoch": 1.6129624399887037, + "grad_norm": 3.5506279295417396, + "learning_rate": 9.267395042919546e-06, + "loss": 0.5547, "step": 11423 }, { - "epoch": 2.395470748584609, - "grad_norm": 4.877740221286599, - "learning_rate": 2.0553970019892035e-06, - "loss": 0.1708, + "epoch": 1.6131036430386896, + "grad_norm": 3.5301058525034796, + "learning_rate": 9.265874694931059e-06, + "loss": 0.4538, "step": 11424 }, { - "epoch": 2.3956804361501365, - "grad_norm": 4.445038521792703, - "learning_rate": 2.0540223120494673e-06, - "loss": 0.1465, + "epoch": 1.6132448460886755, + "grad_norm": 3.830680095046199, + "learning_rate": 9.264354364003327e-06, + "loss": 0.5508, "step": 11425 }, { - "epoch": 2.3958901237156636, - "grad_norm": 4.462170813606795, - "learning_rate": 2.05264802935487e-06, - "loss": 0.1584, + "epoch": 1.6133860491386613, + "grad_norm": 3.3410712446951423, + "learning_rate": 9.262834050171683e-06, + "loss": 0.5843, "step": 11426 }, { - "epoch": 2.396099811281191, - "grad_norm": 4.469517611365437, - "learning_rate": 2.0512741539758496e-06, - "loss": 0.1416, + "epoch": 1.6135272521886472, + "grad_norm": 2.682275796130066, + "learning_rate": 9.261313753471454e-06, + "loss": 0.4682, "step": 11427 }, { - "epoch": 2.396309498846718, - "grad_norm": 4.3615891500851385, - "learning_rate": 2.049900685982811e-06, - "loss": 0.1524, + "epoch": 1.6136684552386331, + "grad_norm": 3.799234468620082, + "learning_rate": 9.259793473937977e-06, + "loss": 0.619, "step": 11428 }, { - "epoch": 2.3965191864122457, - "grad_norm": 4.3709207912636945, - "learning_rate": 2.0485276254461493e-06, - "loss": 0.1384, + "epoch": 1.613809658288619, + "grad_norm": 3.8764690052946817, + "learning_rate": 9.25827321160658e-06, + "loss": 0.4993, "step": 11429 }, { - "epoch": 2.3967288739777732, - "grad_norm": 3.9168267165179333, - "learning_rate": 2.047154972436243e-06, - "loss": 0.1537, + "epoch": 1.6139508613386049, + "grad_norm": 3.085789962010113, + "learning_rate": 9.256752966512595e-06, + "loss": 0.4504, "step": 11430 }, { - "epoch": 2.3969385615433003, - "grad_norm": 4.838711024804494, - "learning_rate": 2.0457827270234354e-06, - "loss": 0.1908, + "epoch": 1.6140920643885908, + "grad_norm": 3.0717316672305826, + "learning_rate": 9.255232738691348e-06, + "loss": 0.5065, "step": 11431 }, { - "epoch": 2.397148249108828, - "grad_norm": 4.460202403931419, - "learning_rate": 2.044410889278058e-06, - "loss": 0.1106, + "epoch": 1.6142332674385766, + "grad_norm": 4.056763950383977, + "learning_rate": 9.253712528178169e-06, + "loss": 0.7655, "step": 11432 }, { - "epoch": 2.3973579366743554, - "grad_norm": 3.4130607815526264, - "learning_rate": 2.043039459270422e-06, - "loss": 0.1339, + "epoch": 1.6143744704885625, + "grad_norm": 4.117490789867247, + "learning_rate": 9.25219233500839e-06, + "loss": 0.6416, "step": 11433 }, { - "epoch": 2.3975676242398825, - "grad_norm": 4.209958485952366, - "learning_rate": 2.041668437070813e-06, - "loss": 0.1111, + "epoch": 1.6145156735385484, + "grad_norm": 3.9538518593169014, + "learning_rate": 9.25067215921734e-06, + "loss": 0.5429, "step": 11434 }, { - "epoch": 2.39777731180541, - "grad_norm": 4.41147714653408, - "learning_rate": 2.040297822749502e-06, - "loss": 0.1685, + "epoch": 1.6146568765885343, + "grad_norm": 3.6452557084965314, + "learning_rate": 9.249152000840341e-06, + "loss": 0.5865, "step": 11435 }, { - "epoch": 2.3979869993709375, - "grad_norm": 3.733852203362261, - "learning_rate": 2.0389276163767326e-06, - "loss": 0.1117, + "epoch": 1.6147980796385202, + "grad_norm": 3.2645830097819757, + "learning_rate": 9.24763185991273e-06, + "loss": 0.5082, "step": 11436 }, { - "epoch": 2.3981966869364646, - "grad_norm": 3.2002577989476846, - "learning_rate": 2.0375578180227274e-06, - "loss": 0.1255, + "epoch": 1.614939282688506, + "grad_norm": 3.8684664232524058, + "learning_rate": 9.246111736469826e-06, + "loss": 0.5291, "step": 11437 }, { - "epoch": 2.398406374501992, - "grad_norm": 4.4309621013440585, - "learning_rate": 2.036188427757695e-06, - "loss": 0.1391, + "epoch": 1.615080485738492, + "grad_norm": 4.0221868977221895, + "learning_rate": 9.244591630546964e-06, + "loss": 0.5926, "step": 11438 }, { - "epoch": 2.398616062067519, - "grad_norm": 6.349846989759825, - "learning_rate": 2.034819445651821e-06, - "loss": 0.1683, + "epoch": 1.6152216887884778, + "grad_norm": 3.6557608759755236, + "learning_rate": 9.243071542179464e-06, + "loss": 0.6402, "step": 11439 }, { - "epoch": 2.3988257496330467, - "grad_norm": 5.70336901652201, - "learning_rate": 2.0334508717752646e-06, - "loss": 0.1562, + "epoch": 1.6153628918384637, + "grad_norm": 3.10503470597323, + "learning_rate": 9.241551471402654e-06, + "loss": 0.4938, "step": 11440 }, { - "epoch": 2.3990354371985743, - "grad_norm": 4.0296362329724085, - "learning_rate": 2.0320827061981665e-06, - "loss": 0.1582, + "epoch": 1.6155040948884496, + "grad_norm": 3.800180310819801, + "learning_rate": 9.240031418251858e-06, + "loss": 0.5428, "step": 11441 }, { - "epoch": 2.3992451247641013, - "grad_norm": 3.3567670685125783, - "learning_rate": 2.030714948990652e-06, - "loss": 0.1137, + "epoch": 1.6156452979384355, + "grad_norm": 3.663123094946641, + "learning_rate": 9.238511382762408e-06, + "loss": 0.572, "step": 11442 }, { - "epoch": 2.399454812329629, - "grad_norm": 4.282495544580757, - "learning_rate": 2.029347600222815e-06, - "loss": 0.1662, + "epoch": 1.6157865009884214, + "grad_norm": 4.389651237357117, + "learning_rate": 9.236991364969623e-06, + "loss": 0.6495, "step": 11443 }, { - "epoch": 2.3996644998951564, - "grad_norm": 3.68115551077214, - "learning_rate": 2.027980659964741e-06, - "loss": 0.1312, + "epoch": 1.6159277040384072, + "grad_norm": 2.7317224530034707, + "learning_rate": 9.235471364908826e-06, + "loss": 0.4348, "step": 11444 }, { - "epoch": 2.3998741874606835, - "grad_norm": 3.699127362803209, - "learning_rate": 2.026614128286485e-06, - "loss": 0.1221, + "epoch": 1.6160689070883931, + "grad_norm": 3.147665021137086, + "learning_rate": 9.233951382615346e-06, + "loss": 0.501, "step": 11445 }, { - "epoch": 2.400083875026211, - "grad_norm": 4.500273948371631, - "learning_rate": 2.0252480052580804e-06, - "loss": 0.159, + "epoch": 1.616210110138379, + "grad_norm": 3.045467137559147, + "learning_rate": 9.232431418124507e-06, + "loss": 0.5739, "step": 11446 }, { - "epoch": 2.400293562591738, - "grad_norm": 4.995282686499235, - "learning_rate": 2.0238822909495482e-06, - "loss": 0.168, + "epoch": 1.616351313188365, + "grad_norm": 3.3426136222229363, + "learning_rate": 9.230911471471632e-06, + "loss": 0.5815, "step": 11447 }, { - "epoch": 2.4005032501572656, - "grad_norm": 6.759062377194313, - "learning_rate": 2.022516985430888e-06, - "loss": 0.141, + "epoch": 1.6164925162383508, + "grad_norm": 3.8046112094099285, + "learning_rate": 9.22939154269204e-06, + "loss": 0.6096, "step": 11448 }, { - "epoch": 2.400712937722793, - "grad_norm": 5.918246469024353, - "learning_rate": 2.0211520887720626e-06, - "loss": 0.1346, + "epoch": 1.6166337192883367, + "grad_norm": 3.669521717658185, + "learning_rate": 9.227871631821053e-06, + "loss": 0.6184, "step": 11449 }, { - "epoch": 2.4009226252883202, - "grad_norm": 6.923755534804394, - "learning_rate": 2.0197876010430305e-06, - "loss": 0.2228, + "epoch": 1.6167749223383225, + "grad_norm": 3.8456507281238417, + "learning_rate": 9.226351738893999e-06, + "loss": 0.5203, "step": 11450 }, { - "epoch": 2.4011323128538478, - "grad_norm": 4.340432402031409, - "learning_rate": 2.0184235223137283e-06, - "loss": 0.1493, + "epoch": 1.6169161253883084, + "grad_norm": 3.836827050704987, + "learning_rate": 9.224831863946196e-06, + "loss": 0.6337, "step": 11451 }, { - "epoch": 2.4013420004193753, - "grad_norm": 3.4915353747652533, - "learning_rate": 2.017059852654064e-06, - "loss": 0.1006, + "epoch": 1.6170573284382943, + "grad_norm": 3.9307164889203117, + "learning_rate": 9.223312007012965e-06, + "loss": 0.6902, "step": 11452 }, { - "epoch": 2.4015516879849024, - "grad_norm": 4.673543733894659, - "learning_rate": 2.0156965921339256e-06, - "loss": 0.1653, + "epoch": 1.6171985314882802, + "grad_norm": 3.0580807837150896, + "learning_rate": 9.221792168129626e-06, + "loss": 0.5204, "step": 11453 }, { - "epoch": 2.40176137555043, - "grad_norm": 4.1003943044441185, - "learning_rate": 2.0143337408231868e-06, - "loss": 0.1461, + "epoch": 1.617339734538266, + "grad_norm": 3.9779124386815434, + "learning_rate": 9.220272347331502e-06, + "loss": 0.494, "step": 11454 }, { - "epoch": 2.4019710631159574, - "grad_norm": 4.376403938617877, - "learning_rate": 2.012971298791693e-06, - "loss": 0.1584, + "epoch": 1.617480937588252, + "grad_norm": 3.5493775760252224, + "learning_rate": 9.218752544653916e-06, + "loss": 0.578, "step": 11455 }, { - "epoch": 2.4021807506814845, - "grad_norm": 4.066676680163921, - "learning_rate": 2.0116092661092735e-06, - "loss": 0.1505, + "epoch": 1.6176221406382378, + "grad_norm": 3.2265586483290236, + "learning_rate": 9.217232760132181e-06, + "loss": 0.5154, "step": 11456 }, { - "epoch": 2.402390438247012, - "grad_norm": 4.0670722778198, - "learning_rate": 2.010247642845735e-06, - "loss": 0.1473, + "epoch": 1.6177633436882237, + "grad_norm": 3.9466579985462764, + "learning_rate": 9.215712993801617e-06, + "loss": 0.6769, "step": 11457 }, { - "epoch": 2.402600125812539, - "grad_norm": 5.110728670920862, - "learning_rate": 2.008886429070861e-06, - "loss": 0.1413, + "epoch": 1.6179045467382096, + "grad_norm": 4.0867450809848105, + "learning_rate": 9.214193245697544e-06, + "loss": 0.6085, "step": 11458 }, { - "epoch": 2.4028098133780667, - "grad_norm": 4.269787250327614, - "learning_rate": 2.007525624854416e-06, - "loss": 0.1374, + "epoch": 1.6180457497881955, + "grad_norm": 3.957474546915718, + "learning_rate": 9.212673515855281e-06, + "loss": 0.7352, "step": 11459 }, { - "epoch": 2.403019500943594, - "grad_norm": 3.9932069343500736, - "learning_rate": 2.006165230266148e-06, - "loss": 0.1251, + "epoch": 1.6181869528381814, + "grad_norm": 4.165411482349635, + "learning_rate": 9.211153804310146e-06, + "loss": 0.7516, "step": 11460 }, { - "epoch": 2.4032291885091213, - "grad_norm": 3.664657598078533, - "learning_rate": 2.004805245375777e-06, - "loss": 0.1283, + "epoch": 1.6183281558881673, + "grad_norm": 5.6516395621115025, + "learning_rate": 9.209634111097455e-06, + "loss": 0.8028, "step": 11461 }, { - "epoch": 2.403438876074649, - "grad_norm": 2.878912428334256, - "learning_rate": 2.0034456702530003e-06, - "loss": 0.0843, + "epoch": 1.6184693589381531, + "grad_norm": 2.974021278236827, + "learning_rate": 9.208114436252528e-06, + "loss": 0.4429, "step": 11462 }, { - "epoch": 2.4036485636401763, - "grad_norm": 3.870955256401124, - "learning_rate": 2.0020865049675065e-06, - "loss": 0.1336, + "epoch": 1.618610561988139, + "grad_norm": 3.682497681930752, + "learning_rate": 9.206594779810677e-06, + "loss": 0.6529, "step": 11463 }, { - "epoch": 2.4038582512057034, - "grad_norm": 5.563738492685919, - "learning_rate": 2.000727749588948e-06, - "loss": 0.1756, + "epoch": 1.618751765038125, + "grad_norm": 3.1232952207524796, + "learning_rate": 9.205075141807226e-06, + "loss": 0.5425, "step": 11464 }, { - "epoch": 2.404067938771231, - "grad_norm": 4.506380017819829, - "learning_rate": 1.9993694041869684e-06, - "loss": 0.13, + "epoch": 1.6188929680881108, + "grad_norm": 3.343716844711635, + "learning_rate": 9.203555522277483e-06, + "loss": 0.5365, "step": 11465 }, { - "epoch": 2.404277626336758, - "grad_norm": 3.2383140499319802, - "learning_rate": 1.99801146883118e-06, - "loss": 0.1237, + "epoch": 1.6190341711380967, + "grad_norm": 3.3630860294803266, + "learning_rate": 9.20203592125676e-06, + "loss": 0.4743, "step": 11466 }, { - "epoch": 2.4044873139022855, - "grad_norm": 4.192634138357596, - "learning_rate": 1.996653943591186e-06, - "loss": 0.1475, + "epoch": 1.6191753741880826, + "grad_norm": 3.688116287881678, + "learning_rate": 9.200516338780383e-06, + "loss": 0.7152, "step": 11467 }, { - "epoch": 2.404697001467813, - "grad_norm": 3.52117273596207, - "learning_rate": 1.995296828536559e-06, - "loss": 0.1349, + "epoch": 1.6193165772380684, + "grad_norm": 3.4295213211326048, + "learning_rate": 9.19899677488366e-06, + "loss": 0.6646, "step": 11468 }, { - "epoch": 2.40490668903334, - "grad_norm": 4.591496306345599, - "learning_rate": 1.9939401237368493e-06, - "loss": 0.1733, + "epoch": 1.6194577802880543, + "grad_norm": 3.3350479990407624, + "learning_rate": 9.197477229601906e-06, + "loss": 0.4779, "step": 11469 }, { - "epoch": 2.4051163765988677, - "grad_norm": 4.239550845333539, - "learning_rate": 1.9925838292615975e-06, - "loss": 0.1372, + "epoch": 1.6195989833380402, + "grad_norm": 3.430215564111373, + "learning_rate": 9.195957702970434e-06, + "loss": 0.5263, "step": 11470 }, { - "epoch": 2.405326064164395, - "grad_norm": 3.7184402561968986, - "learning_rate": 1.9912279451803097e-06, - "loss": 0.1535, + "epoch": 1.619740186388026, + "grad_norm": 3.4262831811850236, + "learning_rate": 9.194438195024557e-06, + "loss": 0.4742, "step": 11471 }, { - "epoch": 2.4055357517299223, - "grad_norm": 4.516264710314278, - "learning_rate": 1.989872471562483e-06, - "loss": 0.1189, + "epoch": 1.619881389438012, + "grad_norm": 3.8027703088907785, + "learning_rate": 9.192918705799594e-06, + "loss": 0.6977, "step": 11472 }, { - "epoch": 2.40574543929545, - "grad_norm": 4.860444897327862, - "learning_rate": 1.988517408477585e-06, - "loss": 0.1164, + "epoch": 1.6200225924879978, + "grad_norm": 2.9707886375376646, + "learning_rate": 9.191399235330847e-06, + "loss": 0.4985, "step": 11473 }, { - "epoch": 2.4059551268609773, - "grad_norm": 4.028536062763015, - "learning_rate": 1.9871627559950624e-06, - "loss": 0.1411, + "epoch": 1.6201637955379837, + "grad_norm": 3.5345818460417284, + "learning_rate": 9.189879783653633e-06, + "loss": 0.5932, "step": 11474 }, { - "epoch": 2.4061648144265044, - "grad_norm": 3.578787247828352, - "learning_rate": 1.985808514184345e-06, - "loss": 0.1224, + "epoch": 1.6203049985879696, + "grad_norm": 4.130455434434312, + "learning_rate": 9.188360350803261e-06, + "loss": 0.6321, "step": 11475 }, { - "epoch": 2.406374501992032, - "grad_norm": 3.4048350340908367, - "learning_rate": 1.984454683114846e-06, - "loss": 0.1004, + "epoch": 1.6204462016379555, + "grad_norm": 3.5187610272191314, + "learning_rate": 9.186840936815047e-06, + "loss": 0.5363, "step": 11476 }, { - "epoch": 2.4065841895575595, - "grad_norm": 3.4537123628553097, - "learning_rate": 1.9831012628559453e-06, - "loss": 0.1181, + "epoch": 1.6205874046879414, + "grad_norm": 3.3976777417583066, + "learning_rate": 9.185321541724296e-06, + "loss": 0.5981, "step": 11477 }, { - "epoch": 2.4067938771230866, - "grad_norm": 3.820163674785773, - "learning_rate": 1.981748253477007e-06, - "loss": 0.1375, + "epoch": 1.6207286077379273, + "grad_norm": 3.624689606861724, + "learning_rate": 9.18380216556632e-06, + "loss": 0.5188, "step": 11478 }, { - "epoch": 2.407003564688614, - "grad_norm": 5.391529433941355, - "learning_rate": 1.98039565504738e-06, - "loss": 0.1498, + "epoch": 1.6208698107879131, + "grad_norm": 3.999733421540868, + "learning_rate": 9.182282808376433e-06, + "loss": 0.5602, "step": 11479 }, { - "epoch": 2.407213252254141, - "grad_norm": 4.247035697643328, - "learning_rate": 1.9790434676363825e-06, - "loss": 0.1267, + "epoch": 1.621011013837899, + "grad_norm": 3.1191943046581554, + "learning_rate": 9.180763470189938e-06, + "loss": 0.5492, "step": 11480 }, { - "epoch": 2.4074229398196687, - "grad_norm": 4.595513143211677, - "learning_rate": 1.9776916913133215e-06, - "loss": 0.1407, + "epoch": 1.621152216887885, + "grad_norm": 3.710152350327996, + "learning_rate": 9.17924415104215e-06, + "loss": 0.5354, "step": 11481 }, { - "epoch": 2.4076326273851962, - "grad_norm": 4.500333988958926, - "learning_rate": 1.9763403261474756e-06, - "loss": 0.1424, + "epoch": 1.6212934199378708, + "grad_norm": 4.134203799647902, + "learning_rate": 9.17772485096837e-06, + "loss": 0.7752, "step": 11482 }, { - "epoch": 2.4078423149507233, - "grad_norm": 4.907634201009687, - "learning_rate": 1.9749893722081026e-06, - "loss": 0.122, + "epoch": 1.6214346229878567, + "grad_norm": 2.8361081331420857, + "learning_rate": 9.176205570003907e-06, + "loss": 0.4266, "step": 11483 }, { - "epoch": 2.408052002516251, - "grad_norm": 3.512054274838209, - "learning_rate": 1.9736388295644413e-06, - "loss": 0.1229, + "epoch": 1.6215758260378426, + "grad_norm": 3.9558270783895146, + "learning_rate": 9.174686308184075e-06, + "loss": 0.574, "step": 11484 }, { - "epoch": 2.408261690081778, - "grad_norm": 6.235302119705029, - "learning_rate": 1.972288698285717e-06, - "loss": 0.1705, + "epoch": 1.6217170290878284, + "grad_norm": 4.542474100839712, + "learning_rate": 9.173167065544174e-06, + "loss": 0.647, "step": 11485 }, { - "epoch": 2.4084713776473055, - "grad_norm": 3.451189742314622, - "learning_rate": 1.9709389784411146e-06, - "loss": 0.1302, + "epoch": 1.6218582321378143, + "grad_norm": 5.0557416143056475, + "learning_rate": 9.171647842119515e-06, + "loss": 0.7258, "step": 11486 }, { - "epoch": 2.408681065212833, - "grad_norm": 3.981396903585127, - "learning_rate": 1.9695896700998162e-06, - "loss": 0.1389, + "epoch": 1.6219994351878002, + "grad_norm": 3.711286680256037, + "learning_rate": 9.170128637945399e-06, + "loss": 0.5109, "step": 11487 }, { - "epoch": 2.40889075277836, - "grad_norm": 4.824694066435134, - "learning_rate": 1.9682407733309773e-06, - "loss": 0.1703, + "epoch": 1.622140638237786, + "grad_norm": 3.540540716650895, + "learning_rate": 9.16860945305714e-06, + "loss": 0.5174, "step": 11488 }, { - "epoch": 2.4091004403438876, - "grad_norm": 4.997114448024437, - "learning_rate": 1.966892288203729e-06, - "loss": 0.1439, + "epoch": 1.622281841287772, + "grad_norm": 2.846590798212462, + "learning_rate": 9.167090287490036e-06, + "loss": 0.4981, "step": 11489 }, { - "epoch": 2.409310127909415, - "grad_norm": 4.5509291088658435, - "learning_rate": 1.9655442147871818e-06, - "loss": 0.1408, + "epoch": 1.6224230443377579, + "grad_norm": 3.914974483468677, + "learning_rate": 9.165571141279397e-06, + "loss": 0.5963, "step": 11490 }, { - "epoch": 2.409519815474942, - "grad_norm": 3.8545978549538567, - "learning_rate": 1.9641965531504304e-06, - "loss": 0.1119, + "epoch": 1.6225642473877437, + "grad_norm": 5.143665342149202, + "learning_rate": 9.16405201446052e-06, + "loss": 0.9135, "step": 11491 }, { - "epoch": 2.4097295030404697, - "grad_norm": 4.802458653596581, - "learning_rate": 1.9628493033625406e-06, - "loss": 0.1442, + "epoch": 1.6227054504377294, + "grad_norm": 3.8344111981329867, + "learning_rate": 9.162532907068713e-06, + "loss": 0.5533, "step": 11492 }, { - "epoch": 2.4099391906059973, - "grad_norm": 3.7853202941746265, - "learning_rate": 1.9615024654925663e-06, - "loss": 0.1016, + "epoch": 1.6228466534877153, + "grad_norm": 3.583887879163046, + "learning_rate": 9.161013819139281e-06, + "loss": 0.6299, "step": 11493 }, { - "epoch": 2.4101488781715243, - "grad_norm": 3.6928092437390667, - "learning_rate": 1.960156039609533e-06, - "loss": 0.118, + "epoch": 1.6229878565377012, + "grad_norm": 3.111689455447244, + "learning_rate": 9.159494750707527e-06, + "loss": 0.4288, "step": 11494 }, { - "epoch": 2.410358565737052, - "grad_norm": 3.831769847557559, - "learning_rate": 1.9588100257824438e-06, - "loss": 0.1225, + "epoch": 1.623129059587687, + "grad_norm": 3.16934714658762, + "learning_rate": 9.157975701808748e-06, + "loss": 0.5186, "step": 11495 }, { - "epoch": 2.4105682533025794, - "grad_norm": 3.5504085280365416, - "learning_rate": 1.9574644240802886e-06, - "loss": 0.1177, + "epoch": 1.623270262637673, + "grad_norm": 3.4067801570415814, + "learning_rate": 9.156456672478252e-06, + "loss": 0.5885, "step": 11496 }, { - "epoch": 2.4107779408681065, - "grad_norm": 4.409311481631019, - "learning_rate": 1.9561192345720326e-06, - "loss": 0.1392, + "epoch": 1.6234114656876588, + "grad_norm": 2.995649718460358, + "learning_rate": 9.15493766275134e-06, + "loss": 0.5654, "step": 11497 }, { - "epoch": 2.410987628433634, - "grad_norm": 3.2102298837153476, - "learning_rate": 1.954774457326617e-06, - "loss": 0.0828, + "epoch": 1.6235526687376447, + "grad_norm": 4.166573088486783, + "learning_rate": 9.153418672663313e-06, + "loss": 0.6494, "step": 11498 }, { - "epoch": 2.411197315999161, - "grad_norm": 5.153629986384603, - "learning_rate": 1.953430092412962e-06, - "loss": 0.164, + "epoch": 1.6236938717876306, + "grad_norm": 4.068773996161427, + "learning_rate": 9.151899702249469e-06, + "loss": 0.7747, "step": 11499 }, { - "epoch": 2.4114070035646886, - "grad_norm": 4.403420521189587, - "learning_rate": 1.9520861398999735e-06, - "loss": 0.1596, + "epoch": 1.6238350748376165, + "grad_norm": 3.8135481642863645, + "learning_rate": 9.15038075154511e-06, + "loss": 0.7104, "step": 11500 }, { - "epoch": 2.411616691130216, - "grad_norm": 4.438476879761939, - "learning_rate": 1.9507425998565268e-06, - "loss": 0.1503, + "epoch": 1.6239762778876023, + "grad_norm": 3.88478767465394, + "learning_rate": 9.148861820585532e-06, + "loss": 0.6726, "step": 11501 }, { - "epoch": 2.4118263786957432, - "grad_norm": 4.619566409163347, - "learning_rate": 1.949399472351484e-06, - "loss": 0.1464, + "epoch": 1.6241174809375882, + "grad_norm": 4.068399318013497, + "learning_rate": 9.14734290940604e-06, + "loss": 0.5918, "step": 11502 }, { - "epoch": 2.4120360662612708, - "grad_norm": 4.942655211386968, - "learning_rate": 1.948056757453681e-06, - "loss": 0.1509, + "epoch": 1.6242586839875741, + "grad_norm": 2.893845222872589, + "learning_rate": 9.145824018041933e-06, + "loss": 0.4831, "step": 11503 }, { - "epoch": 2.412245753826798, - "grad_norm": 5.237247911650881, - "learning_rate": 1.946714455231934e-06, - "loss": 0.1692, + "epoch": 1.62439988703756, + "grad_norm": 3.3631636664365847, + "learning_rate": 9.144305146528502e-06, + "loss": 0.5335, "step": 11504 }, { - "epoch": 2.4124554413923254, - "grad_norm": 4.4252204177736285, - "learning_rate": 1.945372565755037e-06, - "loss": 0.1611, + "epoch": 1.6245410900875459, + "grad_norm": 3.471024391545202, + "learning_rate": 9.142786294901053e-06, + "loss": 0.6186, "step": 11505 }, { - "epoch": 2.412665128957853, - "grad_norm": 3.358056187973224, - "learning_rate": 1.9440310890917726e-06, - "loss": 0.1274, + "epoch": 1.6246822931375318, + "grad_norm": 3.318218049031006, + "learning_rate": 9.141267463194883e-06, + "loss": 0.5074, "step": 11506 }, { - "epoch": 2.41287481652338, - "grad_norm": 3.055634201741452, - "learning_rate": 1.9426900253108803e-06, - "loss": 0.0825, + "epoch": 1.6248234961875176, + "grad_norm": 3.6813297910775575, + "learning_rate": 9.139748651445282e-06, + "loss": 0.5535, "step": 11507 }, { - "epoch": 2.4130845040889075, - "grad_norm": 3.7323797757306134, - "learning_rate": 1.9413493744810996e-06, - "loss": 0.1231, + "epoch": 1.6249646992375035, + "grad_norm": 3.0385854338410576, + "learning_rate": 9.138229859687553e-06, + "loss": 0.5154, "step": 11508 }, { - "epoch": 2.413294191654435, - "grad_norm": 3.8998550964059198, - "learning_rate": 1.9400091366711438e-06, - "loss": 0.1375, + "epoch": 1.6251059022874894, + "grad_norm": 3.8153256753532383, + "learning_rate": 9.136711087956987e-06, + "loss": 0.6295, "step": 11509 }, { - "epoch": 2.413503879219962, - "grad_norm": 3.490424782197383, - "learning_rate": 1.938669311949699e-06, - "loss": 0.1205, + "epoch": 1.6252471053374753, + "grad_norm": 3.6525156994543693, + "learning_rate": 9.135192336288885e-06, + "loss": 0.6167, "step": 11510 }, { - "epoch": 2.4137135667854897, - "grad_norm": 4.894179441895004, - "learning_rate": 1.9373299003854306e-06, - "loss": 0.1181, + "epoch": 1.6253883083874612, + "grad_norm": 3.5299607342751114, + "learning_rate": 9.133673604718539e-06, + "loss": 0.5329, "step": 11511 }, { - "epoch": 2.413923254351017, - "grad_norm": 5.526502008675969, - "learning_rate": 1.9359909020469913e-06, - "loss": 0.1615, + "epoch": 1.625529511437447, + "grad_norm": 3.9193799244784713, + "learning_rate": 9.132154893281244e-06, + "loss": 0.6109, "step": 11512 }, { - "epoch": 2.4141329419165443, - "grad_norm": 5.1778528258589835, - "learning_rate": 1.934652317003002e-06, - "loss": 0.1366, + "epoch": 1.625670714487433, + "grad_norm": 3.1626437893624346, + "learning_rate": 9.130636202012295e-06, + "loss": 0.5648, "step": 11513 }, { - "epoch": 2.414342629482072, - "grad_norm": 5.391625647861582, - "learning_rate": 1.9333141453220726e-06, - "loss": 0.1791, + "epoch": 1.6258119175374188, + "grad_norm": 3.4153261556785726, + "learning_rate": 9.129117530946986e-06, + "loss": 0.6096, "step": 11514 }, { - "epoch": 2.4145523170475993, - "grad_norm": 3.8537333118109562, - "learning_rate": 1.9319763870727846e-06, - "loss": 0.1142, + "epoch": 1.6259531205874047, + "grad_norm": 2.991248413613067, + "learning_rate": 9.127598880120609e-06, + "loss": 0.4923, "step": 11515 }, { - "epoch": 2.4147620046131264, - "grad_norm": 4.483120008508993, - "learning_rate": 1.9306390423236976e-06, - "loss": 0.1506, + "epoch": 1.6260943236373906, + "grad_norm": 3.892538536476327, + "learning_rate": 9.126080249568457e-06, + "loss": 0.7174, "step": 11516 }, { - "epoch": 2.414971692178654, - "grad_norm": 5.387763111627581, - "learning_rate": 1.9293021111433553e-06, - "loss": 0.1606, + "epoch": 1.6262355266873765, + "grad_norm": 4.4163355885953015, + "learning_rate": 9.124561639325822e-06, + "loss": 0.8466, "step": 11517 }, { - "epoch": 2.415181379744181, - "grad_norm": 3.8602041090411285, - "learning_rate": 1.9279655936002796e-06, - "loss": 0.1416, + "epoch": 1.6263767297373624, + "grad_norm": 3.4759339470906423, + "learning_rate": 9.123043049427996e-06, + "loss": 0.5959, "step": 11518 }, { - "epoch": 2.4153910673097085, - "grad_norm": 4.518715966468808, - "learning_rate": 1.926629489762968e-06, - "loss": 0.1424, + "epoch": 1.6265179327873482, + "grad_norm": 3.7615902482097234, + "learning_rate": 9.12152447991027e-06, + "loss": 0.5919, "step": 11519 }, { - "epoch": 2.415600754875236, - "grad_norm": 4.074535804199905, - "learning_rate": 1.9252937996998954e-06, - "loss": 0.1508, + "epoch": 1.6266591358373341, + "grad_norm": 3.394861259063789, + "learning_rate": 9.120005930807939e-06, + "loss": 0.6215, "step": 11520 }, { - "epoch": 2.415810442440763, - "grad_norm": 4.360592726929684, - "learning_rate": 1.923958523479522e-06, - "loss": 0.1045, + "epoch": 1.62680033888732, + "grad_norm": 3.6331528178504437, + "learning_rate": 9.118487402156287e-06, + "loss": 0.6082, "step": 11521 }, { - "epoch": 2.4160201300062907, - "grad_norm": 5.057960642591632, - "learning_rate": 1.9226236611702797e-06, - "loss": 0.1813, + "epoch": 1.626941541937306, + "grad_norm": 3.4316714509501645, + "learning_rate": 9.116968893990609e-06, + "loss": 0.5372, "step": 11522 }, { - "epoch": 2.4162298175718178, - "grad_norm": 3.8854775163669584, - "learning_rate": 1.9212892128405857e-06, - "loss": 0.1212, + "epoch": 1.6270827449872918, + "grad_norm": 4.217321140647538, + "learning_rate": 9.115450406346193e-06, + "loss": 0.6689, "step": 11523 }, { - "epoch": 2.4164395051373453, - "grad_norm": 3.9785381479426363, - "learning_rate": 1.919955178558832e-06, - "loss": 0.1331, + "epoch": 1.6272239480372777, + "grad_norm": 3.8668776711298958, + "learning_rate": 9.113931939258327e-06, + "loss": 0.6212, "step": 11524 }, { - "epoch": 2.416649192702873, - "grad_norm": 5.301440180779231, - "learning_rate": 1.9186215583933877e-06, - "loss": 0.1786, + "epoch": 1.6273651510872635, + "grad_norm": 3.5111067330113457, + "learning_rate": 9.112413492762301e-06, + "loss": 0.6579, "step": 11525 }, { - "epoch": 2.4168588802684, - "grad_norm": 4.6769825934515055, - "learning_rate": 1.917288352412604e-06, - "loss": 0.1316, + "epoch": 1.6275063541372492, + "grad_norm": 3.2136366491056325, + "learning_rate": 9.110895066893398e-06, + "loss": 0.5303, "step": 11526 }, { - "epoch": 2.4170685678339274, - "grad_norm": 3.351400773552443, - "learning_rate": 1.9159555606848144e-06, - "loss": 0.1155, + "epoch": 1.627647557187235, + "grad_norm": 3.3408156993630524, + "learning_rate": 9.109376661686913e-06, + "loss": 0.5375, "step": 11527 }, { - "epoch": 2.417278255399455, - "grad_norm": 4.0787514179447495, - "learning_rate": 1.9146231832783234e-06, - "loss": 0.1513, + "epoch": 1.627788760237221, + "grad_norm": 3.90658722124255, + "learning_rate": 9.10785827717813e-06, + "loss": 0.6226, "step": 11528 }, { - "epoch": 2.417487942964982, - "grad_norm": 3.8377240176352605, - "learning_rate": 1.913291220261414e-06, - "loss": 0.121, + "epoch": 1.6279299632872068, + "grad_norm": 3.319663934444403, + "learning_rate": 9.106339913402334e-06, + "loss": 0.6134, "step": 11529 }, { - "epoch": 2.4176976305305096, - "grad_norm": 4.530193763261716, - "learning_rate": 1.9119596717023593e-06, - "loss": 0.158, + "epoch": 1.6280711663371927, + "grad_norm": 3.5857409642918836, + "learning_rate": 9.104821570394811e-06, + "loss": 0.6019, "step": 11530 }, { - "epoch": 2.417907318096037, - "grad_norm": 3.571589664579052, - "learning_rate": 1.910628537669398e-06, - "loss": 0.125, + "epoch": 1.6282123693871786, + "grad_norm": 3.8296707886010366, + "learning_rate": 9.103303248190855e-06, + "loss": 0.5249, "step": 11531 }, { - "epoch": 2.418117005661564, - "grad_norm": 4.046000040405084, - "learning_rate": 1.909297818230752e-06, - "loss": 0.1585, + "epoch": 1.6283535724371645, + "grad_norm": 3.6073856317893176, + "learning_rate": 9.101784946825739e-06, + "loss": 0.6535, "step": 11532 }, { - "epoch": 2.4183266932270917, - "grad_norm": 4.099647154459045, - "learning_rate": 1.907967513454626e-06, - "loss": 0.1574, + "epoch": 1.6284947754871504, + "grad_norm": 3.292959027186167, + "learning_rate": 9.100266666334753e-06, + "loss": 0.5453, "step": 11533 }, { - "epoch": 2.4185363807926192, - "grad_norm": 4.140979480189836, - "learning_rate": 1.9066376234092021e-06, - "loss": 0.1425, + "epoch": 1.6286359785371363, + "grad_norm": 3.18509472060752, + "learning_rate": 9.098748406753181e-06, + "loss": 0.4927, "step": 11534 }, { - "epoch": 2.4187460683581463, - "grad_norm": 3.9554759533095316, - "learning_rate": 1.9053081481626379e-06, - "loss": 0.1323, + "epoch": 1.6287771815871221, + "grad_norm": 3.386844312718868, + "learning_rate": 9.097230168116306e-06, + "loss": 0.5204, "step": 11535 }, { - "epoch": 2.418955755923674, - "grad_norm": 3.589748420736371, - "learning_rate": 1.9039790877830677e-06, - "loss": 0.1319, + "epoch": 1.628918384637108, + "grad_norm": 3.8018359683657916, + "learning_rate": 9.095711950459412e-06, + "loss": 0.5145, "step": 11536 }, { - "epoch": 2.419165443489201, - "grad_norm": 3.796030733400382, - "learning_rate": 1.9026504423386127e-06, - "loss": 0.1294, + "epoch": 1.629059587687094, + "grad_norm": 3.4178554819877536, + "learning_rate": 9.094193753817784e-06, + "loss": 0.5508, "step": 11537 }, { - "epoch": 2.4193751310547285, - "grad_norm": 4.459965383244707, - "learning_rate": 1.901322211897365e-06, - "loss": 0.119, + "epoch": 1.6292007907370798, + "grad_norm": 3.8400718774567584, + "learning_rate": 9.0926755782267e-06, + "loss": 0.5979, "step": 11538 }, { - "epoch": 2.419584818620256, - "grad_norm": 4.805784570267907, - "learning_rate": 1.8999943965274036e-06, - "loss": 0.153, + "epoch": 1.6293419937870657, + "grad_norm": 4.148113618299864, + "learning_rate": 9.091157423721445e-06, + "loss": 0.7121, "step": 11539 }, { - "epoch": 2.419794506185783, - "grad_norm": 4.8437393146058625, - "learning_rate": 1.8986669962967773e-06, - "loss": 0.1836, + "epoch": 1.6294831968370516, + "grad_norm": 3.866072511509366, + "learning_rate": 9.0896392903373e-06, + "loss": 0.5941, "step": 11540 }, { - "epoch": 2.4200041937513106, - "grad_norm": 3.672803682336201, - "learning_rate": 1.8973400112735152e-06, - "loss": 0.1023, + "epoch": 1.6296243998870374, + "grad_norm": 3.5857225572190377, + "learning_rate": 9.088121178109544e-06, + "loss": 0.5914, "step": 11541 }, { - "epoch": 2.4202138813168377, - "grad_norm": 5.4751474122569705, - "learning_rate": 1.8960134415256325e-06, - "loss": 0.1621, + "epoch": 1.6297656029370233, + "grad_norm": 3.4846823477917597, + "learning_rate": 9.086603087073457e-06, + "loss": 0.5053, "step": 11542 }, { - "epoch": 2.420423568882365, - "grad_norm": 3.981012327132323, - "learning_rate": 1.8946872871211185e-06, - "loss": 0.1335, + "epoch": 1.6299068059870092, + "grad_norm": 3.374143095991624, + "learning_rate": 9.085085017264322e-06, + "loss": 0.5092, "step": 11543 }, { - "epoch": 2.4206332564478927, - "grad_norm": 5.222317740462597, - "learning_rate": 1.8933615481279388e-06, - "loss": 0.1587, + "epoch": 1.630048009036995, + "grad_norm": 4.889668855369143, + "learning_rate": 9.083566968717412e-06, + "loss": 0.5402, "step": 11544 }, { - "epoch": 2.42084294401342, - "grad_norm": 4.334153410153875, - "learning_rate": 1.8920362246140366e-06, - "loss": 0.1298, + "epoch": 1.630189212086981, + "grad_norm": 3.8638760272976844, + "learning_rate": 9.082048941468012e-06, + "loss": 0.6618, "step": 11545 }, { - "epoch": 2.4210526315789473, - "grad_norm": 3.966153720263226, - "learning_rate": 1.8907113166473444e-06, - "loss": 0.1425, + "epoch": 1.6303304151369669, + "grad_norm": 2.662091944305651, + "learning_rate": 9.080530935551398e-06, + "loss": 0.44, "step": 11546 }, { - "epoch": 2.421262319144475, - "grad_norm": 5.0173451169824, - "learning_rate": 1.8893868242957614e-06, - "loss": 0.1734, + "epoch": 1.6304716181869527, + "grad_norm": 3.1556897719783517, + "learning_rate": 9.079012951002847e-06, + "loss": 0.4738, "step": 11547 }, { - "epoch": 2.421472006710002, - "grad_norm": 4.104423916512362, - "learning_rate": 1.8880627476271684e-06, - "loss": 0.1625, + "epoch": 1.6306128212369386, + "grad_norm": 3.7095184243235564, + "learning_rate": 9.077494987857644e-06, + "loss": 0.4831, "step": 11548 }, { - "epoch": 2.4216816942755295, - "grad_norm": 4.677099452799848, - "learning_rate": 1.8867390867094316e-06, - "loss": 0.1556, + "epoch": 1.6307540242869245, + "grad_norm": 3.747975553703809, + "learning_rate": 9.07597704615105e-06, + "loss": 0.5575, "step": 11549 }, { - "epoch": 2.421891381841057, - "grad_norm": 4.218474731361375, - "learning_rate": 1.8854158416103852e-06, - "loss": 0.1301, + "epoch": 1.6308952273369104, + "grad_norm": 3.6433056037429115, + "learning_rate": 9.074459125918356e-06, + "loss": 0.5153, "step": 11550 }, { - "epoch": 2.422101069406584, - "grad_norm": 3.8116331369338243, - "learning_rate": 1.884093012397854e-06, - "loss": 0.1491, + "epoch": 1.6310364303868963, + "grad_norm": 3.784166155695553, + "learning_rate": 9.07294122719483e-06, + "loss": 0.5383, "step": 11551 }, { - "epoch": 2.4223107569721116, - "grad_norm": 5.279907844456461, - "learning_rate": 1.8827705991396327e-06, - "loss": 0.1791, + "epoch": 1.6311776334368822, + "grad_norm": 3.0263482787969034, + "learning_rate": 9.071423350015747e-06, + "loss": 0.4786, "step": 11552 }, { - "epoch": 2.422520444537639, - "grad_norm": 4.275882782585302, - "learning_rate": 1.881448601903494e-06, - "loss": 0.1426, + "epoch": 1.631318836486868, + "grad_norm": 3.6197375099960096, + "learning_rate": 9.069905494416387e-06, + "loss": 0.6055, "step": 11553 }, { - "epoch": 2.4227301321031662, - "grad_norm": 3.7942859000921616, - "learning_rate": 1.8801270207571953e-06, - "loss": 0.0933, + "epoch": 1.631460039536854, + "grad_norm": 3.7938306032590714, + "learning_rate": 9.068387660432023e-06, + "loss": 0.6526, "step": 11554 }, { - "epoch": 2.4229398196686938, - "grad_norm": 4.17888874628217, - "learning_rate": 1.8788058557684718e-06, - "loss": 0.1657, + "epoch": 1.6316012425868398, + "grad_norm": 2.911314886297144, + "learning_rate": 9.066869848097925e-06, + "loss": 0.5598, "step": 11555 }, { - "epoch": 2.423149507234221, - "grad_norm": 4.8385185436556695, - "learning_rate": 1.8774851070050349e-06, - "loss": 0.1639, + "epoch": 1.6317424456368257, + "grad_norm": 3.3981094269155734, + "learning_rate": 9.06535205744937e-06, + "loss": 0.5442, "step": 11556 }, { - "epoch": 2.4233591947997484, - "grad_norm": 3.648020519856198, - "learning_rate": 1.8761647745345712e-06, - "loss": 0.1371, + "epoch": 1.6318836486868116, + "grad_norm": 3.0223971206067364, + "learning_rate": 9.063834288521632e-06, + "loss": 0.4559, "step": 11557 }, { - "epoch": 2.423568882365276, - "grad_norm": 4.438671663400621, - "learning_rate": 1.8748448584247547e-06, - "loss": 0.1784, + "epoch": 1.6320248517367975, + "grad_norm": 3.5633770183814453, + "learning_rate": 9.062316541349978e-06, + "loss": 0.6565, "step": 11558 }, { - "epoch": 2.423778569930803, - "grad_norm": 4.724968961977691, - "learning_rate": 1.8735253587432301e-06, - "loss": 0.1543, + "epoch": 1.6321660547867833, + "grad_norm": 3.396417179006909, + "learning_rate": 9.060798815969682e-06, + "loss": 0.6375, "step": 11559 }, { - "epoch": 2.4239882574963305, - "grad_norm": 4.7704754760958155, - "learning_rate": 1.8722062755576277e-06, - "loss": 0.1645, + "epoch": 1.6323072578367692, + "grad_norm": 4.163513774326099, + "learning_rate": 9.059281112416017e-06, + "loss": 0.6467, "step": 11560 }, { - "epoch": 2.4241979450618576, - "grad_norm": 3.884495031465616, - "learning_rate": 1.87088760893555e-06, - "loss": 0.1529, + "epoch": 1.632448460886755, + "grad_norm": 3.406094622860866, + "learning_rate": 9.057763430724252e-06, + "loss": 0.6088, "step": 11561 }, { - "epoch": 2.424407632627385, - "grad_norm": 4.170765093902806, - "learning_rate": 1.8695693589445797e-06, - "loss": 0.1341, + "epoch": 1.632589663936741, + "grad_norm": 3.9509449518622857, + "learning_rate": 9.056245770929659e-06, + "loss": 0.5203, "step": 11562 }, { - "epoch": 2.4246173201929127, - "grad_norm": 3.6493213125385418, - "learning_rate": 1.8682515256522804e-06, - "loss": 0.1053, + "epoch": 1.6327308669867269, + "grad_norm": 3.397514115868934, + "learning_rate": 9.054728133067505e-06, + "loss": 0.5057, "step": 11563 }, { - "epoch": 2.4248270077584397, - "grad_norm": 4.0516949637488135, - "learning_rate": 1.8669341091262005e-06, - "loss": 0.149, + "epoch": 1.6328720700367128, + "grad_norm": 3.6975593244433655, + "learning_rate": 9.053210517173061e-06, + "loss": 0.6212, "step": 11564 }, { - "epoch": 2.4250366953239673, - "grad_norm": 3.917069968248065, - "learning_rate": 1.8656171094338472e-06, - "loss": 0.119, + "epoch": 1.6330132730866986, + "grad_norm": 4.674956106627507, + "learning_rate": 9.051692923281601e-06, + "loss": 0.6475, "step": 11565 }, { - "epoch": 2.425246382889495, - "grad_norm": 4.661929632093208, - "learning_rate": 1.8643005266427262e-06, - "loss": 0.1653, + "epoch": 1.6331544761366845, + "grad_norm": 3.4501371469836437, + "learning_rate": 9.050175351428381e-06, + "loss": 0.531, "step": 11566 }, { - "epoch": 2.425456070455022, - "grad_norm": 4.587427803078533, - "learning_rate": 1.862984360820317e-06, - "loss": 0.1725, + "epoch": 1.6332956791866704, + "grad_norm": 3.443554838270805, + "learning_rate": 9.048657801648679e-06, + "loss": 0.5842, "step": 11567 }, { - "epoch": 2.4256657580205494, - "grad_norm": 5.219391024111514, - "learning_rate": 1.861668612034071e-06, - "loss": 0.1495, + "epoch": 1.6334368822366563, + "grad_norm": 3.5089365997365007, + "learning_rate": 9.04714027397776e-06, + "loss": 0.5598, "step": 11568 }, { - "epoch": 2.425875445586077, - "grad_norm": 3.9288525951703788, - "learning_rate": 1.8603532803514225e-06, - "loss": 0.1328, + "epoch": 1.6335780852866422, + "grad_norm": 3.385298515528883, + "learning_rate": 9.045622768450884e-06, + "loss": 0.53, "step": 11569 }, { - "epoch": 2.426085133151604, - "grad_norm": 4.411072407997811, - "learning_rate": 1.859038365839787e-06, - "loss": 0.1409, + "epoch": 1.633719288336628, + "grad_norm": 3.8451569369150347, + "learning_rate": 9.044105285103327e-06, + "loss": 0.5548, "step": 11570 }, { - "epoch": 2.4262948207171315, - "grad_norm": 4.802286478172426, - "learning_rate": 1.8577238685665532e-06, - "loss": 0.1659, + "epoch": 1.633860491386614, + "grad_norm": 4.186020276868135, + "learning_rate": 9.04258782397035e-06, + "loss": 0.7828, "step": 11571 }, { - "epoch": 2.426504508282659, - "grad_norm": 4.46855863915047, - "learning_rate": 1.856409788599096e-06, - "loss": 0.1217, + "epoch": 1.6340016944365998, + "grad_norm": 3.485194153066215, + "learning_rate": 9.04107038508722e-06, + "loss": 0.6646, "step": 11572 }, { - "epoch": 2.426714195848186, - "grad_norm": 4.276144776655258, - "learning_rate": 1.8550961260047618e-06, - "loss": 0.1495, + "epoch": 1.6341428974865857, + "grad_norm": 3.779725659166283, + "learning_rate": 9.039552968489196e-06, + "loss": 0.596, "step": 11573 }, { - "epoch": 2.4269238834137137, - "grad_norm": 4.19834843643309, - "learning_rate": 1.853782880850874e-06, - "loss": 0.1741, + "epoch": 1.6342841005365716, + "grad_norm": 4.337594000633033, + "learning_rate": 9.038035574211553e-06, + "loss": 0.7644, "step": 11574 }, { - "epoch": 2.4271335709792408, - "grad_norm": 5.535748703773797, - "learning_rate": 1.8524700532047435e-06, - "loss": 0.1669, + "epoch": 1.6344253035865575, + "grad_norm": 3.5664909953956654, + "learning_rate": 9.036518202289542e-06, + "loss": 0.5933, "step": 11575 }, { - "epoch": 2.4273432585447683, - "grad_norm": 4.9004793071666555, - "learning_rate": 1.8511576431336564e-06, - "loss": 0.1383, + "epoch": 1.6345665066365433, + "grad_norm": 3.90140336096162, + "learning_rate": 9.035000852758433e-06, + "loss": 0.6046, "step": 11576 }, { - "epoch": 2.427552946110296, - "grad_norm": 3.8938871754300837, - "learning_rate": 1.8498456507048734e-06, - "loss": 0.1485, + "epoch": 1.6347077096865292, + "grad_norm": 4.037443451269912, + "learning_rate": 9.033483525653488e-06, + "loss": 0.5509, "step": 11577 }, { - "epoch": 2.427762633675823, - "grad_norm": 3.8075118637340415, - "learning_rate": 1.848534075985634e-06, - "loss": 0.1247, + "epoch": 1.6348489127365151, + "grad_norm": 3.6012277490470765, + "learning_rate": 9.031966221009966e-06, + "loss": 0.6167, "step": 11578 }, { - "epoch": 2.4279723212413504, - "grad_norm": 4.546713399719087, - "learning_rate": 1.8472229190431634e-06, - "loss": 0.1447, + "epoch": 1.634990115786501, + "grad_norm": 3.5972015434588958, + "learning_rate": 9.030448938863134e-06, + "loss": 0.5026, "step": 11579 }, { - "epoch": 2.4281820088068775, - "grad_norm": 3.8183160676963706, - "learning_rate": 1.8459121799446567e-06, - "loss": 0.1499, + "epoch": 1.6351313188364869, + "grad_norm": 3.4807323234758, + "learning_rate": 9.028931679248249e-06, + "loss": 0.5067, "step": 11580 }, { - "epoch": 2.428391696372405, - "grad_norm": 3.6908101921378824, - "learning_rate": 1.8446018587572945e-06, - "loss": 0.1485, + "epoch": 1.6352725218864728, + "grad_norm": 4.48096648096107, + "learning_rate": 9.027414442200571e-06, + "loss": 0.6753, "step": 11581 }, { - "epoch": 2.4286013839379326, - "grad_norm": 4.3182762037486455, - "learning_rate": 1.8432919555482331e-06, - "loss": 0.1142, + "epoch": 1.6354137249364586, + "grad_norm": 3.4129221459465886, + "learning_rate": 9.025897227755367e-06, + "loss": 0.4925, "step": 11582 }, { - "epoch": 2.4288110715034597, - "grad_norm": 5.244094145869, - "learning_rate": 1.8419824703846034e-06, - "loss": 0.1668, + "epoch": 1.6355549279864445, + "grad_norm": 3.7662343387655923, + "learning_rate": 9.024380035947883e-06, + "loss": 0.6248, "step": 11583 }, { - "epoch": 2.429020759068987, - "grad_norm": 4.4707026346222225, - "learning_rate": 1.8406734033335205e-06, - "loss": 0.1416, + "epoch": 1.6356961310364304, + "grad_norm": 4.342212575471232, + "learning_rate": 9.022862866813392e-06, + "loss": 0.7819, "step": 11584 }, { - "epoch": 2.4292304466345147, - "grad_norm": 5.644774200398411, - "learning_rate": 1.839364754462084e-06, - "loss": 0.193, + "epoch": 1.6358373340864163, + "grad_norm": 3.735964799923185, + "learning_rate": 9.021345720387142e-06, + "loss": 0.5661, "step": 11585 }, { - "epoch": 2.429440134200042, - "grad_norm": 3.421698419337396, - "learning_rate": 1.8380565238373515e-06, - "loss": 0.1233, + "epoch": 1.6359785371364022, + "grad_norm": 3.373516409355235, + "learning_rate": 9.019828596704394e-06, + "loss": 0.5485, "step": 11586 }, { - "epoch": 2.4296498217655693, - "grad_norm": 4.270402520694216, - "learning_rate": 1.8367487115263782e-06, - "loss": 0.1313, + "epoch": 1.636119740186388, + "grad_norm": 4.039193743491625, + "learning_rate": 9.018311495800408e-06, + "loss": 0.5988, "step": 11587 }, { - "epoch": 2.429859509331097, - "grad_norm": 4.758688444196812, - "learning_rate": 1.8354413175961927e-06, - "loss": 0.1459, + "epoch": 1.636260943236374, + "grad_norm": 3.4935358871002813, + "learning_rate": 9.016794417710439e-06, + "loss": 0.56, "step": 11588 }, { - "epoch": 2.430069196896624, - "grad_norm": 3.9493660715397336, - "learning_rate": 1.8341343421138046e-06, - "loss": 0.1261, + "epoch": 1.6364021462863598, + "grad_norm": 3.3572630398661523, + "learning_rate": 9.015277362469744e-06, + "loss": 0.5261, "step": 11589 }, { - "epoch": 2.4302788844621515, - "grad_norm": 3.0969277406306164, - "learning_rate": 1.8328277851461896e-06, - "loss": 0.1015, + "epoch": 1.6365433493363457, + "grad_norm": 3.646575002307307, + "learning_rate": 9.013760330113575e-06, + "loss": 0.5867, "step": 11590 }, { - "epoch": 2.430488572027679, - "grad_norm": 3.86641177599819, - "learning_rate": 1.831521646760316e-06, - "loss": 0.1376, + "epoch": 1.6366845523863316, + "grad_norm": 3.2243823301475487, + "learning_rate": 9.012243320677196e-06, + "loss": 0.4536, "step": 11591 }, { - "epoch": 2.430698259593206, - "grad_norm": 3.285235110605288, - "learning_rate": 1.830215927023128e-06, - "loss": 0.1121, + "epoch": 1.6368257554363175, + "grad_norm": 3.4989008214476933, + "learning_rate": 9.010726334195851e-06, + "loss": 0.4807, "step": 11592 }, { - "epoch": 2.4309079471587336, - "grad_norm": 5.338087363722873, - "learning_rate": 1.8289106260015443e-06, - "loss": 0.188, + "epoch": 1.6369669584863034, + "grad_norm": 3.278247928794583, + "learning_rate": 9.009209370704799e-06, + "loss": 0.5297, "step": 11593 }, { - "epoch": 2.4311176347242607, - "grad_norm": 4.103614429871245, - "learning_rate": 1.8276057437624595e-06, - "loss": 0.1497, + "epoch": 1.6371081615362892, + "grad_norm": 3.249729718127036, + "learning_rate": 9.007692430239294e-06, + "loss": 0.4263, "step": 11594 }, { - "epoch": 2.431327322289788, - "grad_norm": 5.219029463349436, - "learning_rate": 1.8263012803727576e-06, - "loss": 0.1236, + "epoch": 1.6372493645862751, + "grad_norm": 3.3352889872241014, + "learning_rate": 9.006175512834587e-06, + "loss": 0.5223, "step": 11595 }, { - "epoch": 2.4315370098553157, - "grad_norm": 3.509086954565987, - "learning_rate": 1.8249972358992884e-06, - "loss": 0.1201, + "epoch": 1.637390567636261, + "grad_norm": 3.3257139773722484, + "learning_rate": 9.004658618525932e-06, + "loss": 0.5041, "step": 11596 }, { - "epoch": 2.431746697420843, - "grad_norm": 4.467464805532375, - "learning_rate": 1.8236936104088932e-06, - "loss": 0.1506, + "epoch": 1.637531770686247, + "grad_norm": 3.8197145481940344, + "learning_rate": 9.003141747348583e-06, + "loss": 0.5644, "step": 11597 }, { - "epoch": 2.4319563849863703, - "grad_norm": 4.114036883713786, - "learning_rate": 1.8223904039683816e-06, - "loss": 0.1327, + "epoch": 1.6376729737362328, + "grad_norm": 3.8281063187059647, + "learning_rate": 9.001624899337785e-06, + "loss": 0.6946, "step": 11598 }, { - "epoch": 2.4321660725518974, - "grad_norm": 3.716981971534856, - "learning_rate": 1.8210876166445412e-06, - "loss": 0.1318, + "epoch": 1.6378141767862187, + "grad_norm": 4.611649663642448, + "learning_rate": 9.000108074528802e-06, + "loss": 0.7563, "step": 11599 }, { - "epoch": 2.432375760117425, - "grad_norm": 5.047725538649979, - "learning_rate": 1.8197852485041479e-06, - "loss": 0.1637, + "epoch": 1.6379553798362045, + "grad_norm": 3.2407059015567685, + "learning_rate": 8.998591272956866e-06, + "loss": 0.5158, "step": 11600 }, { - "epoch": 2.4325854476829525, - "grad_norm": 3.9681602273539385, - "learning_rate": 1.8184832996139501e-06, - "loss": 0.1274, + "epoch": 1.6380965828861904, + "grad_norm": 3.744898450661428, + "learning_rate": 8.997074494657242e-06, + "loss": 0.6524, "step": 11601 }, { - "epoch": 2.4327951352484796, - "grad_norm": 3.789515392775734, - "learning_rate": 1.8171817700406736e-06, - "loss": 0.159, + "epoch": 1.6382377859361763, + "grad_norm": 3.492412176349796, + "learning_rate": 8.995557739665172e-06, + "loss": 0.6129, "step": 11602 }, { - "epoch": 2.433004822814007, - "grad_norm": 4.015874604666652, - "learning_rate": 1.8158806598510204e-06, - "loss": 0.1092, + "epoch": 1.6383789889861622, + "grad_norm": 3.8879497258602815, + "learning_rate": 8.994041008015906e-06, + "loss": 0.6497, "step": 11603 }, { - "epoch": 2.4332145103795346, - "grad_norm": 4.872027005285861, - "learning_rate": 1.8145799691116805e-06, - "loss": 0.1606, + "epoch": 1.638520192036148, + "grad_norm": 3.6641362988110395, + "learning_rate": 8.99252429974469e-06, + "loss": 0.589, "step": 11604 }, { - "epoch": 2.4334241979450617, - "grad_norm": 3.6391003911024296, - "learning_rate": 1.813279697889313e-06, - "loss": 0.1051, + "epoch": 1.638661395086134, + "grad_norm": 2.5586819885690337, + "learning_rate": 8.991007614886778e-06, + "loss": 0.3787, "step": 11605 }, { - "epoch": 2.4336338855105892, - "grad_norm": 7.050559231916166, - "learning_rate": 1.8119798462505612e-06, - "loss": 0.1811, + "epoch": 1.6388025981361198, + "grad_norm": 3.2974941042258696, + "learning_rate": 8.989490953477413e-06, + "loss": 0.5852, "step": 11606 }, { - "epoch": 2.4338435730761168, - "grad_norm": 5.102285905243485, - "learning_rate": 1.8106804142620449e-06, - "loss": 0.1499, + "epoch": 1.6389438011861057, + "grad_norm": 3.7120917522293926, + "learning_rate": 8.987974315551838e-06, + "loss": 0.5774, "step": 11607 }, { - "epoch": 2.434053260641644, - "grad_norm": 4.318424877069384, - "learning_rate": 1.8093814019903577e-06, - "loss": 0.1405, + "epoch": 1.6390850042360916, + "grad_norm": 3.8831475074416586, + "learning_rate": 8.98645770114531e-06, + "loss": 0.5806, "step": 11608 }, { - "epoch": 2.4342629482071714, - "grad_norm": 4.874774410112969, - "learning_rate": 1.8080828095020819e-06, - "loss": 0.1543, + "epoch": 1.6392262072860775, + "grad_norm": 3.140453945275205, + "learning_rate": 8.984941110293061e-06, + "loss": 0.4618, "step": 11609 }, { - "epoch": 2.434472635772699, - "grad_norm": 4.289148330915365, - "learning_rate": 1.806784636863771e-06, - "loss": 0.1558, + "epoch": 1.6393674103360634, + "grad_norm": 3.567525637544053, + "learning_rate": 8.983424543030344e-06, + "loss": 0.6199, "step": 11610 }, { - "epoch": 2.434682323338226, - "grad_norm": 3.552246822868351, - "learning_rate": 1.8054868841419549e-06, - "loss": 0.1493, + "epoch": 1.6395086133860493, + "grad_norm": 3.173021321515205, + "learning_rate": 8.981907999392403e-06, + "loss": 0.4828, "step": 11611 }, { - "epoch": 2.4348920109037535, - "grad_norm": 3.7513602592361384, - "learning_rate": 1.8041895514031493e-06, - "loss": 0.1298, + "epoch": 1.6396498164360351, + "grad_norm": 3.507700498808961, + "learning_rate": 8.980391479414478e-06, + "loss": 0.5557, "step": 11612 }, { - "epoch": 2.4351016984692806, - "grad_norm": 4.905802152210184, - "learning_rate": 1.8028926387138456e-06, - "loss": 0.1328, + "epoch": 1.639791019486021, + "grad_norm": 3.8604184832843202, + "learning_rate": 8.978874983131816e-06, + "loss": 0.5883, "step": 11613 }, { - "epoch": 2.435311386034808, - "grad_norm": 4.170007899332841, - "learning_rate": 1.8015961461405118e-06, - "loss": 0.1453, + "epoch": 1.639932222536007, + "grad_norm": 4.217393082613377, + "learning_rate": 8.977358510579658e-06, + "loss": 0.6407, "step": 11614 }, { - "epoch": 2.4355210736003357, - "grad_norm": 3.5796486863115904, - "learning_rate": 1.8003000737495923e-06, - "loss": 0.1101, + "epoch": 1.6400734255859928, + "grad_norm": 2.904952332249058, + "learning_rate": 8.975842061793247e-06, + "loss": 0.4347, "step": 11615 }, { - "epoch": 2.4357307611658627, - "grad_norm": 3.947394739230773, - "learning_rate": 1.7990044216075186e-06, - "loss": 0.1382, + "epoch": 1.6402146286359787, + "grad_norm": 3.7249708979730327, + "learning_rate": 8.974325636807826e-06, + "loss": 0.5295, "step": 11616 }, { - "epoch": 2.4359404487313903, - "grad_norm": 3.783281274405878, - "learning_rate": 1.7977091897806886e-06, - "loss": 0.1009, + "epoch": 1.6403558316859645, + "grad_norm": 3.2386950885444206, + "learning_rate": 8.972809235658631e-06, + "loss": 0.6062, "step": 11617 }, { - "epoch": 2.4361501362969173, - "grad_norm": 5.792851576131324, - "learning_rate": 1.7964143783354925e-06, - "loss": 0.1719, + "epoch": 1.6404970347359504, + "grad_norm": 3.6741575389414862, + "learning_rate": 8.971292858380908e-06, + "loss": 0.5964, "step": 11618 }, { - "epoch": 2.436359823862445, - "grad_norm": 3.5958710621670336, - "learning_rate": 1.7951199873382873e-06, - "loss": 0.1394, + "epoch": 1.6406382377859363, + "grad_norm": 3.028847217426916, + "learning_rate": 8.969776505009894e-06, + "loss": 0.5197, "step": 11619 }, { - "epoch": 2.4365695114279724, - "grad_norm": 5.096906359643023, - "learning_rate": 1.7938260168554112e-06, - "loss": 0.1876, + "epoch": 1.6407794408359222, + "grad_norm": 3.6453250109596493, + "learning_rate": 8.96826017558083e-06, + "loss": 0.6152, "step": 11620 }, { - "epoch": 2.4367791989934995, - "grad_norm": 4.62530718972109, - "learning_rate": 1.7925324669531853e-06, - "loss": 0.1422, + "epoch": 1.640920643885908, + "grad_norm": 3.75253356038471, + "learning_rate": 8.96674387012895e-06, + "loss": 0.5456, "step": 11621 }, { - "epoch": 2.436988886559027, - "grad_norm": 3.084964368189745, - "learning_rate": 1.7912393376979064e-06, - "loss": 0.1235, + "epoch": 1.641061846935894, + "grad_norm": 3.0418476777132364, + "learning_rate": 8.9652275886895e-06, + "loss": 0.465, "step": 11622 }, { - "epoch": 2.4371985741245545, - "grad_norm": 4.563698145888654, - "learning_rate": 1.7899466291558499e-06, - "loss": 0.1476, + "epoch": 1.6412030499858798, + "grad_norm": 3.3677574073742202, + "learning_rate": 8.963711331297713e-06, + "loss": 0.5191, "step": 11623 }, { - "epoch": 2.4374082616900816, - "grad_norm": 4.126461705233828, - "learning_rate": 1.7886543413932655e-06, - "loss": 0.142, + "epoch": 1.6413442530358657, + "grad_norm": 4.186626254873003, + "learning_rate": 8.962195097988825e-06, + "loss": 0.6796, "step": 11624 }, { - "epoch": 2.437617949255609, - "grad_norm": 3.659989552813751, - "learning_rate": 1.7873624744763907e-06, - "loss": 0.1474, + "epoch": 1.6414854560858516, + "grad_norm": 3.905694083038733, + "learning_rate": 8.960678888798082e-06, + "loss": 0.5876, "step": 11625 }, { - "epoch": 2.4378276368211367, - "grad_norm": 3.50288718532517, - "learning_rate": 1.7860710284714334e-06, - "loss": 0.0753, + "epoch": 1.6416266591358375, + "grad_norm": 3.3703755527821873, + "learning_rate": 8.959162703760706e-06, + "loss": 0.5089, "step": 11626 }, { - "epoch": 2.4380373243866638, - "grad_norm": 3.6278997167168217, - "learning_rate": 1.7847800034445784e-06, - "loss": 0.1217, + "epoch": 1.6417678621858234, + "grad_norm": 3.381486405865705, + "learning_rate": 8.95764654291194e-06, + "loss": 0.5659, "step": 11627 }, { - "epoch": 2.4382470119521913, - "grad_norm": 4.662845521691005, - "learning_rate": 1.7834893994620007e-06, - "loss": 0.1611, + "epoch": 1.641909065235809, + "grad_norm": 3.4977429865652176, + "learning_rate": 8.95613040628702e-06, + "loss": 0.4606, "step": 11628 }, { - "epoch": 2.438456699517719, - "grad_norm": 4.617434001085566, - "learning_rate": 1.7821992165898394e-06, - "loss": 0.1549, + "epoch": 1.642050268285795, + "grad_norm": 3.368360610302634, + "learning_rate": 8.954614293921175e-06, + "loss": 0.4917, "step": 11629 }, { - "epoch": 2.438666387083246, - "grad_norm": 6.2793805770872035, - "learning_rate": 1.780909454894223e-06, - "loss": 0.2151, + "epoch": 1.6421914713357808, + "grad_norm": 3.4681809707327536, + "learning_rate": 8.953098205849647e-06, + "loss": 0.5574, "step": 11630 }, { - "epoch": 2.4388760746487734, - "grad_norm": 4.174854093002755, - "learning_rate": 1.779620114441253e-06, - "loss": 0.1312, + "epoch": 1.6423326743857667, + "grad_norm": 3.5086713162334395, + "learning_rate": 8.951582142107663e-06, + "loss": 0.5032, "step": 11631 }, { - "epoch": 2.4390857622143005, - "grad_norm": 5.598812009577553, - "learning_rate": 1.7783311952970072e-06, - "loss": 0.1582, + "epoch": 1.6424738774357526, + "grad_norm": 4.321532822876115, + "learning_rate": 8.950066102730456e-06, + "loss": 0.5961, "step": 11632 }, { - "epoch": 2.439295449779828, - "grad_norm": 4.533099904707504, - "learning_rate": 1.7770426975275468e-06, - "loss": 0.1523, + "epoch": 1.6426150804857385, + "grad_norm": 3.032603171434424, + "learning_rate": 8.948550087753263e-06, + "loss": 0.4251, "step": 11633 }, { - "epoch": 2.4395051373453556, - "grad_norm": 3.5368892804206116, - "learning_rate": 1.775754621198914e-06, - "loss": 0.1339, + "epoch": 1.6427562835357243, + "grad_norm": 4.386802401397069, + "learning_rate": 8.947034097211309e-06, + "loss": 0.6617, "step": 11634 }, { - "epoch": 2.4397148249108827, - "grad_norm": 4.094024981456998, - "learning_rate": 1.7744669663771198e-06, - "loss": 0.1623, + "epoch": 1.6428974865857102, + "grad_norm": 3.0251192919397756, + "learning_rate": 8.945518131139826e-06, + "loss": 0.4673, "step": 11635 }, { - "epoch": 2.43992451247641, - "grad_norm": 4.536694867982867, - "learning_rate": 1.7731797331281574e-06, - "loss": 0.1901, + "epoch": 1.643038689635696, + "grad_norm": 4.079550771367353, + "learning_rate": 8.944002189574047e-06, + "loss": 0.6717, "step": 11636 }, { - "epoch": 2.4401342000419377, - "grad_norm": 2.9432722757813945, - "learning_rate": 1.771892921518006e-06, - "loss": 0.1033, + "epoch": 1.643179892685682, + "grad_norm": 3.626939632349534, + "learning_rate": 8.942486272549203e-06, + "loss": 0.5937, "step": 11637 }, { - "epoch": 2.440343887607465, - "grad_norm": 5.398432769752616, - "learning_rate": 1.7706065316126097e-06, - "loss": 0.1388, + "epoch": 1.6433210957356679, + "grad_norm": 3.413849097910186, + "learning_rate": 8.94097038010052e-06, + "loss": 0.582, "step": 11638 }, { - "epoch": 2.4405535751729923, - "grad_norm": 3.6611296986210267, - "learning_rate": 1.7693205634779054e-06, - "loss": 0.1087, + "epoch": 1.6434622987856538, + "grad_norm": 3.4935634423786794, + "learning_rate": 8.939454512263228e-06, + "loss": 0.5863, "step": 11639 }, { - "epoch": 2.4407632627385194, - "grad_norm": 3.815553052200072, - "learning_rate": 1.7680350171797966e-06, - "loss": 0.1199, + "epoch": 1.6436035018356396, + "grad_norm": 3.563872413321058, + "learning_rate": 8.937938669072557e-06, + "loss": 0.5334, "step": 11640 }, { - "epoch": 2.440972950304047, - "grad_norm": 3.746946118409033, - "learning_rate": 1.7667498927841686e-06, - "loss": 0.1459, + "epoch": 1.6437447048856255, + "grad_norm": 3.492020843098611, + "learning_rate": 8.936422850563728e-06, + "loss": 0.5588, "step": 11641 }, { - "epoch": 2.4411826378695745, - "grad_norm": 3.50954775367525, - "learning_rate": 1.7654651903568887e-06, - "loss": 0.1268, + "epoch": 1.6438859079356114, + "grad_norm": 3.7166154543145944, + "learning_rate": 8.93490705677198e-06, + "loss": 0.5949, "step": 11642 }, { - "epoch": 2.4413923254351015, - "grad_norm": 4.338792428491563, - "learning_rate": 1.7641809099638052e-06, - "loss": 0.1437, + "epoch": 1.6440271109855973, + "grad_norm": 2.864905491591639, + "learning_rate": 8.933391287732527e-06, + "loss": 0.5244, "step": 11643 }, { - "epoch": 2.441602013000629, - "grad_norm": 4.251083704147908, - "learning_rate": 1.7628970516707288e-06, - "loss": 0.151, + "epoch": 1.6441683140355832, + "grad_norm": 3.5001341231649548, + "learning_rate": 8.931875543480601e-06, + "loss": 0.4966, "step": 11644 }, { - "epoch": 2.4418117005661566, - "grad_norm": 3.5984200401758395, - "learning_rate": 1.761613615543465e-06, - "loss": 0.1294, + "epoch": 1.644309517085569, + "grad_norm": 3.673204516114982, + "learning_rate": 8.930359824051427e-06, + "loss": 0.5514, "step": 11645 }, { - "epoch": 2.4420213881316837, - "grad_norm": 4.069162601956504, - "learning_rate": 1.760330601647794e-06, - "loss": 0.15, + "epoch": 1.644450720135555, + "grad_norm": 3.212162899392053, + "learning_rate": 8.928844129480228e-06, + "loss": 0.4351, "step": 11646 }, { - "epoch": 2.442231075697211, - "grad_norm": 4.284091545195634, - "learning_rate": 1.7590480100494723e-06, - "loss": 0.1333, + "epoch": 1.6445919231855408, + "grad_norm": 3.8350700444536425, + "learning_rate": 8.927328459802227e-06, + "loss": 0.6544, "step": 11647 }, { - "epoch": 2.4424407632627387, - "grad_norm": 4.436216482089664, - "learning_rate": 1.7577658408142296e-06, - "loss": 0.1411, + "epoch": 1.6447331262355267, + "grad_norm": 3.390031999498588, + "learning_rate": 8.92581281505265e-06, + "loss": 0.4987, "step": 11648 }, { - "epoch": 2.442650450828266, - "grad_norm": 4.114813369168413, - "learning_rate": 1.7564840940077831e-06, - "loss": 0.131, + "epoch": 1.6448743292855126, + "grad_norm": 3.842945748515383, + "learning_rate": 8.924297195266721e-06, + "loss": 0.6313, "step": 11649 }, { - "epoch": 2.4428601383937933, - "grad_norm": 3.9510720990415544, - "learning_rate": 1.7552027696958273e-06, - "loss": 0.1407, + "epoch": 1.6450155323354985, + "grad_norm": 3.5834684887877595, + "learning_rate": 8.922781600479663e-06, + "loss": 0.5912, "step": 11650 }, { - "epoch": 2.4430698259593204, - "grad_norm": 4.254572443994705, - "learning_rate": 1.7539218679440295e-06, - "loss": 0.134, + "epoch": 1.6451567353854843, + "grad_norm": 3.68302714092901, + "learning_rate": 8.92126603072669e-06, + "loss": 0.5515, "step": 11651 }, { - "epoch": 2.443279513524848, - "grad_norm": 5.052339382643115, - "learning_rate": 1.752641388818035e-06, - "loss": 0.1316, + "epoch": 1.6452979384354702, + "grad_norm": 3.2596205954047557, + "learning_rate": 8.91975048604303e-06, + "loss": 0.5811, "step": 11652 }, { - "epoch": 2.4434892010903755, - "grad_norm": 3.4801378040600297, - "learning_rate": 1.7513613323834766e-06, - "loss": 0.1496, + "epoch": 1.6454391414854561, + "grad_norm": 4.130513608607756, + "learning_rate": 8.918234966463902e-06, + "loss": 0.6278, "step": 11653 }, { - "epoch": 2.4436988886559026, - "grad_norm": 4.474093318666903, - "learning_rate": 1.7500816987059533e-06, - "loss": 0.1641, + "epoch": 1.645580344535442, + "grad_norm": 3.190049769827903, + "learning_rate": 8.916719472024528e-06, + "loss": 0.5296, "step": 11654 }, { - "epoch": 2.44390857622143, - "grad_norm": 3.569491462606839, - "learning_rate": 1.7488024878510546e-06, - "loss": 0.1394, + "epoch": 1.6457215475854279, + "grad_norm": 3.3935515505738016, + "learning_rate": 8.915204002760123e-06, + "loss": 0.5551, "step": 11655 }, { - "epoch": 2.4441182637869576, - "grad_norm": 5.171505995008727, - "learning_rate": 1.7475236998843404e-06, - "loss": 0.151, + "epoch": 1.6458627506354138, + "grad_norm": 5.031111589292081, + "learning_rate": 8.91368855870591e-06, + "loss": 0.862, "step": 11656 }, { - "epoch": 2.4443279513524847, - "grad_norm": 3.8850534474147698, - "learning_rate": 1.746245334871346e-06, - "loss": 0.1212, + "epoch": 1.6460039536853996, + "grad_norm": 4.309704140982303, + "learning_rate": 8.912173139897107e-06, + "loss": 0.6718, "step": 11657 }, { - "epoch": 2.4445376389180122, - "grad_norm": 5.3519759157699385, - "learning_rate": 1.7449673928775934e-06, - "loss": 0.1692, + "epoch": 1.6461451567353855, + "grad_norm": 3.4717234683437352, + "learning_rate": 8.91065774636893e-06, + "loss": 0.5295, "step": 11658 }, { - "epoch": 2.4447473264835393, - "grad_norm": 4.2457292960975925, - "learning_rate": 1.7436898739685836e-06, - "loss": 0.1013, + "epoch": 1.6462863597853714, + "grad_norm": 3.73692920359234, + "learning_rate": 8.909142378156596e-06, + "loss": 0.6556, "step": 11659 }, { - "epoch": 2.444957014049067, - "grad_norm": 4.157043468905097, - "learning_rate": 1.7424127782097866e-06, - "loss": 0.1495, + "epoch": 1.6464275628353573, + "grad_norm": 3.728525017818683, + "learning_rate": 8.90762703529532e-06, + "loss": 0.593, "step": 11660 }, { - "epoch": 2.4451667016145944, - "grad_norm": 4.393449682972221, - "learning_rate": 1.7411361056666543e-06, - "loss": 0.1655, + "epoch": 1.6465687658853432, + "grad_norm": 3.9774084573431465, + "learning_rate": 8.906111717820322e-06, + "loss": 0.5665, "step": 11661 }, { - "epoch": 2.4453763891801215, - "grad_norm": 4.414633089717411, - "learning_rate": 1.7398598564046231e-06, - "loss": 0.1447, + "epoch": 1.6467099689353288, + "grad_norm": 4.791998849361074, + "learning_rate": 8.904596425766817e-06, + "loss": 0.9872, "step": 11662 }, { - "epoch": 2.445586076745649, - "grad_norm": 5.586456908572827, - "learning_rate": 1.7385840304890978e-06, - "loss": 0.1731, + "epoch": 1.6468511719853147, + "grad_norm": 3.125707216582831, + "learning_rate": 8.903081159170016e-06, + "loss": 0.5142, "step": 11663 }, { - "epoch": 2.4457957643111765, - "grad_norm": 2.948747927618289, - "learning_rate": 1.7373086279854734e-06, - "loss": 0.108, + "epoch": 1.6469923750353006, + "grad_norm": 3.5455043418769008, + "learning_rate": 8.901565918065134e-06, + "loss": 0.5645, "step": 11664 }, { - "epoch": 2.4460054518767036, - "grad_norm": 5.526856821536726, - "learning_rate": 1.7360336489591122e-06, - "loss": 0.1774, + "epoch": 1.6471335780852865, + "grad_norm": 3.8101465924274005, + "learning_rate": 8.900050702487386e-06, + "loss": 0.658, "step": 11665 }, { - "epoch": 2.446215139442231, - "grad_norm": 3.6775186599390577, - "learning_rate": 1.7347590934753566e-06, - "loss": 0.1115, + "epoch": 1.6472747811352724, + "grad_norm": 3.543847367677812, + "learning_rate": 8.898535512471986e-06, + "loss": 0.6415, "step": 11666 }, { - "epoch": 2.4464248270077587, - "grad_norm": 4.178274685953776, - "learning_rate": 1.733484961599533e-06, - "loss": 0.1372, + "epoch": 1.6474159841852583, + "grad_norm": 3.354424342347385, + "learning_rate": 8.897020348054147e-06, + "loss": 0.5487, "step": 11667 }, { - "epoch": 2.4466345145732857, - "grad_norm": 4.910626158879536, - "learning_rate": 1.7322112533969482e-06, - "loss": 0.1535, + "epoch": 1.6475571872352441, + "grad_norm": 3.35012041619421, + "learning_rate": 8.895505209269078e-06, + "loss": 0.5568, "step": 11668 }, { - "epoch": 2.4468442021388133, - "grad_norm": 4.471037286616529, - "learning_rate": 1.730937968932871e-06, - "loss": 0.1402, + "epoch": 1.64769839028523, + "grad_norm": 3.593323874560674, + "learning_rate": 8.893990096151986e-06, + "loss": 0.6238, "step": 11669 }, { - "epoch": 2.4470538897043403, - "grad_norm": 3.588733974631398, - "learning_rate": 1.7296651082725657e-06, - "loss": 0.1008, + "epoch": 1.647839593335216, + "grad_norm": 3.766345056822593, + "learning_rate": 8.89247500873809e-06, + "loss": 0.6142, "step": 11670 }, { - "epoch": 2.447263577269868, - "grad_norm": 4.311689490493596, - "learning_rate": 1.7283926714812693e-06, - "loss": 0.1601, + "epoch": 1.6479807963852018, + "grad_norm": 3.7747338031600552, + "learning_rate": 8.890959947062598e-06, + "loss": 0.572, "step": 11671 }, { - "epoch": 2.4474732648353954, - "grad_norm": 4.002955842729539, - "learning_rate": 1.727120658624195e-06, - "loss": 0.1578, + "epoch": 1.6481219994351877, + "grad_norm": 3.1198525118866662, + "learning_rate": 8.889444911160713e-06, + "loss": 0.4696, "step": 11672 }, { - "epoch": 2.4476829524009225, - "grad_norm": 4.291165418814481, - "learning_rate": 1.7258490697665331e-06, - "loss": 0.1432, + "epoch": 1.6482632024851736, + "grad_norm": 3.595561607069267, + "learning_rate": 8.887929901067652e-06, + "loss": 0.5346, "step": 11673 }, { - "epoch": 2.44789263996645, - "grad_norm": 5.26831003810725, - "learning_rate": 1.72457790497346e-06, - "loss": 0.184, + "epoch": 1.6484044055351594, + "grad_norm": 3.2421217537564884, + "learning_rate": 8.88641491681862e-06, + "loss": 0.5262, "step": 11674 }, { - "epoch": 2.4481023275319775, - "grad_norm": 3.570986338916178, - "learning_rate": 1.7233071643101195e-06, - "loss": 0.1128, + "epoch": 1.6485456085851453, + "grad_norm": 3.868888269725596, + "learning_rate": 8.884899958448828e-06, + "loss": 0.5866, "step": 11675 }, { - "epoch": 2.4483120150975046, - "grad_norm": 3.9987271604371264, - "learning_rate": 1.722036847841645e-06, - "loss": 0.1418, + "epoch": 1.6486868116351312, + "grad_norm": 4.077322405823822, + "learning_rate": 8.883385025993474e-06, + "loss": 0.6784, "step": 11676 }, { - "epoch": 2.448521702663032, - "grad_norm": 3.76620197488452, - "learning_rate": 1.7207669556331385e-06, - "loss": 0.134, + "epoch": 1.648828014685117, + "grad_norm": 2.962325793892677, + "learning_rate": 8.881870119487772e-06, + "loss": 0.4685, "step": 11677 }, { - "epoch": 2.4487313902285592, - "grad_norm": 3.698337422628714, - "learning_rate": 1.719497487749683e-06, - "loss": 0.1195, + "epoch": 1.648969217735103, + "grad_norm": 4.32559887484316, + "learning_rate": 8.880355238966923e-06, + "loss": 0.6224, "step": 11678 }, { - "epoch": 2.4489410777940868, - "grad_norm": 4.235277695964923, - "learning_rate": 1.7182284442563435e-06, - "loss": 0.1561, + "epoch": 1.6491104207850888, + "grad_norm": 3.7800636436025745, + "learning_rate": 8.878840384466137e-06, + "loss": 0.5824, "step": 11679 }, { - "epoch": 2.4491507653596143, - "grad_norm": 3.6130341874533607, - "learning_rate": 1.716959825218162e-06, - "loss": 0.1433, + "epoch": 1.6492516238350747, + "grad_norm": 3.070711491410998, + "learning_rate": 8.877325556020615e-06, + "loss": 0.4662, "step": 11680 }, { - "epoch": 2.4493604529251414, - "grad_norm": 4.189201744914129, - "learning_rate": 1.7156916307001558e-06, - "loss": 0.1545, + "epoch": 1.6493928268850606, + "grad_norm": 3.6052152716855383, + "learning_rate": 8.87581075366556e-06, + "loss": 0.504, "step": 11681 }, { - "epoch": 2.449570140490669, - "grad_norm": 3.5592516219431922, - "learning_rate": 1.7144238607673202e-06, - "loss": 0.0971, + "epoch": 1.6495340299350465, + "grad_norm": 4.023284854431615, + "learning_rate": 8.874295977436182e-06, + "loss": 0.67, "step": 11682 }, { - "epoch": 2.4497798280561964, - "grad_norm": 4.409566367736808, - "learning_rate": 1.7131565154846342e-06, - "loss": 0.1576, + "epoch": 1.6496752329850324, + "grad_norm": 3.756213994290018, + "learning_rate": 8.872781227367679e-06, + "loss": 0.6613, "step": 11683 }, { - "epoch": 2.4499895156217235, - "grad_norm": 4.795557195698707, - "learning_rate": 1.7118895949170466e-06, - "loss": 0.1423, + "epoch": 1.6498164360350183, + "grad_norm": 3.0313103004437907, + "learning_rate": 8.871266503495255e-06, + "loss": 0.3971, "step": 11684 }, { - "epoch": 2.450199203187251, - "grad_norm": 3.838838929487421, - "learning_rate": 1.710623099129496e-06, - "loss": 0.1372, + "epoch": 1.6499576390850041, + "grad_norm": 3.678960670940138, + "learning_rate": 8.869751805854107e-06, + "loss": 0.5223, "step": 11685 }, { - "epoch": 2.4504088907527786, - "grad_norm": 4.748323924801688, - "learning_rate": 1.7093570281868877e-06, - "loss": 0.1089, + "epoch": 1.65009884213499, + "grad_norm": 3.9941849530176667, + "learning_rate": 8.868237134479437e-06, + "loss": 0.6456, "step": 11686 }, { - "epoch": 2.4506185783183057, - "grad_norm": 5.3271736737080975, - "learning_rate": 1.708091382154109e-06, - "loss": 0.1601, + "epoch": 1.650240045184976, + "grad_norm": 3.295370068304338, + "learning_rate": 8.86672248940645e-06, + "loss": 0.4848, "step": 11687 }, { - "epoch": 2.450828265883833, - "grad_norm": 4.455878469225267, - "learning_rate": 1.7068261610960313e-06, - "loss": 0.1468, + "epoch": 1.6503812482349618, + "grad_norm": 3.7490172750075104, + "learning_rate": 8.865207870670342e-06, + "loss": 0.4955, "step": 11688 }, { - "epoch": 2.4510379534493603, - "grad_norm": 3.5607846836403882, - "learning_rate": 1.705561365077496e-06, - "loss": 0.1262, + "epoch": 1.6505224512849477, + "grad_norm": 4.505626935469949, + "learning_rate": 8.863693278306314e-06, + "loss": 0.6965, "step": 11689 }, { - "epoch": 2.451247641014888, - "grad_norm": 3.7169329686739547, - "learning_rate": 1.7042969941633247e-06, - "loss": 0.1213, + "epoch": 1.6506636543349336, + "grad_norm": 4.2173405537704, + "learning_rate": 8.862178712349562e-06, + "loss": 0.6328, "step": 11690 }, { - "epoch": 2.4514573285804153, - "grad_norm": 4.43642304382663, - "learning_rate": 1.703033048418321e-06, - "loss": 0.1482, + "epoch": 1.6508048573849194, + "grad_norm": 3.878776562027165, + "learning_rate": 8.860664172835285e-06, + "loss": 0.632, "step": 11691 }, { - "epoch": 2.4516670161459424, - "grad_norm": 5.800553639759782, - "learning_rate": 1.7017695279072655e-06, - "loss": 0.1445, + "epoch": 1.6509460604349053, + "grad_norm": 3.4190722635904525, + "learning_rate": 8.859149659798685e-06, + "loss": 0.4884, "step": 11692 }, { - "epoch": 2.45187670371147, - "grad_norm": 3.820853437237589, - "learning_rate": 1.700506432694915e-06, - "loss": 0.1373, + "epoch": 1.6510872634848912, + "grad_norm": 3.6837269323365742, + "learning_rate": 8.857635173274952e-06, + "loss": 0.5828, "step": 11693 }, { - "epoch": 2.4520863912769975, - "grad_norm": 3.574471631119868, - "learning_rate": 1.6992437628460012e-06, - "loss": 0.0946, + "epoch": 1.651228466534877, + "grad_norm": 4.891267917574343, + "learning_rate": 8.856120713299284e-06, + "loss": 0.6136, "step": 11694 }, { - "epoch": 2.4522960788425245, - "grad_norm": 3.8550870460287596, - "learning_rate": 1.6979815184252446e-06, - "loss": 0.122, + "epoch": 1.651369669584863, + "grad_norm": 3.5567123812305823, + "learning_rate": 8.854606279906874e-06, + "loss": 0.6207, "step": 11695 }, { - "epoch": 2.452505766408052, - "grad_norm": 4.39650800220711, - "learning_rate": 1.6967196994973322e-06, - "loss": 0.1391, + "epoch": 1.6515108726348489, + "grad_norm": 3.6809829824806513, + "learning_rate": 8.853091873132921e-06, + "loss": 0.5735, "step": 11696 }, { - "epoch": 2.452715453973579, - "grad_norm": 4.405001305073273, - "learning_rate": 1.6954583061269381e-06, - "loss": 0.1417, + "epoch": 1.6516520756848347, + "grad_norm": 2.8735230315058202, + "learning_rate": 8.851577493012617e-06, + "loss": 0.3971, "step": 11697 }, { - "epoch": 2.4529251415391067, - "grad_norm": 3.62682088591341, - "learning_rate": 1.6941973383787102e-06, - "loss": 0.1689, + "epoch": 1.6517932787348206, + "grad_norm": 3.3477124892043664, + "learning_rate": 8.850063139581156e-06, + "loss": 0.546, "step": 11698 }, { - "epoch": 2.453134829104634, - "grad_norm": 4.326610062497214, - "learning_rate": 1.6929367963172716e-06, - "loss": 0.1487, + "epoch": 1.6519344817848065, + "grad_norm": 3.8339880435060785, + "learning_rate": 8.848548812873731e-06, + "loss": 0.7709, "step": 11699 }, { - "epoch": 2.4533445166701613, - "grad_norm": 4.446067010239069, - "learning_rate": 1.6916766800072294e-06, - "loss": 0.1643, + "epoch": 1.6520756848347924, + "grad_norm": 3.6966808059271954, + "learning_rate": 8.847034512925536e-06, + "loss": 0.5491, "step": 11700 }, { - "epoch": 2.453554204235689, - "grad_norm": 3.733085875432683, - "learning_rate": 1.6904169895131705e-06, - "loss": 0.1421, + "epoch": 1.6522168878847783, + "grad_norm": 4.173640064468473, + "learning_rate": 8.845520239771763e-06, + "loss": 0.541, "step": 11701 }, { - "epoch": 2.4537638918012163, - "grad_norm": 4.979338794226402, - "learning_rate": 1.6891577248996528e-06, - "loss": 0.1677, + "epoch": 1.6523580909347642, + "grad_norm": 3.8786066277582245, + "learning_rate": 8.844005993447599e-06, + "loss": 0.6749, "step": 11702 }, { - "epoch": 2.4539735793667434, - "grad_norm": 4.564529022072222, - "learning_rate": 1.6878988862312141e-06, - "loss": 0.1345, + "epoch": 1.65249929398475, + "grad_norm": 3.5816801338100057, + "learning_rate": 8.842491773988234e-06, + "loss": 0.4931, "step": 11703 }, { - "epoch": 2.454183266932271, - "grad_norm": 3.7017955948351617, - "learning_rate": 1.6866404735723762e-06, - "loss": 0.124, + "epoch": 1.652640497034736, + "grad_norm": 2.9550820136021345, + "learning_rate": 8.840977581428863e-06, + "loss": 0.4373, "step": 11704 }, { - "epoch": 2.4543929544977985, - "grad_norm": 4.31256264758994, - "learning_rate": 1.6853824869876334e-06, - "loss": 0.1445, + "epoch": 1.6527817000847218, + "grad_norm": 3.9351023844619992, + "learning_rate": 8.839463415804672e-06, + "loss": 0.6827, "step": 11705 }, { - "epoch": 2.4546026420633256, - "grad_norm": 4.726608581160705, - "learning_rate": 1.6841249265414562e-06, - "loss": 0.1892, + "epoch": 1.6529229031347077, + "grad_norm": 3.080026457094292, + "learning_rate": 8.837949277150849e-06, + "loss": 0.4974, "step": 11706 }, { - "epoch": 2.454812329628853, - "grad_norm": 3.951149583094605, - "learning_rate": 1.6828677922983016e-06, - "loss": 0.1359, + "epoch": 1.6530641061846936, + "grad_norm": 3.650675087012112, + "learning_rate": 8.836435165502582e-06, + "loss": 0.6208, "step": 11707 }, { - "epoch": 2.45502201719438, - "grad_norm": 4.723496675944823, - "learning_rate": 1.6816110843225964e-06, - "loss": 0.1588, + "epoch": 1.6532053092346795, + "grad_norm": 3.9578076835115925, + "learning_rate": 8.83492108089506e-06, + "loss": 0.6134, "step": 11708 }, { - "epoch": 2.4552317047599077, - "grad_norm": 3.5675918903297243, - "learning_rate": 1.6803548026787531e-06, - "loss": 0.1402, + "epoch": 1.6533465122846653, + "grad_norm": 3.6011046259563826, + "learning_rate": 8.833407023363471e-06, + "loss": 0.5831, "step": 11709 }, { - "epoch": 2.4554413923254352, - "grad_norm": 3.5477777399110297, - "learning_rate": 1.6790989474311525e-06, - "loss": 0.103, + "epoch": 1.6534877153346512, + "grad_norm": 3.902404361282688, + "learning_rate": 8.831892992943e-06, + "loss": 0.6151, "step": 11710 }, { - "epoch": 2.4556510798909623, - "grad_norm": 5.043903318896898, - "learning_rate": 1.6778435186441655e-06, - "loss": 0.1773, + "epoch": 1.653628918384637, + "grad_norm": 3.9496105402365167, + "learning_rate": 8.83037898966883e-06, + "loss": 0.6642, "step": 11711 }, { - "epoch": 2.45586076745649, - "grad_norm": 3.9463900449576945, - "learning_rate": 1.6765885163821305e-06, - "loss": 0.1436, + "epoch": 1.653770121434623, + "grad_norm": 2.9187954365707043, + "learning_rate": 8.828865013576143e-06, + "loss": 0.5007, "step": 11712 }, { - "epoch": 2.4560704550220174, - "grad_norm": 4.345611977317838, - "learning_rate": 1.6753339407093716e-06, - "loss": 0.1377, + "epoch": 1.6539113244846089, + "grad_norm": 3.917949187995686, + "learning_rate": 8.827351064700131e-06, + "loss": 0.7263, "step": 11713 }, { - "epoch": 2.4562801425875445, - "grad_norm": 4.484473670769457, - "learning_rate": 1.6740797916901875e-06, - "loss": 0.1516, + "epoch": 1.6540525275345948, + "grad_norm": 5.595735320794007, + "learning_rate": 8.825837143075973e-06, + "loss": 0.8281, "step": 11714 }, { - "epoch": 2.456489830153072, - "grad_norm": 3.988734787420637, - "learning_rate": 1.672826069388852e-06, - "loss": 0.1699, + "epoch": 1.6541937305845806, + "grad_norm": 3.8497917584624224, + "learning_rate": 8.82432324873885e-06, + "loss": 0.6201, "step": 11715 }, { - "epoch": 2.456699517718599, - "grad_norm": 3.379006212056817, - "learning_rate": 1.671572773869623e-06, - "loss": 0.1209, + "epoch": 1.6543349336345665, + "grad_norm": 3.508345806215123, + "learning_rate": 8.822809381723952e-06, + "loss": 0.5706, "step": 11716 }, { - "epoch": 2.4569092052841266, - "grad_norm": 3.2239407754406737, - "learning_rate": 1.6703199051967368e-06, - "loss": 0.1114, + "epoch": 1.6544761366845524, + "grad_norm": 3.5817064557877147, + "learning_rate": 8.821295542066452e-06, + "loss": 0.5832, "step": 11717 }, { - "epoch": 2.457118892849654, - "grad_norm": 4.609798421444875, - "learning_rate": 1.6690674634344028e-06, - "loss": 0.1484, + "epoch": 1.6546173397345383, + "grad_norm": 3.226557789293554, + "learning_rate": 8.81978172980154e-06, + "loss": 0.4655, "step": 11718 }, { - "epoch": 2.457328580415181, - "grad_norm": 3.825286887146204, - "learning_rate": 1.667815448646808e-06, - "loss": 0.1322, + "epoch": 1.6547585427845242, + "grad_norm": 5.145284125784873, + "learning_rate": 8.818267944964387e-06, + "loss": 0.7388, "step": 11719 }, { - "epoch": 2.4575382679807087, - "grad_norm": 4.019395236386977, - "learning_rate": 1.666563860898125e-06, - "loss": 0.1508, + "epoch": 1.65489974583451, + "grad_norm": 3.4128155067648263, + "learning_rate": 8.816754187590175e-06, + "loss": 0.5788, "step": 11720 }, { - "epoch": 2.4577479555462363, - "grad_norm": 3.8144259825413203, - "learning_rate": 1.665312700252496e-06, - "loss": 0.1356, + "epoch": 1.655040948884496, + "grad_norm": 3.7915122683491105, + "learning_rate": 8.815240457714086e-06, + "loss": 0.5236, "step": 11721 }, { - "epoch": 2.4579576431117633, - "grad_norm": 3.903707468966791, - "learning_rate": 1.6640619667740487e-06, - "loss": 0.148, + "epoch": 1.6551821519344818, + "grad_norm": 3.669524406192264, + "learning_rate": 8.813726755371298e-06, + "loss": 0.5782, "step": 11722 }, { - "epoch": 2.458167330677291, - "grad_norm": 5.474609011800797, - "learning_rate": 1.662811660526884e-06, - "loss": 0.1788, + "epoch": 1.6553233549844677, + "grad_norm": 3.1938244798423856, + "learning_rate": 8.812213080596988e-06, + "loss": 0.6074, "step": 11723 }, { - "epoch": 2.4583770182428184, - "grad_norm": 3.795217878221053, - "learning_rate": 1.6615617815750785e-06, - "loss": 0.1367, + "epoch": 1.6554645580344536, + "grad_norm": 3.135169879581096, + "learning_rate": 8.81069943342633e-06, + "loss": 0.4707, "step": 11724 }, { - "epoch": 2.4585867058083455, - "grad_norm": 4.186073310279482, - "learning_rate": 1.6603123299826952e-06, - "loss": 0.1121, + "epoch": 1.6556057610844395, + "grad_norm": 2.9475954738749115, + "learning_rate": 8.809185813894507e-06, + "loss": 0.5006, "step": 11725 }, { - "epoch": 2.458796393373873, - "grad_norm": 5.051326217628293, - "learning_rate": 1.6590633058137739e-06, - "loss": 0.1771, + "epoch": 1.6557469641344253, + "grad_norm": 3.396585108213299, + "learning_rate": 8.807672222036692e-06, + "loss": 0.535, "step": 11726 }, { - "epoch": 2.4590060809394005, - "grad_norm": 2.859400860563314, - "learning_rate": 1.6578147091323194e-06, - "loss": 0.1112, + "epoch": 1.6558881671844112, + "grad_norm": 3.695789634824404, + "learning_rate": 8.806158657888058e-06, + "loss": 0.5993, "step": 11727 }, { - "epoch": 2.4592157685049276, - "grad_norm": 4.076957558715104, - "learning_rate": 1.65656654000233e-06, - "loss": 0.1504, + "epoch": 1.6560293702343971, + "grad_norm": 4.480429315692619, + "learning_rate": 8.804645121483781e-06, + "loss": 0.6998, "step": 11728 }, { - "epoch": 2.459425456070455, - "grad_norm": 4.974530500416484, - "learning_rate": 1.6553187984877794e-06, - "loss": 0.1432, + "epoch": 1.656170573284383, + "grad_norm": 3.976790134445273, + "learning_rate": 8.803131612859034e-06, + "loss": 0.5743, "step": 11729 }, { - "epoch": 2.4596351436359822, - "grad_norm": 6.832899218295099, - "learning_rate": 1.654071484652613e-06, - "loss": 0.2114, + "epoch": 1.6563117763343689, + "grad_norm": 3.199168761040424, + "learning_rate": 8.801618132048992e-06, + "loss": 0.4855, "step": 11730 }, { - "epoch": 2.4598448312015098, - "grad_norm": 3.425279238123437, - "learning_rate": 1.652824598560756e-06, - "loss": 0.1393, + "epoch": 1.6564529793843548, + "grad_norm": 3.847428251853948, + "learning_rate": 8.80010467908883e-06, + "loss": 0.6054, "step": 11731 }, { - "epoch": 2.4600545187670373, - "grad_norm": 4.798244783013604, - "learning_rate": 1.6515781402761178e-06, - "loss": 0.1795, + "epoch": 1.6565941824343406, + "grad_norm": 4.088439338516689, + "learning_rate": 8.798591254013712e-06, + "loss": 0.7225, "step": 11732 }, { - "epoch": 2.4602642063325644, - "grad_norm": 5.160155677326122, - "learning_rate": 1.650332109862578e-06, - "loss": 0.1755, + "epoch": 1.6567353854843265, + "grad_norm": 3.9550393005834454, + "learning_rate": 8.797077856858817e-06, + "loss": 0.5615, "step": 11733 }, { - "epoch": 2.460473893898092, - "grad_norm": 4.55573336884829, - "learning_rate": 1.6490865073840012e-06, - "loss": 0.1371, + "epoch": 1.6568765885343124, + "grad_norm": 3.7456608460058947, + "learning_rate": 8.795564487659313e-06, + "loss": 0.7042, "step": 11734 }, { - "epoch": 2.460683581463619, - "grad_norm": 4.689444648769969, - "learning_rate": 1.6478413329042241e-06, - "loss": 0.1581, + "epoch": 1.6570177915842983, + "grad_norm": 3.3174618500262616, + "learning_rate": 8.794051146450374e-06, + "loss": 0.6028, "step": 11735 }, { - "epoch": 2.4608932690291465, - "grad_norm": 5.4201384224461435, - "learning_rate": 1.6465965864870636e-06, - "loss": 0.1497, + "epoch": 1.6571589946342842, + "grad_norm": 3.262161968653483, + "learning_rate": 8.792537833267161e-06, + "loss": 0.5298, "step": 11736 }, { - "epoch": 2.461102956594674, - "grad_norm": 3.6287365004064465, - "learning_rate": 1.6453522681963164e-06, - "loss": 0.1536, + "epoch": 1.65730019768427, + "grad_norm": 3.49527466674295, + "learning_rate": 8.79102454814485e-06, + "loss": 0.5388, "step": 11737 }, { - "epoch": 2.461312644160201, - "grad_norm": 3.6616532466385783, - "learning_rate": 1.6441083780957568e-06, - "loss": 0.1278, + "epoch": 1.657441400734256, + "grad_norm": 3.223092387332322, + "learning_rate": 8.789511291118601e-06, + "loss": 0.5795, "step": 11738 }, { - "epoch": 2.4615223317257287, - "grad_norm": 3.6914578769747046, - "learning_rate": 1.642864916249136e-06, - "loss": 0.1445, + "epoch": 1.6575826037842418, + "grad_norm": 3.6545271326870457, + "learning_rate": 8.787998062223593e-06, + "loss": 0.5544, "step": 11739 }, { - "epoch": 2.461732019291256, - "grad_norm": 3.6663745713038933, - "learning_rate": 1.6416218827201813e-06, - "loss": 0.1338, + "epoch": 1.6577238068342277, + "grad_norm": 3.3542400054261834, + "learning_rate": 8.786484861494984e-06, + "loss": 0.6543, "step": 11740 }, { - "epoch": 2.4619417068567833, - "grad_norm": 3.833988929727368, - "learning_rate": 1.6403792775726035e-06, - "loss": 0.1182, + "epoch": 1.6578650098842136, + "grad_norm": 3.796930617899137, + "learning_rate": 8.78497168896794e-06, + "loss": 0.6668, "step": 11741 }, { - "epoch": 2.462151394422311, - "grad_norm": 4.423237581815354, - "learning_rate": 1.6391371008700852e-06, - "loss": 0.1395, + "epoch": 1.6580062129341995, + "grad_norm": 3.715921028684624, + "learning_rate": 8.783458544677633e-06, + "loss": 0.5827, "step": 11742 }, { - "epoch": 2.4623610819878383, - "grad_norm": 4.189313452066283, - "learning_rate": 1.637895352676293e-06, - "loss": 0.1387, + "epoch": 1.6581474159841854, + "grad_norm": 3.297186870601694, + "learning_rate": 8.781945428659225e-06, + "loss": 0.622, "step": 11743 }, { - "epoch": 2.4625707695533654, - "grad_norm": 4.3764190239440515, - "learning_rate": 1.6366540330548686e-06, - "loss": 0.1372, + "epoch": 1.6582886190341712, + "grad_norm": 3.017646598608677, + "learning_rate": 8.780432340947879e-06, + "loss": 0.504, "step": 11744 }, { - "epoch": 2.462780457118893, - "grad_norm": 5.014165012194932, - "learning_rate": 1.6354131420694276e-06, - "loss": 0.1585, + "epoch": 1.6584298220841571, + "grad_norm": 3.261233009803769, + "learning_rate": 8.778919281578758e-06, + "loss": 0.5652, "step": 11745 }, { - "epoch": 2.4629901446844205, - "grad_norm": 5.23412055920183, - "learning_rate": 1.63417267978357e-06, - "loss": 0.1455, + "epoch": 1.658571025134143, + "grad_norm": 3.4242993367409182, + "learning_rate": 8.777406250587021e-06, + "loss": 0.5675, "step": 11746 }, { - "epoch": 2.4631998322499475, - "grad_norm": 3.5166038732592604, - "learning_rate": 1.6329326462608774e-06, - "loss": 0.154, + "epoch": 1.658712228184129, + "grad_norm": 3.5736461229099943, + "learning_rate": 8.77589324800784e-06, + "loss": 0.5426, "step": 11747 }, { - "epoch": 2.463409519815475, - "grad_norm": 3.724216827066388, - "learning_rate": 1.6316930415648946e-06, - "loss": 0.1353, + "epoch": 1.6588534312341148, + "grad_norm": 3.608884599784015, + "learning_rate": 8.77438027387637e-06, + "loss": 0.6978, "step": 11748 }, { - "epoch": 2.463619207381002, - "grad_norm": 3.095427442032372, - "learning_rate": 1.6304538657591562e-06, - "loss": 0.096, + "epoch": 1.6589946342841007, + "grad_norm": 3.551761593462726, + "learning_rate": 8.772867328227773e-06, + "loss": 0.5274, "step": 11749 }, { - "epoch": 2.4638288949465297, - "grad_norm": 4.933847781004414, - "learning_rate": 1.629215118907177e-06, - "loss": 0.1683, + "epoch": 1.6591358373340865, + "grad_norm": 2.7374876868902667, + "learning_rate": 8.771354411097207e-06, + "loss": 0.4503, "step": 11750 }, { - "epoch": 2.464038582512057, - "grad_norm": 4.997692151281342, - "learning_rate": 1.6279768010724405e-06, - "loss": 0.1609, + "epoch": 1.6592770403840724, + "grad_norm": 3.2416659758678805, + "learning_rate": 8.769841522519835e-06, + "loss": 0.5158, "step": 11751 }, { - "epoch": 2.4642482700775843, - "grad_norm": 3.474351926937195, - "learning_rate": 1.626738912318412e-06, - "loss": 0.1134, + "epoch": 1.6594182434340583, + "grad_norm": 3.8179168465472646, + "learning_rate": 8.768328662530818e-06, + "loss": 0.5645, "step": 11752 }, { - "epoch": 2.464457957643112, - "grad_norm": 3.8562070006652234, - "learning_rate": 1.625501452708539e-06, - "loss": 0.153, + "epoch": 1.6595594464840442, + "grad_norm": 3.2572112471660035, + "learning_rate": 8.76681583116531e-06, + "loss": 0.4985, "step": 11753 }, { - "epoch": 2.464667645208639, - "grad_norm": 3.0809851411868796, - "learning_rate": 1.6242644223062388e-06, - "loss": 0.0937, + "epoch": 1.65970064953403, + "grad_norm": 2.7813644352470885, + "learning_rate": 8.765303028458468e-06, + "loss": 0.4021, "step": 11754 }, { - "epoch": 2.4648773327741664, - "grad_norm": 4.23543577723989, - "learning_rate": 1.623027821174916e-06, - "loss": 0.1313, + "epoch": 1.659841852584016, + "grad_norm": 3.285496379366319, + "learning_rate": 8.763790254445448e-06, + "loss": 0.5305, "step": 11755 }, { - "epoch": 2.465087020339694, - "grad_norm": 5.446591071000503, - "learning_rate": 1.6217916493779473e-06, - "loss": 0.1768, + "epoch": 1.6599830556340018, + "grad_norm": 4.624296195793952, + "learning_rate": 8.762277509161413e-06, + "loss": 0.5827, "step": 11756 }, { - "epoch": 2.465296707905221, - "grad_norm": 3.1987959570885462, - "learning_rate": 1.6205559069786847e-06, - "loss": 0.1279, + "epoch": 1.6601242586839877, + "grad_norm": 2.9375287558686876, + "learning_rate": 8.760764792641512e-06, + "loss": 0.5053, "step": 11757 }, { - "epoch": 2.4655063954707486, - "grad_norm": 4.173271586019623, - "learning_rate": 1.6193205940404656e-06, - "loss": 0.1459, + "epoch": 1.6602654617339736, + "grad_norm": 3.0139769776682126, + "learning_rate": 8.7592521049209e-06, + "loss": 0.4673, "step": 11758 }, { - "epoch": 2.465716083036276, - "grad_norm": 3.8808073971218806, - "learning_rate": 1.6180857106266045e-06, - "loss": 0.1158, + "epoch": 1.6604066647839595, + "grad_norm": 3.011707378070552, + "learning_rate": 8.757739446034737e-06, + "loss": 0.482, "step": 11759 }, { - "epoch": 2.465925770601803, - "grad_norm": 3.890823200143949, - "learning_rate": 1.616851256800387e-06, - "loss": 0.0957, + "epoch": 1.6605478678339454, + "grad_norm": 3.7547121265718975, + "learning_rate": 8.756226816018172e-06, + "loss": 0.6058, "step": 11760 }, { - "epoch": 2.4661354581673307, - "grad_norm": 4.7218539451389105, - "learning_rate": 1.6156172326250818e-06, - "loss": 0.1753, + "epoch": 1.6606890708839313, + "grad_norm": 3.5372775443242768, + "learning_rate": 8.75471421490636e-06, + "loss": 0.5591, "step": 11761 }, { - "epoch": 2.4663451457328582, - "grad_norm": 6.244042513007644, - "learning_rate": 1.6143836381639366e-06, - "loss": 0.1826, + "epoch": 1.6608302739339171, + "grad_norm": 3.153396252381114, + "learning_rate": 8.75320164273445e-06, + "loss": 0.4684, "step": 11762 }, { - "epoch": 2.4665548332983853, - "grad_norm": 3.9792557462414972, - "learning_rate": 1.6131504734801716e-06, - "loss": 0.1677, + "epoch": 1.660971476983903, + "grad_norm": 3.1864200232606636, + "learning_rate": 8.751689099537592e-06, + "loss": 0.4585, "step": 11763 }, { - "epoch": 2.466764520863913, - "grad_norm": 3.9360357046038206, - "learning_rate": 1.6119177386369943e-06, - "loss": 0.155, + "epoch": 1.6611126800338887, + "grad_norm": 3.1863992697492174, + "learning_rate": 8.750176585350945e-06, + "loss": 0.5074, "step": 11764 }, { - "epoch": 2.4669742084294404, - "grad_norm": 4.0894509689794685, - "learning_rate": 1.6106854336975808e-06, - "loss": 0.1581, + "epoch": 1.6612538830838746, + "grad_norm": 3.258180484799217, + "learning_rate": 8.748664100209652e-06, + "loss": 0.4792, "step": 11765 }, { - "epoch": 2.4671838959949675, - "grad_norm": 4.56045207919616, - "learning_rate": 1.609453558725087e-06, - "loss": 0.1204, + "epoch": 1.6613950861338604, + "grad_norm": 3.3518429115544275, + "learning_rate": 8.747151644148867e-06, + "loss": 0.4493, "step": 11766 }, { - "epoch": 2.467393583560495, - "grad_norm": 4.113545949616134, - "learning_rate": 1.6082221137826538e-06, - "loss": 0.1618, + "epoch": 1.6615362891838463, + "grad_norm": 3.1954167115768985, + "learning_rate": 8.745639217203733e-06, + "loss": 0.4653, "step": 11767 }, { - "epoch": 2.467603271126022, - "grad_norm": 3.3431955946339924, - "learning_rate": 1.6069910989333915e-06, - "loss": 0.1302, + "epoch": 1.6616774922338322, + "grad_norm": 3.888409935697885, + "learning_rate": 8.744126819409405e-06, + "loss": 0.6661, "step": 11768 }, { - "epoch": 2.4678129586915496, - "grad_norm": 3.881445070057582, - "learning_rate": 1.6057605142403897e-06, - "loss": 0.1468, + "epoch": 1.661818695283818, + "grad_norm": 3.927085264786611, + "learning_rate": 8.74261445080103e-06, + "loss": 0.5647, "step": 11769 }, { - "epoch": 2.468022646257077, - "grad_norm": 5.8557843195805415, - "learning_rate": 1.6045303597667206e-06, - "loss": 0.187, + "epoch": 1.661959898333804, + "grad_norm": 4.236553920520013, + "learning_rate": 8.741102111413749e-06, + "loss": 0.609, "step": 11770 }, { - "epoch": 2.468232333822604, - "grad_norm": 5.615903884614249, - "learning_rate": 1.6033006355754355e-06, - "loss": 0.1734, + "epoch": 1.6621011013837899, + "grad_norm": 3.9622932380178275, + "learning_rate": 8.73958980128271e-06, + "loss": 0.6006, "step": 11771 }, { - "epoch": 2.4684420213881317, - "grad_norm": 4.480712096008919, - "learning_rate": 1.6020713417295541e-06, - "loss": 0.1416, + "epoch": 1.6622423044337757, + "grad_norm": 3.9180595869253225, + "learning_rate": 8.738077520443061e-06, + "loss": 0.6383, "step": 11772 }, { - "epoch": 2.468651708953659, - "grad_norm": 4.551764679829709, - "learning_rate": 1.6008424782920806e-06, - "loss": 0.1397, + "epoch": 1.6623835074837616, + "grad_norm": 3.644559501290641, + "learning_rate": 8.736565268929943e-06, + "loss": 0.556, "step": 11773 }, { - "epoch": 2.4688613965191863, - "grad_norm": 4.021038244891588, - "learning_rate": 1.599614045325998e-06, - "loss": 0.1084, + "epoch": 1.6625247105337475, + "grad_norm": 3.072015072593699, + "learning_rate": 8.735053046778506e-06, + "loss": 0.4923, "step": 11774 }, { - "epoch": 2.469071084084714, - "grad_norm": 3.7796055465638956, - "learning_rate": 1.5983860428942677e-06, - "loss": 0.1123, + "epoch": 1.6626659135837334, + "grad_norm": 3.3318947569572863, + "learning_rate": 8.733540854023888e-06, + "loss": 0.4738, "step": 11775 }, { - "epoch": 2.469280771650241, - "grad_norm": 3.734566099884102, - "learning_rate": 1.5971584710598242e-06, - "loss": 0.1061, + "epoch": 1.6628071166337193, + "grad_norm": 2.651235984828113, + "learning_rate": 8.732028690701235e-06, + "loss": 0.4197, "step": 11776 }, { - "epoch": 2.4694904592157685, - "grad_norm": 4.178463492697262, - "learning_rate": 1.5959313298855804e-06, - "loss": 0.1362, + "epoch": 1.6629483196837052, + "grad_norm": 3.2354418894991457, + "learning_rate": 8.730516556845688e-06, + "loss": 0.4439, "step": 11777 }, { - "epoch": 2.469700146781296, - "grad_norm": 4.903862944623397, - "learning_rate": 1.594704619434435e-06, - "loss": 0.1884, + "epoch": 1.663089522733691, + "grad_norm": 3.2226979105164872, + "learning_rate": 8.729004452492388e-06, + "loss": 0.5676, "step": 11778 }, { - "epoch": 2.469909834346823, - "grad_norm": 4.299595522877735, - "learning_rate": 1.5934783397692533e-06, - "loss": 0.1291, + "epoch": 1.663230725783677, + "grad_norm": 3.5426747653551582, + "learning_rate": 8.727492377676474e-06, + "loss": 0.6309, "step": 11779 }, { - "epoch": 2.4701195219123506, - "grad_norm": 4.632466432960411, - "learning_rate": 1.5922524909528903e-06, - "loss": 0.1561, + "epoch": 1.6633719288336628, + "grad_norm": 3.7405809400779253, + "learning_rate": 8.725980332433089e-06, + "loss": 0.7124, "step": 11780 }, { - "epoch": 2.470329209477878, - "grad_norm": 3.5322922683881894, - "learning_rate": 1.5910270730481692e-06, - "loss": 0.1169, + "epoch": 1.6635131318836487, + "grad_norm": 3.463851573396787, + "learning_rate": 8.724468316797368e-06, + "loss": 0.5822, "step": 11781 }, { - "epoch": 2.4705388970434052, - "grad_norm": 3.4997735443839293, - "learning_rate": 1.589802086117892e-06, - "loss": 0.1294, + "epoch": 1.6636543349336346, + "grad_norm": 3.0301599087570037, + "learning_rate": 8.722956330804456e-06, + "loss": 0.4579, "step": 11782 }, { - "epoch": 2.4707485846089328, - "grad_norm": 3.252062100082361, - "learning_rate": 1.5885775302248453e-06, - "loss": 0.1285, + "epoch": 1.6637955379836205, + "grad_norm": 4.149373868745879, + "learning_rate": 8.721444374489485e-06, + "loss": 0.6328, "step": 11783 }, { - "epoch": 2.4709582721744603, - "grad_norm": 3.5285278911751115, - "learning_rate": 1.587353405431794e-06, - "loss": 0.1332, + "epoch": 1.6639367410336063, + "grad_norm": 3.4125901348547596, + "learning_rate": 8.719932447887594e-06, + "loss": 0.5859, "step": 11784 }, { - "epoch": 2.4711679597399874, - "grad_norm": 4.447736339511368, - "learning_rate": 1.5861297118014662e-06, - "loss": 0.1495, + "epoch": 1.6640779440835922, + "grad_norm": 3.752275530820653, + "learning_rate": 8.718420551033922e-06, + "loss": 0.5885, "step": 11785 }, { - "epoch": 2.471377647305515, - "grad_norm": 4.266219649048172, - "learning_rate": 1.584906449396585e-06, - "loss": 0.1455, + "epoch": 1.664219147133578, + "grad_norm": 3.578626486806983, + "learning_rate": 8.716908683963602e-06, + "loss": 0.521, "step": 11786 }, { - "epoch": 2.471587334871042, - "grad_norm": 4.684846693159901, - "learning_rate": 1.5836836182798443e-06, - "loss": 0.1557, + "epoch": 1.664360350183564, + "grad_norm": 2.8279108463052802, + "learning_rate": 8.715396846711773e-06, + "loss": 0.4255, "step": 11787 }, { - "epoch": 2.4717970224365695, - "grad_norm": 4.63365780390233, - "learning_rate": 1.5824612185139165e-06, - "loss": 0.1536, + "epoch": 1.6645015532335499, + "grad_norm": 3.8576496499375263, + "learning_rate": 8.713885039313562e-06, + "loss": 0.6049, "step": 11788 }, { - "epoch": 2.472006710002097, - "grad_norm": 4.358589877591667, - "learning_rate": 1.5812392501614482e-06, - "loss": 0.1143, + "epoch": 1.6646427562835358, + "grad_norm": 2.6867857445439745, + "learning_rate": 8.712373261804109e-06, + "loss": 0.3965, "step": 11789 }, { - "epoch": 2.472216397567624, - "grad_norm": 5.777124349478835, - "learning_rate": 1.5800177132850724e-06, - "loss": 0.142, + "epoch": 1.6647839593335216, + "grad_norm": 3.810580367951582, + "learning_rate": 8.710861514218545e-06, + "loss": 0.6289, "step": 11790 }, { - "epoch": 2.4724260851331517, - "grad_norm": 4.58126395578425, - "learning_rate": 1.5787966079473904e-06, - "loss": 0.1856, + "epoch": 1.6649251623835075, + "grad_norm": 3.281358819549739, + "learning_rate": 8.709349796592004e-06, + "loss": 0.5257, "step": 11791 }, { - "epoch": 2.4726357726986787, - "grad_norm": 5.759565198721991, - "learning_rate": 1.5775759342109899e-06, - "loss": 0.1697, + "epoch": 1.6650663654334934, + "grad_norm": 3.508721648434874, + "learning_rate": 8.707838108959617e-06, + "loss": 0.5488, "step": 11792 }, { - "epoch": 2.4728454602642063, - "grad_norm": 3.56304060604929, - "learning_rate": 1.5763556921384315e-06, - "loss": 0.1041, + "epoch": 1.6652075684834793, + "grad_norm": 3.351240840136458, + "learning_rate": 8.70632645135651e-06, + "loss": 0.5486, "step": 11793 }, { - "epoch": 2.473055147829734, - "grad_norm": 4.688031824460209, - "learning_rate": 1.5751358817922502e-06, - "loss": 0.1671, + "epoch": 1.6653487715334652, + "grad_norm": 3.9447916300630856, + "learning_rate": 8.704814823817822e-06, + "loss": 0.5449, "step": 11794 }, { - "epoch": 2.473264835395261, - "grad_norm": 3.817424067047199, - "learning_rate": 1.573916503234968e-06, - "loss": 0.1183, + "epoch": 1.665489974583451, + "grad_norm": 2.722985529121054, + "learning_rate": 8.703303226378678e-06, + "loss": 0.3912, "step": 11795 }, { - "epoch": 2.4734745229607884, - "grad_norm": 3.8570270234838753, - "learning_rate": 1.5726975565290824e-06, - "loss": 0.1163, + "epoch": 1.665631177633437, + "grad_norm": 3.731282588483059, + "learning_rate": 8.701791659074206e-06, + "loss": 0.5677, "step": 11796 }, { - "epoch": 2.473684210526316, - "grad_norm": 5.62404102091517, - "learning_rate": 1.5714790417370618e-06, - "loss": 0.2112, + "epoch": 1.6657723806834228, + "grad_norm": 3.13868205764393, + "learning_rate": 8.700280121939535e-06, + "loss": 0.445, "step": 11797 }, { - "epoch": 2.473893898091843, - "grad_norm": 4.6524004494361275, - "learning_rate": 1.570260958921358e-06, - "loss": 0.1735, + "epoch": 1.6659135837334085, + "grad_norm": 3.3665634805518803, + "learning_rate": 8.698768615009789e-06, + "loss": 0.4251, "step": 11798 }, { - "epoch": 2.4741035856573705, - "grad_norm": 2.9688582184165977, - "learning_rate": 1.5690433081444023e-06, - "loss": 0.1113, + "epoch": 1.6660547867833944, + "grad_norm": 3.4888800001658624, + "learning_rate": 8.697257138320104e-06, + "loss": 0.5908, "step": 11799 }, { - "epoch": 2.474313273222898, - "grad_norm": 4.994284119966936, - "learning_rate": 1.567826089468597e-06, - "loss": 0.1499, + "epoch": 1.6661959898333802, + "grad_norm": 3.549288175500463, + "learning_rate": 8.695745691905599e-06, + "loss": 0.679, "step": 11800 }, { - "epoch": 2.474522960788425, - "grad_norm": 3.590827167692221, - "learning_rate": 1.5666093029563335e-06, - "loss": 0.1172, + "epoch": 1.6663371928833661, + "grad_norm": 3.8249142152586346, + "learning_rate": 8.694234275801397e-06, + "loss": 0.563, "step": 11801 }, { - "epoch": 2.4747326483539527, - "grad_norm": 4.764100956917982, - "learning_rate": 1.5653929486699682e-06, - "loss": 0.1549, + "epoch": 1.666478395933352, + "grad_norm": 3.1795698417392746, + "learning_rate": 8.692722890042632e-06, + "loss": 0.5097, "step": 11802 }, { - "epoch": 2.47494233591948, - "grad_norm": 4.000926603922949, - "learning_rate": 1.5641770266718425e-06, - "loss": 0.1548, + "epoch": 1.666619598983338, + "grad_norm": 4.380338714011936, + "learning_rate": 8.69121153466442e-06, + "loss": 0.7053, "step": 11803 }, { - "epoch": 2.4751520234850073, - "grad_norm": 4.9638337343009145, - "learning_rate": 1.5629615370242745e-06, - "loss": 0.1026, + "epoch": 1.6667608020333238, + "grad_norm": 3.332757915953628, + "learning_rate": 8.689700209701887e-06, + "loss": 0.5294, "step": 11804 }, { - "epoch": 2.475361711050535, - "grad_norm": 3.7887276998061523, - "learning_rate": 1.5617464797895666e-06, - "loss": 0.1393, + "epoch": 1.6669020050833097, + "grad_norm": 4.206320113036879, + "learning_rate": 8.688188915190156e-06, + "loss": 0.5536, "step": 11805 }, { - "epoch": 2.475571398616062, - "grad_norm": 3.179956073337639, - "learning_rate": 1.560531855029981e-06, - "loss": 0.1147, + "epoch": 1.6670432081332955, + "grad_norm": 3.901885080466035, + "learning_rate": 8.686677651164345e-06, + "loss": 0.6814, "step": 11806 }, { - "epoch": 2.4757810861815894, - "grad_norm": 3.484309836679592, - "learning_rate": 1.5593176628077767e-06, - "loss": 0.1108, + "epoch": 1.6671844111832814, + "grad_norm": 4.1663138300122435, + "learning_rate": 8.685166417659581e-06, + "loss": 0.5174, "step": 11807 }, { - "epoch": 2.475990773747117, - "grad_norm": 4.287122212979466, - "learning_rate": 1.558103903185183e-06, - "loss": 0.1515, + "epoch": 1.6673256142332673, + "grad_norm": 3.339407189127907, + "learning_rate": 8.683655214710982e-06, + "loss": 0.5206, "step": 11808 }, { - "epoch": 2.476200461312644, - "grad_norm": 3.9042309204573415, - "learning_rate": 1.556890576224407e-06, - "loss": 0.1579, + "epoch": 1.6674668172832532, + "grad_norm": 3.29607884414573, + "learning_rate": 8.682144042353666e-06, + "loss": 0.589, "step": 11809 }, { - "epoch": 2.4764101488781716, - "grad_norm": 4.274468797760751, - "learning_rate": 1.5556776819876296e-06, - "loss": 0.1374, + "epoch": 1.667608020333239, + "grad_norm": 4.072754256106878, + "learning_rate": 8.680632900622752e-06, + "loss": 0.6087, "step": 11810 }, { - "epoch": 2.4766198364436987, - "grad_norm": 5.566457632731147, - "learning_rate": 1.5544652205370203e-06, - "loss": 0.1575, + "epoch": 1.667749223383225, + "grad_norm": 3.3176699801185885, + "learning_rate": 8.679121789553366e-06, + "loss": 0.4965, "step": 11811 }, { - "epoch": 2.476829524009226, - "grad_norm": 3.213799707234014, - "learning_rate": 1.5532531919347127e-06, - "loss": 0.118, + "epoch": 1.6678904264332108, + "grad_norm": 3.534854414817696, + "learning_rate": 8.677610709180612e-06, + "loss": 0.5173, "step": 11812 }, { - "epoch": 2.4770392115747537, - "grad_norm": 4.3541484114463485, - "learning_rate": 1.5520415962428325e-06, - "loss": 0.1322, + "epoch": 1.6680316294831967, + "grad_norm": 3.8125347488209584, + "learning_rate": 8.676099659539618e-06, + "loss": 0.5781, "step": 11813 }, { - "epoch": 2.477248899140281, - "grad_norm": 4.855383855939923, - "learning_rate": 1.5508304335234725e-06, - "loss": 0.0966, + "epoch": 1.6681728325331826, + "grad_norm": 3.534691932673833, + "learning_rate": 8.674588640665495e-06, + "loss": 0.5626, "step": 11814 }, { - "epoch": 2.4774585867058083, - "grad_norm": 5.367344856384804, - "learning_rate": 1.549619703838705e-06, - "loss": 0.2098, + "epoch": 1.6683140355831685, + "grad_norm": 4.018937437100556, + "learning_rate": 8.673077652593357e-06, + "loss": 0.4826, "step": 11815 }, { - "epoch": 2.477668274271336, - "grad_norm": 5.015746142285111, - "learning_rate": 1.548409407250584e-06, - "loss": 0.1828, + "epoch": 1.6684552386331544, + "grad_norm": 4.214688270018037, + "learning_rate": 8.671566695358324e-06, + "loss": 0.6256, "step": 11816 }, { - "epoch": 2.477877961836863, - "grad_norm": 3.39963965597521, - "learning_rate": 1.5471995438211418e-06, - "loss": 0.1025, + "epoch": 1.6685964416831403, + "grad_norm": 4.120202320830355, + "learning_rate": 8.670055768995508e-06, + "loss": 0.7755, "step": 11817 }, { - "epoch": 2.4780876494023905, - "grad_norm": 7.232819013434011, - "learning_rate": 1.5459901136123834e-06, - "loss": 0.2417, + "epoch": 1.6687376447331261, + "grad_norm": 3.9327198720882652, + "learning_rate": 8.668544873540017e-06, + "loss": 0.6325, "step": 11818 }, { - "epoch": 2.478297336967918, - "grad_norm": 3.0867581015834715, - "learning_rate": 1.5447811166862925e-06, - "loss": 0.1142, + "epoch": 1.668878847783112, + "grad_norm": 4.0888828645099915, + "learning_rate": 8.667034009026972e-06, + "loss": 0.5921, "step": 11819 }, { - "epoch": 2.478507024533445, - "grad_norm": 4.298037871665368, - "learning_rate": 1.5435725531048362e-06, - "loss": 0.1267, + "epoch": 1.669020050833098, + "grad_norm": 3.7078082540154633, + "learning_rate": 8.665523175491484e-06, + "loss": 0.6071, "step": 11820 }, { - "epoch": 2.4787167120989726, - "grad_norm": 4.373892603946846, - "learning_rate": 1.5423644229299506e-06, - "loss": 0.143, + "epoch": 1.6691612538830838, + "grad_norm": 3.180805037556508, + "learning_rate": 8.664012372968658e-06, + "loss": 0.478, "step": 11821 }, { - "epoch": 2.4789263996645, - "grad_norm": 3.265878466045089, - "learning_rate": 1.5411567262235605e-06, - "loss": 0.1321, + "epoch": 1.6693024569330697, + "grad_norm": 3.407186692631098, + "learning_rate": 8.662501601493607e-06, + "loss": 0.4855, "step": 11822 }, { - "epoch": 2.479136087230027, - "grad_norm": 3.799821116839136, - "learning_rate": 1.5399494630475586e-06, - "loss": 0.1276, + "epoch": 1.6694436599830556, + "grad_norm": 3.756222899851191, + "learning_rate": 8.66099086110144e-06, + "loss": 0.5248, "step": 11823 }, { - "epoch": 2.4793457747955547, - "grad_norm": 4.910500616301495, - "learning_rate": 1.5387426334638189e-06, - "loss": 0.191, + "epoch": 1.6695848630330414, + "grad_norm": 3.493340715982749, + "learning_rate": 8.659480151827267e-06, + "loss": 0.6306, "step": 11824 }, { - "epoch": 2.479555462361082, - "grad_norm": 4.966184556499949, - "learning_rate": 1.537536237534193e-06, - "loss": 0.1638, + "epoch": 1.6697260660830273, + "grad_norm": 3.0816676012854054, + "learning_rate": 8.657969473706197e-06, + "loss": 0.4261, "step": 11825 }, { - "epoch": 2.4797651499266093, - "grad_norm": 4.947293974233818, - "learning_rate": 1.5363302753205177e-06, - "loss": 0.1394, + "epoch": 1.6698672691330132, + "grad_norm": 3.298957463594451, + "learning_rate": 8.65645882677334e-06, + "loss": 0.4269, "step": 11826 }, { - "epoch": 2.479974837492137, - "grad_norm": 4.420435070328749, - "learning_rate": 1.535124746884591e-06, - "loss": 0.1535, + "epoch": 1.670008472182999, + "grad_norm": 4.536664585722148, + "learning_rate": 8.654948211063794e-06, + "loss": 0.6175, "step": 11827 }, { - "epoch": 2.480184525057664, - "grad_norm": 4.69978911875586, - "learning_rate": 1.5339196522882017e-06, - "loss": 0.1538, + "epoch": 1.670149675232985, + "grad_norm": 4.091091377225782, + "learning_rate": 8.65343762661268e-06, + "loss": 0.6747, "step": 11828 }, { - "epoch": 2.4803942126231915, - "grad_norm": 5.175169276279862, - "learning_rate": 1.5327149915931173e-06, - "loss": 0.1257, + "epoch": 1.6702908782829708, + "grad_norm": 3.4472066568414683, + "learning_rate": 8.651927073455085e-06, + "loss": 0.5497, "step": 11829 }, { - "epoch": 2.4806039001887186, - "grad_norm": 4.999418976485478, - "learning_rate": 1.531510764861075e-06, - "loss": 0.1281, + "epoch": 1.6704320813329567, + "grad_norm": 3.7384452692408634, + "learning_rate": 8.650416551626126e-06, + "loss": 0.4999, "step": 11830 }, { - "epoch": 2.480813587754246, - "grad_norm": 3.854125899961667, - "learning_rate": 1.5303069721537912e-06, - "loss": 0.121, + "epoch": 1.6705732843829426, + "grad_norm": 3.323611335239815, + "learning_rate": 8.648906061160903e-06, + "loss": 0.5476, "step": 11831 }, { - "epoch": 2.4810232753197736, - "grad_norm": 4.366764438261112, - "learning_rate": 1.5291036135329652e-06, - "loss": 0.1414, + "epoch": 1.6707144874329285, + "grad_norm": 2.9648162417218695, + "learning_rate": 8.647395602094517e-06, + "loss": 0.4715, "step": 11832 }, { - "epoch": 2.4812329628853007, - "grad_norm": 4.18102729171108, - "learning_rate": 1.5279006890602733e-06, - "loss": 0.1194, + "epoch": 1.6708556904829144, + "grad_norm": 3.787153880016145, + "learning_rate": 8.645885174462077e-06, + "loss": 0.5878, "step": 11833 }, { - "epoch": 2.4814426504508282, - "grad_norm": 4.979768063492309, - "learning_rate": 1.526698198797365e-06, - "loss": 0.1517, + "epoch": 1.6709968935329003, + "grad_norm": 4.443030949399692, + "learning_rate": 8.64437477829868e-06, + "loss": 0.7999, "step": 11834 }, { - "epoch": 2.4816523380163558, - "grad_norm": 3.9488939343662173, - "learning_rate": 1.5254961428058678e-06, - "loss": 0.1446, + "epoch": 1.6711380965828861, + "grad_norm": 3.409431606087524, + "learning_rate": 8.642864413639425e-06, + "loss": 0.5353, "step": 11835 }, { - "epoch": 2.481862025581883, - "grad_norm": 4.59018665311151, - "learning_rate": 1.5242945211473936e-06, - "loss": 0.1635, + "epoch": 1.671279299632872, + "grad_norm": 3.6564535830588185, + "learning_rate": 8.641354080519422e-06, + "loss": 0.6192, "step": 11836 }, { - "epoch": 2.4820717131474104, - "grad_norm": 4.224787443030674, - "learning_rate": 1.5230933338835241e-06, - "loss": 0.1556, + "epoch": 1.671420502682858, + "grad_norm": 3.664445994642492, + "learning_rate": 8.639843778973756e-06, + "loss": 0.5791, "step": 11837 }, { - "epoch": 2.482281400712938, - "grad_norm": 4.279839485963888, - "learning_rate": 1.5218925810758256e-06, - "loss": 0.1331, + "epoch": 1.6715617057328438, + "grad_norm": 4.459591095245657, + "learning_rate": 8.638333509037537e-06, + "loss": 0.6629, "step": 11838 }, { - "epoch": 2.482491088278465, - "grad_norm": 5.407492332100062, - "learning_rate": 1.5206922627858367e-06, - "loss": 0.1292, + "epoch": 1.6717029087828297, + "grad_norm": 4.068535487914683, + "learning_rate": 8.636823270745858e-06, + "loss": 0.615, "step": 11839 }, { - "epoch": 2.4827007758439925, - "grad_norm": 4.429061773970704, - "learning_rate": 1.5194923790750738e-06, - "loss": 0.1212, + "epoch": 1.6718441118328156, + "grad_norm": 4.497429931957836, + "learning_rate": 8.635313064133817e-06, + "loss": 0.6714, "step": 11840 }, { - "epoch": 2.48291046340952, - "grad_norm": 4.020767133826447, - "learning_rate": 1.5182929300050353e-06, - "loss": 0.1429, + "epoch": 1.6719853148828014, + "grad_norm": 3.1216720773124242, + "learning_rate": 8.633802889236509e-06, + "loss": 0.4743, "step": 11841 }, { - "epoch": 2.483120150975047, - "grad_norm": 5.1474317333802055, - "learning_rate": 1.5170939156371978e-06, - "loss": 0.1577, + "epoch": 1.6721265179327873, + "grad_norm": 3.420122440356066, + "learning_rate": 8.632292746089034e-06, + "loss": 0.6103, "step": 11842 }, { - "epoch": 2.4833298385405747, - "grad_norm": 5.22423217552654, - "learning_rate": 1.5158953360330086e-06, - "loss": 0.1802, + "epoch": 1.6722677209827732, + "grad_norm": 3.88428446179128, + "learning_rate": 8.630782634726487e-06, + "loss": 0.5517, "step": 11843 }, { - "epoch": 2.4835395261061017, - "grad_norm": 4.353578637429943, - "learning_rate": 1.5146971912538977e-06, - "loss": 0.1526, + "epoch": 1.672408924032759, + "grad_norm": 3.1563730771256173, + "learning_rate": 8.629272555183956e-06, + "loss": 0.5258, "step": 11844 }, { - "epoch": 2.4837492136716293, - "grad_norm": 4.8014925208867005, - "learning_rate": 1.5134994813612737e-06, - "loss": 0.1463, + "epoch": 1.672550127082745, + "grad_norm": 4.51886496990165, + "learning_rate": 8.627762507496546e-06, + "loss": 0.7154, "step": 11845 }, { - "epoch": 2.483958901237157, - "grad_norm": 3.2016024822144376, - "learning_rate": 1.5123022064165205e-06, - "loss": 0.1063, + "epoch": 1.6726913301327309, + "grad_norm": 3.592722847511932, + "learning_rate": 8.626252491699335e-06, + "loss": 0.5898, "step": 11846 }, { - "epoch": 2.484168588802684, - "grad_norm": 3.061946591652013, - "learning_rate": 1.5111053664809983e-06, - "loss": 0.0954, + "epoch": 1.6728325331827167, + "grad_norm": 3.249355488456688, + "learning_rate": 8.624742507827427e-06, + "loss": 0.558, "step": 11847 }, { - "epoch": 2.4843782763682114, - "grad_norm": 3.8862065827236787, - "learning_rate": 1.5099089616160523e-06, - "loss": 0.122, + "epoch": 1.6729737362327026, + "grad_norm": 2.9130105166090616, + "learning_rate": 8.623232555915907e-06, + "loss": 0.5132, "step": 11848 }, { - "epoch": 2.4845879639337385, - "grad_norm": 3.1609369013371373, - "learning_rate": 1.5087129918829946e-06, - "loss": 0.1002, + "epoch": 1.6731149392826885, + "grad_norm": 3.9198247650453655, + "learning_rate": 8.621722635999868e-06, + "loss": 0.6091, "step": 11849 }, { - "epoch": 2.484797651499266, - "grad_norm": 5.022728445571421, - "learning_rate": 1.507517457343125e-06, - "loss": 0.141, + "epoch": 1.6732561423326744, + "grad_norm": 3.3647444032271108, + "learning_rate": 8.6202127481144e-06, + "loss": 0.5635, "step": 11850 }, { - "epoch": 2.4850073390647935, - "grad_norm": 3.16948114759684, - "learning_rate": 1.5063223580577159e-06, - "loss": 0.1174, + "epoch": 1.6733973453826603, + "grad_norm": 4.807280972052962, + "learning_rate": 8.618702892294593e-06, + "loss": 0.7692, "step": 11851 }, { - "epoch": 2.4852170266303206, - "grad_norm": 4.12757541917744, - "learning_rate": 1.5051276940880155e-06, - "loss": 0.1424, + "epoch": 1.6735385484326462, + "grad_norm": 3.064648830490491, + "learning_rate": 8.617193068575534e-06, + "loss": 0.4539, "step": 11852 }, { - "epoch": 2.485426714195848, - "grad_norm": 3.2653015269687353, - "learning_rate": 1.503933465495253e-06, - "loss": 0.1106, + "epoch": 1.673679751482632, + "grad_norm": 3.30783750301966, + "learning_rate": 8.615683276992313e-06, + "loss": 0.4834, "step": 11853 }, { - "epoch": 2.4856364017613757, - "grad_norm": 4.643528641388025, - "learning_rate": 1.5027396723406395e-06, - "loss": 0.1647, + "epoch": 1.673820954532618, + "grad_norm": 4.138434182904973, + "learning_rate": 8.61417351758001e-06, + "loss": 0.7309, "step": 11854 }, { - "epoch": 2.4858460893269028, - "grad_norm": 5.062840533062073, - "learning_rate": 1.501546314685356e-06, - "loss": 0.1413, + "epoch": 1.6739621575826038, + "grad_norm": 2.9961660126614236, + "learning_rate": 8.61266379037372e-06, + "loss": 0.4829, "step": 11855 }, { - "epoch": 2.4860557768924303, - "grad_norm": 4.374360883172957, - "learning_rate": 1.500353392590561e-06, - "loss": 0.1626, + "epoch": 1.6741033606325897, + "grad_norm": 3.17518453226916, + "learning_rate": 8.611154095408521e-06, + "loss": 0.4367, "step": 11856 }, { - "epoch": 2.486265464457958, - "grad_norm": 3.851082609914037, - "learning_rate": 1.4991609061173984e-06, - "loss": 0.1338, + "epoch": 1.6742445636825756, + "grad_norm": 4.067531199968323, + "learning_rate": 8.609644432719504e-06, + "loss": 0.6748, "step": 11857 }, { - "epoch": 2.486475152023485, - "grad_norm": 5.105289973432092, - "learning_rate": 1.497968855326981e-06, - "loss": 0.1429, + "epoch": 1.6743857667325615, + "grad_norm": 3.4702533010281855, + "learning_rate": 8.608134802341745e-06, + "loss": 0.5683, "step": 11858 }, { - "epoch": 2.4866848395890124, - "grad_norm": 4.803652980933307, - "learning_rate": 1.4967772402804082e-06, - "loss": 0.1351, + "epoch": 1.6745269697825473, + "grad_norm": 3.507799220589695, + "learning_rate": 8.606625204310337e-06, + "loss": 0.4881, "step": 11859 }, { - "epoch": 2.48689452715454, - "grad_norm": 4.417975671406087, - "learning_rate": 1.4955860610387495e-06, - "loss": 0.128, + "epoch": 1.6746681728325332, + "grad_norm": 3.4217478350021837, + "learning_rate": 8.605115638660356e-06, + "loss": 0.5644, "step": 11860 }, { - "epoch": 2.487104214720067, - "grad_norm": 4.452248138964775, - "learning_rate": 1.4943953176630543e-06, - "loss": 0.1191, + "epoch": 1.674809375882519, + "grad_norm": 3.1644089494950847, + "learning_rate": 8.603606105426884e-06, + "loss": 0.5144, "step": 11861 }, { - "epoch": 2.4873139022855946, - "grad_norm": 4.5293179279593145, - "learning_rate": 1.4932050102143503e-06, - "loss": 0.1287, + "epoch": 1.674950578932505, + "grad_norm": 4.145861669458106, + "learning_rate": 8.602096604645009e-06, + "loss": 0.6752, "step": 11862 }, { - "epoch": 2.4875235898511217, - "grad_norm": 3.9126790070487965, - "learning_rate": 1.4920151387536486e-06, - "loss": 0.137, + "epoch": 1.6750917819824909, + "grad_norm": 3.508954585457945, + "learning_rate": 8.600587136349799e-06, + "loss": 0.4418, "step": 11863 }, { - "epoch": 2.487733277416649, - "grad_norm": 4.332492138687773, - "learning_rate": 1.4908257033419226e-06, - "loss": 0.1715, + "epoch": 1.6752329850324768, + "grad_norm": 3.695775431775974, + "learning_rate": 8.599077700576342e-06, + "loss": 0.537, "step": 11864 }, { - "epoch": 2.4879429649821767, - "grad_norm": 4.8286363169286215, - "learning_rate": 1.4896367040401382e-06, - "loss": 0.1195, + "epoch": 1.6753741880824626, + "grad_norm": 3.6021196733255447, + "learning_rate": 8.597568297359713e-06, + "loss": 0.5555, "step": 11865 }, { - "epoch": 2.488152652547704, - "grad_norm": 4.52475730780462, - "learning_rate": 1.4884481409092356e-06, - "loss": 0.1755, + "epoch": 1.6755153911324485, + "grad_norm": 4.435646139946198, + "learning_rate": 8.59605892673499e-06, + "loss": 0.6973, "step": 11866 }, { - "epoch": 2.4883623401132313, - "grad_norm": 5.062774352010064, - "learning_rate": 1.487260014010128e-06, - "loss": 0.1607, + "epoch": 1.6756565941824344, + "grad_norm": 3.487067399872672, + "learning_rate": 8.594549588737253e-06, + "loss": 0.5746, "step": 11867 }, { - "epoch": 2.4885720276787584, - "grad_norm": 3.65963675108377, - "learning_rate": 1.4860723234037066e-06, - "loss": 0.1371, + "epoch": 1.6757977972324203, + "grad_norm": 4.349398823529884, + "learning_rate": 8.593040283401576e-06, + "loss": 0.6564, "step": 11868 }, { - "epoch": 2.488781715244286, - "grad_norm": 3.7262309680921453, - "learning_rate": 1.4848850691508476e-06, - "loss": 0.1408, + "epoch": 1.6759390002824062, + "grad_norm": 3.3969434218627557, + "learning_rate": 8.591531010763036e-06, + "loss": 0.569, "step": 11869 }, { - "epoch": 2.4889914028098135, - "grad_norm": 7.619352746497935, - "learning_rate": 1.4836982513123965e-06, - "loss": 0.1534, + "epoch": 1.676080203332392, + "grad_norm": 4.151062540440489, + "learning_rate": 8.590021770856708e-06, + "loss": 0.6894, "step": 11870 }, { - "epoch": 2.4892010903753405, - "grad_norm": 5.388045217848504, - "learning_rate": 1.4825118699491815e-06, - "loss": 0.1636, + "epoch": 1.676221406382378, + "grad_norm": 3.5364462398823813, + "learning_rate": 8.588512563717664e-06, + "loss": 0.5183, "step": 11871 }, { - "epoch": 2.489410777940868, - "grad_norm": 3.792800012433815, - "learning_rate": 1.4813259251220069e-06, - "loss": 0.1424, + "epoch": 1.6763626094323638, + "grad_norm": 3.9188700852249254, + "learning_rate": 8.587003389380977e-06, + "loss": 0.6232, "step": 11872 }, { - "epoch": 2.4896204655063956, - "grad_norm": 3.612821643827426, - "learning_rate": 1.4801404168916511e-06, - "loss": 0.1419, + "epoch": 1.6765038124823497, + "grad_norm": 3.690461266187758, + "learning_rate": 8.585494247881722e-06, + "loss": 0.439, "step": 11873 }, { - "epoch": 2.4898301530719227, - "grad_norm": 5.651389763679717, - "learning_rate": 1.4789553453188754e-06, - "loss": 0.1651, + "epoch": 1.6766450155323356, + "grad_norm": 3.263811869505128, + "learning_rate": 8.58398513925497e-06, + "loss": 0.4537, "step": 11874 }, { - "epoch": 2.49003984063745, - "grad_norm": 4.0074406403922955, - "learning_rate": 1.47777071046442e-06, - "loss": 0.1398, + "epoch": 1.6767862185823215, + "grad_norm": 3.464898975227575, + "learning_rate": 8.58247606353579e-06, + "loss": 0.5901, "step": 11875 }, { - "epoch": 2.4902495282029777, - "grad_norm": 5.202403629707815, - "learning_rate": 1.4765865123889967e-06, - "loss": 0.1329, + "epoch": 1.6769274216323073, + "grad_norm": 3.1390323153972277, + "learning_rate": 8.580967020759257e-06, + "loss": 0.508, "step": 11876 }, { - "epoch": 2.490459215768505, - "grad_norm": 4.365099978031514, - "learning_rate": 1.4754027511532954e-06, - "loss": 0.1515, + "epoch": 1.6770686246822932, + "grad_norm": 4.798097362770125, + "learning_rate": 8.579458010960435e-06, + "loss": 0.756, "step": 11877 }, { - "epoch": 2.4906689033340323, - "grad_norm": 4.6235639230024805, - "learning_rate": 1.4742194268179911e-06, - "loss": 0.1631, + "epoch": 1.6772098277322791, + "grad_norm": 3.4561803754450255, + "learning_rate": 8.577949034174395e-06, + "loss": 0.7076, "step": 11878 }, { - "epoch": 2.49087859089956, - "grad_norm": 4.549393359040409, - "learning_rate": 1.4730365394437252e-06, - "loss": 0.1711, + "epoch": 1.677351030782265, + "grad_norm": 4.054219532430299, + "learning_rate": 8.576440090436213e-06, + "loss": 0.5889, "step": 11879 }, { - "epoch": 2.491088278465087, - "grad_norm": 3.9769602005948723, - "learning_rate": 1.471854089091128e-06, - "loss": 0.1573, + "epoch": 1.6774922338322509, + "grad_norm": 3.293657309526127, + "learning_rate": 8.57493117978094e-06, + "loss": 0.6084, "step": 11880 }, { - "epoch": 2.4912979660306145, - "grad_norm": 4.178709977139378, - "learning_rate": 1.4706720758208016e-06, - "loss": 0.1628, + "epoch": 1.6776334368822368, + "grad_norm": 3.5517445078406236, + "learning_rate": 8.573422302243653e-06, + "loss": 0.5517, "step": 11881 }, { - "epoch": 2.4915076535961416, - "grad_norm": 5.693388954694277, - "learning_rate": 1.4694904996933213e-06, - "loss": 0.1667, + "epoch": 1.6777746399322226, + "grad_norm": 3.791752462856347, + "learning_rate": 8.571913457859418e-06, + "loss": 0.55, "step": 11882 }, { - "epoch": 2.491717341161669, - "grad_norm": 4.541180690923713, - "learning_rate": 1.4683093607692479e-06, - "loss": 0.156, + "epoch": 1.6779158429822085, + "grad_norm": 3.4349666111582375, + "learning_rate": 8.570404646663295e-06, + "loss": 0.5008, "step": 11883 }, { - "epoch": 2.4919270287271966, - "grad_norm": 3.6722427203059875, - "learning_rate": 1.4671286591091228e-06, - "loss": 0.1314, + "epoch": 1.6780570460321944, + "grad_norm": 3.3265798616561, + "learning_rate": 8.56889586869035e-06, + "loss": 0.5913, "step": 11884 }, { - "epoch": 2.4921367162927237, - "grad_norm": 4.889179305131772, - "learning_rate": 1.4659483947734487e-06, - "loss": 0.1641, + "epoch": 1.6781982490821803, + "grad_norm": 4.518021111624617, + "learning_rate": 8.567387123975648e-06, + "loss": 0.5671, "step": 11885 }, { - "epoch": 2.4923464038582512, - "grad_norm": 3.4429981792259916, - "learning_rate": 1.4647685678227197e-06, - "loss": 0.1204, + "epoch": 1.6783394521321662, + "grad_norm": 3.260256711687439, + "learning_rate": 8.565878412554251e-06, + "loss": 0.5686, "step": 11886 }, { - "epoch": 2.4925560914237788, - "grad_norm": 3.944329213724358, - "learning_rate": 1.4635891783174095e-06, - "loss": 0.1109, + "epoch": 1.678480655182152, + "grad_norm": 3.2415569306181413, + "learning_rate": 8.564369734461222e-06, + "loss": 0.5175, "step": 11887 }, { - "epoch": 2.492765778989306, - "grad_norm": 4.061479007705403, - "learning_rate": 1.4624102263179584e-06, - "loss": 0.1287, + "epoch": 1.678621858232138, + "grad_norm": 3.2715087448908484, + "learning_rate": 8.562861089731618e-06, + "loss": 0.505, "step": 11888 }, { - "epoch": 2.4929754665548334, - "grad_norm": 4.143768006344261, - "learning_rate": 1.4612317118847886e-06, - "loss": 0.1354, + "epoch": 1.6787630612821238, + "grad_norm": 2.8598828442356816, + "learning_rate": 8.561352478400501e-06, + "loss": 0.4608, "step": 11889 }, { - "epoch": 2.4931851541203605, - "grad_norm": 3.965578465906405, - "learning_rate": 1.4600536350783057e-06, - "loss": 0.1164, + "epoch": 1.6789042643321097, + "grad_norm": 3.7979466670185604, + "learning_rate": 8.559843900502934e-06, + "loss": 0.6318, "step": 11890 }, { - "epoch": 2.493394841685888, - "grad_norm": 4.235821489849507, - "learning_rate": 1.4588759959588828e-06, - "loss": 0.1588, + "epoch": 1.6790454673820956, + "grad_norm": 3.5692995331344015, + "learning_rate": 8.55833535607397e-06, + "loss": 0.527, "step": 11891 }, { - "epoch": 2.4936045292514155, - "grad_norm": 7.824522161100078, - "learning_rate": 1.4576987945868826e-06, - "loss": 0.1308, + "epoch": 1.6791866704320815, + "grad_norm": 3.13632668650295, + "learning_rate": 8.556826845148669e-06, + "loss": 0.4456, "step": 11892 }, { - "epoch": 2.4938142168169426, - "grad_norm": 4.8478841524399225, - "learning_rate": 1.4565220310226314e-06, - "loss": 0.1325, + "epoch": 1.6793278734820674, + "grad_norm": 3.1417910369169144, + "learning_rate": 8.55531836776209e-06, + "loss": 0.5042, "step": 11893 }, { - "epoch": 2.49402390438247, - "grad_norm": 6.190552596379382, - "learning_rate": 1.4553457053264475e-06, - "loss": 0.25, + "epoch": 1.6794690765320532, + "grad_norm": 3.3386501533137434, + "learning_rate": 8.55380992394929e-06, + "loss": 0.5512, "step": 11894 }, { - "epoch": 2.4942335919479977, - "grad_norm": 5.6059324514679005, - "learning_rate": 1.4541698175586128e-06, - "loss": 0.2003, + "epoch": 1.6796102795820391, + "grad_norm": 4.026068427138042, + "learning_rate": 8.552301513745322e-06, + "loss": 0.665, "step": 11895 }, { - "epoch": 2.4944432795135247, - "grad_norm": 3.6088258137245575, - "learning_rate": 1.4529943677794e-06, - "loss": 0.0791, + "epoch": 1.679751482632025, + "grad_norm": 3.380219839451239, + "learning_rate": 8.550793137185243e-06, + "loss": 0.5191, "step": 11896 }, { - "epoch": 2.4946529670790523, - "grad_norm": 4.841589191709809, - "learning_rate": 1.4518193560490501e-06, - "loss": 0.1397, + "epoch": 1.6798926856820109, + "grad_norm": 4.887089884202536, + "learning_rate": 8.549284794304102e-06, + "loss": 0.724, "step": 11897 }, { - "epoch": 2.49486265464458, - "grad_norm": 4.014471890546808, - "learning_rate": 1.4506447824277815e-06, - "loss": 0.1332, + "epoch": 1.6800338887319968, + "grad_norm": 3.439596434299331, + "learning_rate": 8.547776485136957e-06, + "loss": 0.5404, "step": 11898 }, { - "epoch": 2.495072342210107, - "grad_norm": 3.254902344881698, - "learning_rate": 1.4494706469757958e-06, - "loss": 0.1072, + "epoch": 1.6801750917819827, + "grad_norm": 3.0716721957899957, + "learning_rate": 8.546268209718862e-06, + "loss": 0.5001, "step": 11899 }, { - "epoch": 2.4952820297756344, - "grad_norm": 5.16455998807153, - "learning_rate": 1.4482969497532717e-06, - "loss": 0.1644, + "epoch": 1.6803162948319683, + "grad_norm": 2.9402522796596027, + "learning_rate": 8.544759968084863e-06, + "loss": 0.4701, "step": 11900 }, { - "epoch": 2.4954917173411615, - "grad_norm": 4.645343865534257, - "learning_rate": 1.4471236908203623e-06, - "loss": 0.1583, + "epoch": 1.6804574978819542, + "grad_norm": 4.126089825878445, + "learning_rate": 8.543251760270013e-06, + "loss": 0.5241, "step": 11901 }, { - "epoch": 2.495701404906689, - "grad_norm": 3.996074222818098, - "learning_rate": 1.4459508702371939e-06, - "loss": 0.1125, + "epoch": 1.68059870093194, + "grad_norm": 3.4580561325043577, + "learning_rate": 8.541743586309366e-06, + "loss": 0.5369, "step": 11902 }, { - "epoch": 2.4959110924722165, - "grad_norm": 3.4354103713489246, - "learning_rate": 1.4447784880638837e-06, - "loss": 0.0866, + "epoch": 1.680739903981926, + "grad_norm": 3.3150190948575577, + "learning_rate": 8.540235446237967e-06, + "loss": 0.4515, "step": 11903 }, { - "epoch": 2.4961207800377436, - "grad_norm": 4.6672239621200085, - "learning_rate": 1.4436065443605108e-06, - "loss": 0.1495, + "epoch": 1.6808811070319118, + "grad_norm": 3.2151665450479, + "learning_rate": 8.53872734009087e-06, + "loss": 0.5344, "step": 11904 }, { - "epoch": 2.496330467603271, - "grad_norm": 3.5709244690887085, - "learning_rate": 1.4424350391871455e-06, - "loss": 0.1466, + "epoch": 1.6810223100818977, + "grad_norm": 3.5302943013673147, + "learning_rate": 8.537219267903115e-06, + "loss": 0.5848, "step": 11905 }, { - "epoch": 2.4965401551687987, - "grad_norm": 4.081820902926082, - "learning_rate": 1.441263972603827e-06, - "loss": 0.1474, + "epoch": 1.6811635131318836, + "grad_norm": 3.8622418323248304, + "learning_rate": 8.535711229709749e-06, + "loss": 0.6894, "step": 11906 }, { - "epoch": 2.4967498427343258, - "grad_norm": 3.2126615183171428, - "learning_rate": 1.4400933446705712e-06, - "loss": 0.1117, + "epoch": 1.6813047161818695, + "grad_norm": 4.7294114379445755, + "learning_rate": 8.534203225545824e-06, + "loss": 0.7566, "step": 11907 }, { - "epoch": 2.4969595302998533, - "grad_norm": 3.203532200101062, - "learning_rate": 1.4389231554473803e-06, - "loss": 0.1104, + "epoch": 1.6814459192318554, + "grad_norm": 4.079624387607387, + "learning_rate": 8.532695255446384e-06, + "loss": 0.6253, "step": 11908 }, { - "epoch": 2.4971692178653804, - "grad_norm": 3.929159555677046, - "learning_rate": 1.4377534049942265e-06, - "loss": 0.1301, + "epoch": 1.6815871222818413, + "grad_norm": 3.415157974730411, + "learning_rate": 8.53118731944647e-06, + "loss": 0.516, "step": 11909 }, { - "epoch": 2.497378905430908, - "grad_norm": 4.429654200956873, - "learning_rate": 1.4365840933710594e-06, - "loss": 0.147, + "epoch": 1.6817283253318271, + "grad_norm": 3.1271981421768706, + "learning_rate": 8.52967941758113e-06, + "loss": 0.5921, "step": 11910 }, { - "epoch": 2.4975885929964354, - "grad_norm": 3.69432365517285, - "learning_rate": 1.4354152206378092e-06, - "loss": 0.1379, + "epoch": 1.681869528381813, + "grad_norm": 3.3309398080967445, + "learning_rate": 8.528171549885409e-06, + "loss": 0.4864, "step": 11911 }, { - "epoch": 2.4977982805619625, - "grad_norm": 4.5691739791666315, - "learning_rate": 1.4342467868543863e-06, - "loss": 0.1538, + "epoch": 1.682010731431799, + "grad_norm": 3.3849159350185642, + "learning_rate": 8.52666371639434e-06, + "loss": 0.4973, "step": 11912 }, { - "epoch": 2.49800796812749, - "grad_norm": 5.3423270004317045, - "learning_rate": 1.433078792080672e-06, - "loss": 0.1998, + "epoch": 1.6821519344817848, + "grad_norm": 3.8222053962175035, + "learning_rate": 8.525155917142977e-06, + "loss": 0.646, "step": 11913 }, { - "epoch": 2.4982176556930176, - "grad_norm": 4.046194460253145, - "learning_rate": 1.4319112363765253e-06, - "loss": 0.1309, + "epoch": 1.6822931375317707, + "grad_norm": 3.8354437254392786, + "learning_rate": 8.523648152166349e-06, + "loss": 0.5603, "step": 11914 }, { - "epoch": 2.4984273432585447, - "grad_norm": 4.337037682355158, - "learning_rate": 1.4307441198017912e-06, - "loss": 0.1344, + "epoch": 1.6824343405817566, + "grad_norm": 4.653551489276745, + "learning_rate": 8.522140421499499e-06, + "loss": 0.6588, "step": 11915 }, { - "epoch": 2.498637030824072, - "grad_norm": 5.701306146940049, - "learning_rate": 1.429577442416281e-06, - "loss": 0.1413, + "epoch": 1.6825755436317424, + "grad_norm": 3.6917062912717924, + "learning_rate": 8.520632725177468e-06, + "loss": 0.601, "step": 11916 }, { - "epoch": 2.4988467183895997, - "grad_norm": 4.030430621545223, - "learning_rate": 1.4284112042797938e-06, - "loss": 0.1435, + "epoch": 1.6827167466817283, + "grad_norm": 3.2943006528974537, + "learning_rate": 8.519125063235293e-06, + "loss": 0.4501, "step": 11917 }, { - "epoch": 2.499056405955127, - "grad_norm": 4.4463605065432015, - "learning_rate": 1.4272454054520989e-06, - "loss": 0.1432, + "epoch": 1.6828579497317142, + "grad_norm": 2.9020728347640077, + "learning_rate": 8.517617435708011e-06, + "loss": 0.4228, "step": 11918 }, { - "epoch": 2.4992660935206543, - "grad_norm": 5.363547812191036, - "learning_rate": 1.4260800459929447e-06, - "loss": 0.1782, + "epoch": 1.6829991527817, + "grad_norm": 4.2663202857206, + "learning_rate": 8.516109842630664e-06, + "loss": 0.7258, "step": 11919 }, { - "epoch": 2.4994757810861814, - "grad_norm": 6.644405939881724, - "learning_rate": 1.4249151259620575e-06, - "loss": 0.1386, + "epoch": 1.683140355831686, + "grad_norm": 3.7813351125697343, + "learning_rate": 8.51460228403828e-06, + "loss": 0.6253, "step": 11920 }, { - "epoch": 2.499685468651709, - "grad_norm": 3.6327690025771116, - "learning_rate": 1.4237506454191452e-06, - "loss": 0.1134, + "epoch": 1.6832815588816719, + "grad_norm": 3.0451526183866946, + "learning_rate": 8.513094759965904e-06, + "loss": 0.467, "step": 11921 }, { - "epoch": 2.4998951562172365, - "grad_norm": 4.015831231529636, - "learning_rate": 1.422586604423888e-06, - "loss": 0.1499, + "epoch": 1.6834227619316577, + "grad_norm": 3.4390450369385994, + "learning_rate": 8.511587270448556e-06, + "loss": 0.5002, "step": 11922 }, { - "epoch": 2.5001048437827635, - "grad_norm": 4.13129517418543, - "learning_rate": 1.421423003035941e-06, - "loss": 0.1274, + "epoch": 1.6835639649816436, + "grad_norm": 3.8699248988901553, + "learning_rate": 8.510079815521278e-06, + "loss": 0.516, "step": 11923 }, { - "epoch": 2.500314531348291, - "grad_norm": 3.805114010358794, - "learning_rate": 1.420259841314946e-06, - "loss": 0.1407, + "epoch": 1.6837051680316295, + "grad_norm": 3.8654364724422123, + "learning_rate": 8.508572395219104e-06, + "loss": 0.6801, "step": 11924 }, { - "epoch": 2.500524218913818, - "grad_norm": 5.7804665427700614, - "learning_rate": 1.419097119320515e-06, - "loss": 0.1982, + "epoch": 1.6838463710816154, + "grad_norm": 3.6302794554577353, + "learning_rate": 8.507065009577062e-06, + "loss": 0.5823, "step": 11925 }, { - "epoch": 2.5007339064793457, - "grad_norm": 5.501558371091093, - "learning_rate": 1.417934837112237e-06, - "loss": 0.1747, + "epoch": 1.6839875741316013, + "grad_norm": 3.436179194359734, + "learning_rate": 8.505557658630186e-06, + "loss": 0.5481, "step": 11926 }, { - "epoch": 2.500943594044873, - "grad_norm": 3.276467546611582, - "learning_rate": 1.4167729947496866e-06, - "loss": 0.1309, + "epoch": 1.6841287771815872, + "grad_norm": 4.212051964642586, + "learning_rate": 8.504050342413501e-06, + "loss": 0.6707, "step": 11927 }, { - "epoch": 2.5011532816104003, - "grad_norm": 4.0470134797024055, - "learning_rate": 1.415611592292404e-06, - "loss": 0.1292, + "epoch": 1.684269980231573, + "grad_norm": 3.8337224614408294, + "learning_rate": 8.502543060962043e-06, + "loss": 0.5991, "step": 11928 }, { - "epoch": 2.501362969175928, - "grad_norm": 4.432152411732758, - "learning_rate": 1.4144506297999194e-06, - "loss": 0.1484, + "epoch": 1.684411183281559, + "grad_norm": 3.3989261823118007, + "learning_rate": 8.501035814310837e-06, + "loss": 0.5847, "step": 11929 }, { - "epoch": 2.5015726567414553, - "grad_norm": 3.6012859174280756, - "learning_rate": 1.4132901073317295e-06, - "loss": 0.1143, + "epoch": 1.6845523863315448, + "grad_norm": 4.498316434136469, + "learning_rate": 8.499528602494914e-06, + "loss": 0.6681, "step": 11930 }, { - "epoch": 2.5017823443069824, - "grad_norm": 4.273226823443075, - "learning_rate": 1.4121300249473135e-06, - "loss": 0.1488, + "epoch": 1.6846935893815307, + "grad_norm": 3.3969688170107584, + "learning_rate": 8.498021425549297e-06, + "loss": 0.6057, "step": 11931 }, { - "epoch": 2.50199203187251, - "grad_norm": 4.432798716182812, - "learning_rate": 1.4109703827061282e-06, - "loss": 0.1273, + "epoch": 1.6848347924315166, + "grad_norm": 3.0295022580033124, + "learning_rate": 8.49651428350901e-06, + "loss": 0.4096, "step": 11932 }, { - "epoch": 2.5022017194380375, - "grad_norm": 4.79172093440161, - "learning_rate": 1.4098111806676095e-06, - "loss": 0.1554, + "epoch": 1.6849759954815025, + "grad_norm": 2.562232881069511, + "learning_rate": 8.495007176409084e-06, + "loss": 0.4406, "step": 11933 }, { - "epoch": 2.5024114070035646, - "grad_norm": 4.341485738331202, - "learning_rate": 1.4086524188911677e-06, - "loss": 0.1349, + "epoch": 1.6851171985314881, + "grad_norm": 3.6538137182320933, + "learning_rate": 8.493500104284539e-06, + "loss": 0.6067, "step": 11934 }, { - "epoch": 2.502621094569092, - "grad_norm": 3.915140412456316, - "learning_rate": 1.4074940974361872e-06, - "loss": 0.1279, + "epoch": 1.685258401581474, + "grad_norm": 3.143094142837082, + "learning_rate": 8.491993067170402e-06, + "loss": 0.5265, "step": 11935 }, { - "epoch": 2.5028307821346196, - "grad_norm": 4.968978162466264, - "learning_rate": 1.4063362163620397e-06, - "loss": 0.1358, + "epoch": 1.6853996046314599, + "grad_norm": 3.812238440942335, + "learning_rate": 8.490486065101698e-06, + "loss": 0.6259, "step": 11936 }, { - "epoch": 2.5030404697001467, - "grad_norm": 4.410629895339175, - "learning_rate": 1.4051787757280632e-06, - "loss": 0.1137, + "epoch": 1.6855408076814458, + "grad_norm": 3.696254718380954, + "learning_rate": 8.488979098113443e-06, + "loss": 0.5517, "step": 11937 }, { - "epoch": 2.5032501572656742, - "grad_norm": 4.811616931515607, - "learning_rate": 1.4040217755935847e-06, - "loss": 0.1526, + "epoch": 1.6856820107314316, + "grad_norm": 4.679432889335485, + "learning_rate": 8.487472166240665e-06, + "loss": 0.7741, "step": 11938 }, { - "epoch": 2.5034598448312018, - "grad_norm": 5.096220643223814, - "learning_rate": 1.402865216017899e-06, - "loss": 0.1355, + "epoch": 1.6858232137814175, + "grad_norm": 3.554304780013752, + "learning_rate": 8.485965269518376e-06, + "loss": 0.5494, "step": 11939 }, { - "epoch": 2.503669532396729, - "grad_norm": 3.1627960000059105, - "learning_rate": 1.4017090970602786e-06, - "loss": 0.0897, + "epoch": 1.6859644168314034, + "grad_norm": 3.4813361364161315, + "learning_rate": 8.484458407981601e-06, + "loss": 0.5278, "step": 11940 }, { - "epoch": 2.5038792199622564, - "grad_norm": 4.976500763802138, - "learning_rate": 1.4005534187799807e-06, - "loss": 0.1876, + "epoch": 1.6861056198813893, + "grad_norm": 3.591135728061469, + "learning_rate": 8.482951581665359e-06, + "loss": 0.5692, "step": 11941 }, { - "epoch": 2.5040889075277835, - "grad_norm": 5.218924275003913, - "learning_rate": 1.39939818123624e-06, - "loss": 0.1678, + "epoch": 1.6862468229313752, + "grad_norm": 4.142239881597264, + "learning_rate": 8.481444790604668e-06, + "loss": 0.6604, "step": 11942 }, { - "epoch": 2.504298595093311, - "grad_norm": 3.584119962900178, - "learning_rate": 1.3982433844882548e-06, - "loss": 0.1082, + "epoch": 1.686388025981361, + "grad_norm": 3.83618552375508, + "learning_rate": 8.479938034834544e-06, + "loss": 0.6222, "step": 11943 }, { - "epoch": 2.504508282658838, - "grad_norm": 4.01421327292469, - "learning_rate": 1.3970890285952155e-06, - "loss": 0.1482, + "epoch": 1.686529229031347, + "grad_norm": 3.1993426566388967, + "learning_rate": 8.478431314390002e-06, + "loss": 0.4609, "step": 11944 }, { - "epoch": 2.5047179702243656, - "grad_norm": 3.562057101592667, - "learning_rate": 1.3959351136162858e-06, - "loss": 0.1407, + "epoch": 1.6866704320813328, + "grad_norm": 3.4014673568017053, + "learning_rate": 8.47692462930606e-06, + "loss": 0.6253, "step": 11945 }, { - "epoch": 2.504927657789893, - "grad_norm": 3.533009160033275, - "learning_rate": 1.394781639610605e-06, - "loss": 0.133, + "epoch": 1.6868116351313187, + "grad_norm": 3.495760303803192, + "learning_rate": 8.475417979617732e-06, + "loss": 0.5142, "step": 11946 }, { - "epoch": 2.50513734535542, - "grad_norm": 3.8199708111494934, - "learning_rate": 1.3936286066372874e-06, - "loss": 0.101, + "epoch": 1.6869528381813046, + "grad_norm": 3.153674721075382, + "learning_rate": 8.473911365360034e-06, + "loss": 0.5628, "step": 11947 }, { - "epoch": 2.5053470329209477, - "grad_norm": 3.9010696471935677, - "learning_rate": 1.392476014755434e-06, - "loss": 0.1213, + "epoch": 1.6870940412312905, + "grad_norm": 3.3615557192352616, + "learning_rate": 8.472404786567974e-06, + "loss": 0.6918, "step": 11948 }, { - "epoch": 2.5055567204864753, - "grad_norm": 4.568045863687571, - "learning_rate": 1.391323864024111e-06, - "loss": 0.1584, + "epoch": 1.6872352442812764, + "grad_norm": 3.341137733312112, + "learning_rate": 8.470898243276567e-06, + "loss": 0.5935, "step": 11949 }, { - "epoch": 2.5057664080520023, - "grad_norm": 4.273464364393656, - "learning_rate": 1.3901721545023727e-06, - "loss": 0.135, + "epoch": 1.6873764473312622, + "grad_norm": 4.166899821663311, + "learning_rate": 8.469391735520824e-06, + "loss": 0.6654, "step": 11950 }, { - "epoch": 2.50597609561753, - "grad_norm": 4.957609641711073, - "learning_rate": 1.3890208862492449e-06, - "loss": 0.1421, + "epoch": 1.6875176503812481, + "grad_norm": 2.593401378475758, + "learning_rate": 8.467885263335758e-06, + "loss": 0.4129, "step": 11951 }, { - "epoch": 2.5061857831830574, - "grad_norm": 3.625858105636269, - "learning_rate": 1.3878700593237305e-06, - "loss": 0.1555, + "epoch": 1.687658853431234, + "grad_norm": 3.0162962430221807, + "learning_rate": 8.466378826756373e-06, + "loss": 0.4495, "step": 11952 }, { - "epoch": 2.5063954707485845, - "grad_norm": 3.7182816054675873, - "learning_rate": 1.386719673784811e-06, - "loss": 0.1388, + "epoch": 1.68780005648122, + "grad_norm": 4.630351245004714, + "learning_rate": 8.464872425817685e-06, + "loss": 0.6902, "step": 11953 }, { - "epoch": 2.506605158314112, - "grad_norm": 3.916620171108524, - "learning_rate": 1.3855697296914494e-06, - "loss": 0.1454, + "epoch": 1.6879412595312058, + "grad_norm": 3.5297394210807904, + "learning_rate": 8.463366060554698e-06, + "loss": 0.5952, "step": 11954 }, { - "epoch": 2.5068148458796395, - "grad_norm": 4.053578213200887, - "learning_rate": 1.3844202271025808e-06, - "loss": 0.1579, + "epoch": 1.6880824625811917, + "grad_norm": 4.213731713748684, + "learning_rate": 8.461859731002424e-06, + "loss": 0.5896, "step": 11955 }, { - "epoch": 2.5070245334451666, - "grad_norm": 3.7737396159521586, - "learning_rate": 1.383271166077117e-06, - "loss": 0.1041, + "epoch": 1.6882236656311775, + "grad_norm": 3.646529711848193, + "learning_rate": 8.460353437195864e-06, + "loss": 0.564, "step": 11956 }, { - "epoch": 2.507234221010694, - "grad_norm": 5.14656676329622, - "learning_rate": 1.3821225466739497e-06, - "loss": 0.1447, + "epoch": 1.6883648686811634, + "grad_norm": 4.012265138496627, + "learning_rate": 8.45884717917002e-06, + "loss": 0.5982, "step": 11957 }, { - "epoch": 2.5074439085762217, - "grad_norm": 5.132166351965843, - "learning_rate": 1.3809743689519517e-06, - "loss": 0.1484, + "epoch": 1.6885060717311493, + "grad_norm": 3.3979530396281064, + "learning_rate": 8.457340956959905e-06, + "loss": 0.5098, "step": 11958 }, { - "epoch": 2.5076535961417488, - "grad_norm": 4.81106712073615, - "learning_rate": 1.3798266329699662e-06, - "loss": 0.14, + "epoch": 1.6886472747811352, + "grad_norm": 3.662600791436109, + "learning_rate": 8.455834770600522e-06, + "loss": 0.4891, "step": 11959 }, { - "epoch": 2.5078632837072763, - "grad_norm": 5.235037893516624, - "learning_rate": 1.3786793387868136e-06, - "loss": 0.1846, + "epoch": 1.688788477831121, + "grad_norm": 3.953349824344211, + "learning_rate": 8.454328620126871e-06, + "loss": 0.5781, "step": 11960 }, { - "epoch": 2.5080729712728034, - "grad_norm": 4.257054935877029, - "learning_rate": 1.3775324864613005e-06, - "loss": 0.1157, + "epoch": 1.688929680881107, + "grad_norm": 3.6772406665479784, + "learning_rate": 8.452822505573952e-06, + "loss": 0.5629, "step": 11961 }, { - "epoch": 2.508282658838331, - "grad_norm": 4.103525328318305, - "learning_rate": 1.3763860760522007e-06, - "loss": 0.1356, + "epoch": 1.6890708839310928, + "grad_norm": 3.1185899892635205, + "learning_rate": 8.451316426976773e-06, + "loss": 0.4564, "step": 11962 }, { - "epoch": 2.508492346403858, - "grad_norm": 3.149713995941829, - "learning_rate": 1.3752401076182732e-06, - "loss": 0.0879, + "epoch": 1.6892120869810787, + "grad_norm": 3.813657972758221, + "learning_rate": 8.44981038437033e-06, + "loss": 0.6108, "step": 11963 }, { - "epoch": 2.5087020339693855, - "grad_norm": 4.429226229382079, - "learning_rate": 1.3740945812182493e-06, - "loss": 0.1267, + "epoch": 1.6893532900310646, + "grad_norm": 3.2089515395563075, + "learning_rate": 8.448304377789628e-06, + "loss": 0.376, "step": 11964 }, { - "epoch": 2.508911721534913, - "grad_norm": 6.547600176378466, - "learning_rate": 1.372949496910837e-06, - "loss": 0.1851, + "epoch": 1.6894944930810505, + "grad_norm": 3.9801835298305046, + "learning_rate": 8.44679840726966e-06, + "loss": 0.5776, "step": 11965 }, { - "epoch": 2.50912140910044, - "grad_norm": 5.478268930419371, - "learning_rate": 1.3718048547547247e-06, - "loss": 0.1578, + "epoch": 1.6896356961310364, + "grad_norm": 3.8884405820722763, + "learning_rate": 8.445292472845423e-06, + "loss": 0.5844, "step": 11966 }, { - "epoch": 2.5093310966659677, - "grad_norm": 3.4089511869476508, - "learning_rate": 1.3706606548085843e-06, - "loss": 0.1025, + "epoch": 1.6897768991810223, + "grad_norm": 3.1653196236020205, + "learning_rate": 8.44378657455192e-06, + "loss": 0.4808, "step": 11967 }, { - "epoch": 2.509540784231495, - "grad_norm": 4.272993198373761, - "learning_rate": 1.3695168971310468e-06, - "loss": 0.1182, + "epoch": 1.6899181022310081, + "grad_norm": 4.127114579013589, + "learning_rate": 8.442280712424146e-06, + "loss": 0.6901, "step": 11968 }, { - "epoch": 2.5097504717970223, - "grad_norm": 3.8687937158312473, - "learning_rate": 1.3683735817807375e-06, - "loss": 0.123, + "epoch": 1.690059305280994, + "grad_norm": 4.033862980037114, + "learning_rate": 8.440774886497091e-06, + "loss": 0.666, "step": 11969 }, { - "epoch": 2.50996015936255, - "grad_norm": 5.116617846085351, - "learning_rate": 1.3672307088162552e-06, - "loss": 0.1805, + "epoch": 1.69020050833098, + "grad_norm": 3.8528238498850946, + "learning_rate": 8.439269096805758e-06, + "loss": 0.5511, "step": 11970 }, { - "epoch": 2.5101698469280773, - "grad_norm": 3.416887576970437, - "learning_rate": 1.3660882782961716e-06, - "loss": 0.1152, + "epoch": 1.6903417113809658, + "grad_norm": 2.8284339327128256, + "learning_rate": 8.437763343385139e-06, + "loss": 0.4253, "step": 11971 }, { - "epoch": 2.5103795344936044, - "grad_norm": 4.35034445318283, - "learning_rate": 1.3649462902790366e-06, - "loss": 0.1452, + "epoch": 1.6904829144309517, + "grad_norm": 3.3682359222072615, + "learning_rate": 8.436257626270225e-06, + "loss": 0.4953, "step": 11972 }, { - "epoch": 2.510589222059132, - "grad_norm": 5.142296630161542, - "learning_rate": 1.3638047448233828e-06, - "loss": 0.1513, + "epoch": 1.6906241174809375, + "grad_norm": 3.71691343224829, + "learning_rate": 8.434751945496006e-06, + "loss": 0.6153, "step": 11973 }, { - "epoch": 2.5107989096246595, - "grad_norm": 4.3035813605968425, - "learning_rate": 1.3626636419877127e-06, - "loss": 0.15, + "epoch": 1.6907653205309234, + "grad_norm": 3.7289965768185294, + "learning_rate": 8.433246301097477e-06, + "loss": 0.6627, "step": 11974 }, { - "epoch": 2.5110085971901865, - "grad_norm": 4.168059628049141, - "learning_rate": 1.361522981830513e-06, - "loss": 0.1372, + "epoch": 1.6909065235809093, + "grad_norm": 3.148609326314491, + "learning_rate": 8.431740693109624e-06, + "loss": 0.4549, "step": 11975 }, { - "epoch": 2.511218284755714, - "grad_norm": 4.294870549754423, - "learning_rate": 1.3603827644102442e-06, - "loss": 0.1195, + "epoch": 1.6910477266308952, + "grad_norm": 3.302838609807822, + "learning_rate": 8.430235121567444e-06, + "loss": 0.5476, "step": 11976 }, { - "epoch": 2.5114279723212416, - "grad_norm": 4.442401668544624, - "learning_rate": 1.359242989785341e-06, - "loss": 0.1594, + "epoch": 1.691188929680881, + "grad_norm": 4.193818876843916, + "learning_rate": 8.42872958650592e-06, + "loss": 0.549, "step": 11977 }, { - "epoch": 2.5116376598867687, - "grad_norm": 4.309957658452955, - "learning_rate": 1.3581036580142203e-06, - "loss": 0.1546, + "epoch": 1.691330132730867, + "grad_norm": 3.7707387315257583, + "learning_rate": 8.42722408796004e-06, + "loss": 0.5613, "step": 11978 }, { - "epoch": 2.511847347452296, - "grad_norm": 3.985671009795197, - "learning_rate": 1.356964769155279e-06, - "loss": 0.1014, + "epoch": 1.6914713357808528, + "grad_norm": 4.104939895534065, + "learning_rate": 8.425718625964796e-06, + "loss": 0.5696, "step": 11979 }, { - "epoch": 2.5120570350178233, - "grad_norm": 4.412188089368526, - "learning_rate": 1.3558263232668834e-06, - "loss": 0.1645, + "epoch": 1.6916125388308387, + "grad_norm": 4.471213485672427, + "learning_rate": 8.424213200555171e-06, + "loss": 0.6467, "step": 11980 }, { - "epoch": 2.512266722583351, - "grad_norm": 5.79969767940507, - "learning_rate": 1.3546883204073791e-06, - "loss": 0.1546, + "epoch": 1.6917537418808246, + "grad_norm": 3.5709493862125705, + "learning_rate": 8.422707811766153e-06, + "loss": 0.5402, "step": 11981 }, { - "epoch": 2.512476410148878, - "grad_norm": 4.120770821190215, - "learning_rate": 1.353550760635095e-06, - "loss": 0.1303, + "epoch": 1.6918949449308105, + "grad_norm": 4.186321471820759, + "learning_rate": 8.42120245963272e-06, + "loss": 0.5831, "step": 11982 }, { - "epoch": 2.5126860977144054, - "grad_norm": 4.666419260275762, - "learning_rate": 1.352413644008329e-06, - "loss": 0.1216, + "epoch": 1.6920361479807964, + "grad_norm": 4.426383209009122, + "learning_rate": 8.419697144189861e-06, + "loss": 0.5967, "step": 11983 }, { - "epoch": 2.512895785279933, - "grad_norm": 4.828422451771381, - "learning_rate": 1.3512769705853645e-06, - "loss": 0.1379, + "epoch": 1.6921773510307823, + "grad_norm": 3.6132591871042257, + "learning_rate": 8.418191865472559e-06, + "loss": 0.4595, "step": 11984 }, { - "epoch": 2.51310547284546, - "grad_norm": 5.226512129823166, - "learning_rate": 1.350140740424456e-06, - "loss": 0.1472, + "epoch": 1.6923185540807681, + "grad_norm": 3.5302187678725034, + "learning_rate": 8.416686623515794e-06, + "loss": 0.5307, "step": 11985 }, { - "epoch": 2.5133151604109876, - "grad_norm": 3.553453144006635, - "learning_rate": 1.3490049535838346e-06, - "loss": 0.1275, + "epoch": 1.692459757130754, + "grad_norm": 3.543861937630865, + "learning_rate": 8.415181418354548e-06, + "loss": 0.5766, "step": 11986 }, { - "epoch": 2.513524847976515, - "grad_norm": 3.909624118065923, - "learning_rate": 1.3478696101217147e-06, - "loss": 0.1281, + "epoch": 1.69260096018074, + "grad_norm": 3.190792974932769, + "learning_rate": 8.4136762500238e-06, + "loss": 0.4955, "step": 11987 }, { - "epoch": 2.513734535542042, - "grad_norm": 3.401753074454119, - "learning_rate": 1.3467347100962846e-06, - "loss": 0.139, + "epoch": 1.6927421632307258, + "grad_norm": 3.628818886049226, + "learning_rate": 8.412171118558534e-06, + "loss": 0.5936, "step": 11988 }, { - "epoch": 2.5139442231075697, - "grad_norm": 4.843046943435912, - "learning_rate": 1.3456002535657075e-06, - "loss": 0.1664, + "epoch": 1.6928833662807117, + "grad_norm": 4.0765163571882645, + "learning_rate": 8.410666023993727e-06, + "loss": 0.6075, "step": 11989 }, { - "epoch": 2.5141539106730972, - "grad_norm": 4.106305895195221, - "learning_rate": 1.344466240588127e-06, - "loss": 0.1149, + "epoch": 1.6930245693306976, + "grad_norm": 3.7392567212805994, + "learning_rate": 8.409160966364351e-06, + "loss": 0.5722, "step": 11990 }, { - "epoch": 2.5143635982386243, - "grad_norm": 3.5149617416525523, - "learning_rate": 1.3433326712216655e-06, - "loss": 0.1361, + "epoch": 1.6931657723806834, + "grad_norm": 3.1492757431061995, + "learning_rate": 8.40765594570539e-06, + "loss": 0.4482, "step": 11991 }, { - "epoch": 2.514573285804152, - "grad_norm": 4.009176114200098, - "learning_rate": 1.3421995455244197e-06, - "loss": 0.1114, + "epoch": 1.6933069754306693, + "grad_norm": 3.901422606042593, + "learning_rate": 8.406150962051813e-06, + "loss": 0.632, "step": 11992 }, { - "epoch": 2.5147829733696794, - "grad_norm": 6.195707950592568, - "learning_rate": 1.3410668635544611e-06, - "loss": 0.1424, + "epoch": 1.6934481784806552, + "grad_norm": 3.6485869025523656, + "learning_rate": 8.404646015438602e-06, + "loss": 0.5655, "step": 11993 }, { - "epoch": 2.5149926609352065, - "grad_norm": 4.342843562877368, - "learning_rate": 1.3399346253698453e-06, - "loss": 0.1674, + "epoch": 1.693589381530641, + "grad_norm": 3.337371130768836, + "learning_rate": 8.40314110590073e-06, + "loss": 0.5529, "step": 11994 }, { - "epoch": 2.515202348500734, - "grad_norm": 4.2814391036323824, - "learning_rate": 1.3388028310285984e-06, - "loss": 0.1472, + "epoch": 1.693730584580627, + "grad_norm": 3.072518133162186, + "learning_rate": 8.401636233473164e-06, + "loss": 0.4901, "step": 11995 }, { - "epoch": 2.5154120360662615, - "grad_norm": 3.8543805964456577, - "learning_rate": 1.3376714805887313e-06, - "loss": 0.1184, + "epoch": 1.6938717876306129, + "grad_norm": 4.066319098280157, + "learning_rate": 8.400131398190887e-06, + "loss": 0.6101, "step": 11996 }, { - "epoch": 2.5156217236317886, - "grad_norm": 5.104708674824505, - "learning_rate": 1.3365405741082238e-06, - "loss": 0.1436, + "epoch": 1.6940129906805987, + "grad_norm": 3.091061795611824, + "learning_rate": 8.398626600088866e-06, + "loss": 0.5372, "step": 11997 }, { - "epoch": 2.515831411197316, - "grad_norm": 4.311931555366447, - "learning_rate": 1.3354101116450358e-06, - "loss": 0.1526, + "epoch": 1.6941541937305846, + "grad_norm": 3.408413783819742, + "learning_rate": 8.397121839202069e-06, + "loss": 0.6069, "step": 11998 }, { - "epoch": 2.516041098762843, - "grad_norm": 4.714281472228413, - "learning_rate": 1.3342800932571088e-06, - "loss": 0.1513, + "epoch": 1.6942953967805705, + "grad_norm": 3.530090811552823, + "learning_rate": 8.395617115565468e-06, + "loss": 0.5876, "step": 11999 }, { - "epoch": 2.5162507863283707, - "grad_norm": 4.154490004019689, - "learning_rate": 1.3331505190023575e-06, - "loss": 0.0979, + "epoch": 1.6944365998305564, + "grad_norm": 3.1920548127487316, + "learning_rate": 8.394112429214032e-06, + "loss": 0.4328, "step": 12000 }, { - "epoch": 2.516460473893898, - "grad_norm": 3.556873884857982, - "learning_rate": 1.3320213889386758e-06, - "loss": 0.0952, + "epoch": 1.6945778028805423, + "grad_norm": 4.896839745851315, + "learning_rate": 8.39260778018273e-06, + "loss": 0.9414, "step": 12001 }, { - "epoch": 2.5166701614594253, - "grad_norm": 6.4603490518250855, - "learning_rate": 1.3308927031239283e-06, - "loss": 0.1621, + "epoch": 1.6947190059305282, + "grad_norm": 2.625872035510413, + "learning_rate": 8.391103168506529e-06, + "loss": 0.4458, "step": 12002 }, { - "epoch": 2.516879849024953, - "grad_norm": 3.5325152788296665, - "learning_rate": 1.3297644616159678e-06, - "loss": 0.1358, + "epoch": 1.694860208980514, + "grad_norm": 3.6048671729879818, + "learning_rate": 8.389598594220395e-06, + "loss": 0.5273, "step": 12003 }, { - "epoch": 2.51708953659048, - "grad_norm": 4.93648754299256, - "learning_rate": 1.3286366644726167e-06, - "loss": 0.1642, + "epoch": 1.6950014120305, + "grad_norm": 3.2422973570379474, + "learning_rate": 8.388094057359295e-06, + "loss": 0.4852, "step": 12004 }, { - "epoch": 2.5172992241560075, - "grad_norm": 4.869240160726903, - "learning_rate": 1.3275093117516736e-06, - "loss": 0.1486, + "epoch": 1.6951426150804858, + "grad_norm": 3.5559299875739123, + "learning_rate": 8.386589557958192e-06, + "loss": 0.6569, "step": 12005 }, { - "epoch": 2.517508911721535, - "grad_norm": 4.149418153152551, - "learning_rate": 1.326382403510923e-06, - "loss": 0.1435, + "epoch": 1.6952838181304717, + "grad_norm": 3.709460825178122, + "learning_rate": 8.385085096052053e-06, + "loss": 0.5762, "step": 12006 }, { - "epoch": 2.517718599287062, - "grad_norm": 3.2486215991616367, - "learning_rate": 1.3252559398081143e-06, - "loss": 0.1182, + "epoch": 1.6954250211804576, + "grad_norm": 4.413747350314546, + "learning_rate": 8.383580671675839e-06, + "loss": 0.6347, "step": 12007 }, { - "epoch": 2.5179282868525896, - "grad_norm": 4.42344339710403, - "learning_rate": 1.3241299207009884e-06, - "loss": 0.1441, + "epoch": 1.6955662242304435, + "grad_norm": 3.4335817173708554, + "learning_rate": 8.38207628486451e-06, + "loss": 0.5083, "step": 12008 }, { - "epoch": 2.518137974418117, - "grad_norm": 3.5949422929091317, - "learning_rate": 1.3230043462472497e-06, - "loss": 0.1115, + "epoch": 1.6957074272804293, + "grad_norm": 4.666933867619174, + "learning_rate": 8.380571935653029e-06, + "loss": 0.7187, "step": 12009 }, { - "epoch": 2.5183476619836442, - "grad_norm": 3.7304959039049366, - "learning_rate": 1.3218792165045868e-06, - "loss": 0.1156, + "epoch": 1.6958486303304152, + "grad_norm": 3.705435840967929, + "learning_rate": 8.379067624076358e-06, + "loss": 0.6462, "step": 12010 }, { - "epoch": 2.5185573495491718, - "grad_norm": 4.615123594995057, - "learning_rate": 1.3207545315306647e-06, - "loss": 0.1259, + "epoch": 1.695989833380401, + "grad_norm": 3.1508963033032775, + "learning_rate": 8.377563350169456e-06, + "loss": 0.4522, "step": 12011 }, { - "epoch": 2.5187670371146993, - "grad_norm": 4.589079645450028, - "learning_rate": 1.3196302913831294e-06, - "loss": 0.1156, + "epoch": 1.696131036430387, + "grad_norm": 3.787420179070938, + "learning_rate": 8.376059113967279e-06, + "loss": 0.5155, "step": 12012 }, { - "epoch": 2.5189767246802264, - "grad_norm": 4.313588161138957, - "learning_rate": 1.318506496119596e-06, - "loss": 0.1217, + "epoch": 1.6962722394803729, + "grad_norm": 3.1270551577841146, + "learning_rate": 8.374554915504787e-06, + "loss": 0.4895, "step": 12013 }, { - "epoch": 2.519186412245754, - "grad_norm": 3.4927513074928407, - "learning_rate": 1.3173831457976605e-06, - "loss": 0.1244, + "epoch": 1.6964134425303588, + "grad_norm": 3.0872476778712374, + "learning_rate": 8.373050754816942e-06, + "loss": 0.3875, "step": 12014 }, { - "epoch": 2.5193960998112814, - "grad_norm": 4.340219023502884, - "learning_rate": 1.316260240474897e-06, - "loss": 0.1357, + "epoch": 1.6965546455803446, + "grad_norm": 4.539227383276902, + "learning_rate": 8.37154663193869e-06, + "loss": 0.6069, "step": 12015 }, { - "epoch": 2.5196057873768085, - "grad_norm": 4.089764962489509, - "learning_rate": 1.3151377802088605e-06, - "loss": 0.1335, + "epoch": 1.6966958486303305, + "grad_norm": 3.523181698555804, + "learning_rate": 8.370042546904992e-06, + "loss": 0.48, "step": 12016 }, { - "epoch": 2.519815474942336, - "grad_norm": 4.450427337379712, - "learning_rate": 1.3140157650570762e-06, - "loss": 0.1692, + "epoch": 1.6968370516803164, + "grad_norm": 4.015778502127796, + "learning_rate": 8.368538499750803e-06, + "loss": 0.6202, "step": 12017 }, { - "epoch": 2.520025162507863, - "grad_norm": 4.7051551218925844, - "learning_rate": 1.3128941950770457e-06, - "loss": 0.145, + "epoch": 1.6969782547303023, + "grad_norm": 3.9183380504274234, + "learning_rate": 8.36703449051107e-06, + "loss": 0.5462, "step": 12018 }, { - "epoch": 2.5202348500733907, - "grad_norm": 5.663195163320106, - "learning_rate": 1.3117730703262576e-06, - "loss": 0.1676, + "epoch": 1.6971194577802882, + "grad_norm": 3.7154730430025285, + "learning_rate": 8.365530519220753e-06, + "loss": 0.5657, "step": 12019 }, { - "epoch": 2.5204445376389177, - "grad_norm": 4.918368963329637, - "learning_rate": 1.310652390862166e-06, - "loss": 0.1763, + "epoch": 1.697260660830274, + "grad_norm": 2.8318631002108927, + "learning_rate": 8.364026585914802e-06, + "loss": 0.4155, "step": 12020 }, { - "epoch": 2.5206542252044453, - "grad_norm": 4.095045628502151, - "learning_rate": 1.3095321567422125e-06, - "loss": 0.1267, + "epoch": 1.69740186388026, + "grad_norm": 3.2430120254574146, + "learning_rate": 8.362522690628165e-06, + "loss": 0.4545, "step": 12021 }, { - "epoch": 2.520863912769973, - "grad_norm": 4.391280609356593, - "learning_rate": 1.3084123680238093e-06, - "loss": 0.1359, + "epoch": 1.6975430669302458, + "grad_norm": 3.4776238402179014, + "learning_rate": 8.361018833395792e-06, + "loss": 0.4995, "step": 12022 }, { - "epoch": 2.5210736003355, - "grad_norm": 3.9288519600029908, - "learning_rate": 1.3072930247643446e-06, - "loss": 0.1255, + "epoch": 1.6976842699802317, + "grad_norm": 4.08553572457225, + "learning_rate": 8.35951501425264e-06, + "loss": 0.5972, "step": 12023 }, { - "epoch": 2.5212832879010274, - "grad_norm": 4.777140562168834, - "learning_rate": 1.306174127021189e-06, - "loss": 0.1261, + "epoch": 1.6978254730302176, + "grad_norm": 3.920123467933314, + "learning_rate": 8.358011233233646e-06, + "loss": 0.5979, "step": 12024 }, { - "epoch": 2.521492975466555, - "grad_norm": 4.355173517921103, - "learning_rate": 1.3050556748516928e-06, - "loss": 0.1759, + "epoch": 1.6979666760802035, + "grad_norm": 3.8094740367571287, + "learning_rate": 8.356507490373761e-06, + "loss": 0.6649, "step": 12025 }, { - "epoch": 2.521702663032082, - "grad_norm": 4.45975007443483, - "learning_rate": 1.3039376683131699e-06, - "loss": 0.1403, + "epoch": 1.6981078791301893, + "grad_norm": 3.6749445415920676, + "learning_rate": 8.355003785707932e-06, + "loss": 0.4896, "step": 12026 }, { - "epoch": 2.5219123505976095, - "grad_norm": 4.598514329648075, - "learning_rate": 1.3028201074629232e-06, - "loss": 0.162, + "epoch": 1.6982490821801752, + "grad_norm": 3.663610078213634, + "learning_rate": 8.353500119271106e-06, + "loss": 0.5436, "step": 12027 }, { - "epoch": 2.522122038163137, - "grad_norm": 4.305533104953591, - "learning_rate": 1.3017029923582325e-06, - "loss": 0.1732, + "epoch": 1.6983902852301611, + "grad_norm": 3.515624139345645, + "learning_rate": 8.351996491098227e-06, + "loss": 0.6488, "step": 12028 }, { - "epoch": 2.522331725728664, - "grad_norm": 3.3156434984605694, - "learning_rate": 1.300586323056351e-06, - "loss": 0.1259, + "epoch": 1.698531488280147, + "grad_norm": 3.3996488399975986, + "learning_rate": 8.350492901224237e-06, + "loss": 0.5146, "step": 12029 }, { - "epoch": 2.5225414132941917, - "grad_norm": 5.20202719323522, - "learning_rate": 1.299470099614506e-06, - "loss": 0.1702, + "epoch": 1.6986726913301329, + "grad_norm": 3.402353464726417, + "learning_rate": 8.348989349684077e-06, + "loss": 0.4307, "step": 12030 }, { - "epoch": 2.522751100859719, - "grad_norm": 5.03580238929334, - "learning_rate": 1.2983543220899109e-06, - "loss": 0.1698, + "epoch": 1.6988138943801188, + "grad_norm": 4.288558881589772, + "learning_rate": 8.347485836512696e-06, + "loss": 0.6446, "step": 12031 }, { - "epoch": 2.5229607884252463, - "grad_norm": 3.654331092047329, - "learning_rate": 1.2972389905397464e-06, - "loss": 0.1164, + "epoch": 1.6989550974301046, + "grad_norm": 3.716701170843746, + "learning_rate": 8.345982361745029e-06, + "loss": 0.4738, "step": 12032 }, { - "epoch": 2.523170475990774, - "grad_norm": 3.9086765493936197, - "learning_rate": 1.2961241050211803e-06, - "loss": 0.1512, + "epoch": 1.6990963004800905, + "grad_norm": 4.435573069445306, + "learning_rate": 8.344478925416017e-06, + "loss": 0.643, "step": 12033 }, { - "epoch": 2.5233801635563013, - "grad_norm": 5.749241166765074, - "learning_rate": 1.2950096655913502e-06, - "loss": 0.19, + "epoch": 1.6992375035300764, + "grad_norm": 4.15979042953053, + "learning_rate": 8.342975527560601e-06, + "loss": 0.5794, "step": 12034 }, { - "epoch": 2.5235898511218284, - "grad_norm": 4.504616013744337, - "learning_rate": 1.2938956723073702e-06, - "loss": 0.1533, + "epoch": 1.6993787065800623, + "grad_norm": 5.12326878406178, + "learning_rate": 8.341472168213714e-06, + "loss": 0.729, "step": 12035 }, { - "epoch": 2.523799538687356, - "grad_norm": 4.314661691316422, - "learning_rate": 1.2927821252263374e-06, - "loss": 0.1602, + "epoch": 1.699519909630048, + "grad_norm": 3.3165732673117296, + "learning_rate": 8.339968847410301e-06, + "loss": 0.5068, "step": 12036 }, { - "epoch": 2.524009226252883, - "grad_norm": 4.136352361865108, - "learning_rate": 1.2916690244053242e-06, - "loss": 0.1492, + "epoch": 1.6996611126800338, + "grad_norm": 3.8424771936465563, + "learning_rate": 8.338465565185295e-06, + "loss": 0.5853, "step": 12037 }, { - "epoch": 2.5242189138184106, - "grad_norm": 4.595024843800442, - "learning_rate": 1.290556369901378e-06, - "loss": 0.1283, + "epoch": 1.6998023157300197, + "grad_norm": 3.9273386462087925, + "learning_rate": 8.33696232157363e-06, + "loss": 0.5612, "step": 12038 }, { - "epoch": 2.524428601383938, - "grad_norm": 3.8727865206151737, - "learning_rate": 1.2894441617715203e-06, - "loss": 0.1525, + "epoch": 1.6999435187800056, + "grad_norm": 3.6001428234598616, + "learning_rate": 8.335459116610243e-06, + "loss": 0.631, "step": 12039 }, { - "epoch": 2.524638288949465, - "grad_norm": 3.9519770389262066, - "learning_rate": 1.2883324000727604e-06, - "loss": 0.1117, + "epoch": 1.7000847218299915, + "grad_norm": 3.749783494577981, + "learning_rate": 8.33395595033007e-06, + "loss": 0.6889, "step": 12040 }, { - "epoch": 2.5248479765149927, - "grad_norm": 4.368750688028554, - "learning_rate": 1.2872210848620714e-06, - "loss": 0.1499, + "epoch": 1.7002259248799774, + "grad_norm": 3.532879681610789, + "learning_rate": 8.33245282276804e-06, + "loss": 0.6552, "step": 12041 }, { - "epoch": 2.52505766408052, - "grad_norm": 3.555174894899924, - "learning_rate": 1.2861102161964168e-06, - "loss": 0.123, + "epoch": 1.7003671279299633, + "grad_norm": 3.635617782995768, + "learning_rate": 8.330949733959084e-06, + "loss": 0.6501, "step": 12042 }, { - "epoch": 2.5252673516460473, - "grad_norm": 3.9178413015490037, - "learning_rate": 1.2849997941327263e-06, - "loss": 0.1205, + "epoch": 1.7005083309799491, + "grad_norm": 3.278750401326439, + "learning_rate": 8.329446683938137e-06, + "loss": 0.5318, "step": 12043 }, { - "epoch": 2.525477039211575, - "grad_norm": 4.067045514018608, - "learning_rate": 1.2838898187279092e-06, - "loss": 0.1321, + "epoch": 1.700649534029935, + "grad_norm": 4.171329853229081, + "learning_rate": 8.327943672740126e-06, + "loss": 0.6559, "step": 12044 }, { - "epoch": 2.525686726777102, - "grad_norm": 6.014739319544709, - "learning_rate": 1.2827802900388565e-06, - "loss": 0.1853, + "epoch": 1.700790737079921, + "grad_norm": 4.093493653682459, + "learning_rate": 8.326440700399985e-06, + "loss": 0.6663, "step": 12045 }, { - "epoch": 2.5258964143426295, - "grad_norm": 4.17066000362069, - "learning_rate": 1.281671208122438e-06, - "loss": 0.1544, + "epoch": 1.7009319401299068, + "grad_norm": 3.5065347595147016, + "learning_rate": 8.324937766952638e-06, + "loss": 0.513, "step": 12046 }, { - "epoch": 2.526106101908157, - "grad_norm": 4.341134418176675, - "learning_rate": 1.2805625730354864e-06, - "loss": 0.1242, + "epoch": 1.7010731431798927, + "grad_norm": 3.6517954017906176, + "learning_rate": 8.323434872433011e-06, + "loss": 0.6003, "step": 12047 }, { - "epoch": 2.526315789473684, - "grad_norm": 3.922347868807532, - "learning_rate": 1.2794543848348263e-06, - "loss": 0.1365, + "epoch": 1.7012143462298785, + "grad_norm": 4.225272733663781, + "learning_rate": 8.32193201687604e-06, + "loss": 0.6019, "step": 12048 }, { - "epoch": 2.5265254770392116, - "grad_norm": 4.054015972457748, - "learning_rate": 1.2783466435772561e-06, - "loss": 0.1409, + "epoch": 1.7013555492798644, + "grad_norm": 3.1345018115954746, + "learning_rate": 8.320429200316638e-06, + "loss": 0.5076, "step": 12049 }, { - "epoch": 2.526735164604739, - "grad_norm": 4.0070372787037165, - "learning_rate": 1.2772393493195478e-06, - "loss": 0.1191, + "epoch": 1.7014967523298503, + "grad_norm": 3.2586703089023796, + "learning_rate": 8.31892642278974e-06, + "loss": 0.4971, "step": 12050 }, { - "epoch": 2.526944852170266, - "grad_norm": 5.828187932896821, - "learning_rate": 1.276132502118449e-06, - "loss": 0.1759, + "epoch": 1.7016379553798362, + "grad_norm": 3.1505818877166183, + "learning_rate": 8.317423684330263e-06, + "loss": 0.5951, "step": 12051 }, { - "epoch": 2.5271545397357937, - "grad_norm": 4.536732875710181, - "learning_rate": 1.2750261020306932e-06, - "loss": 0.1559, + "epoch": 1.701779158429822, + "grad_norm": 3.5226273104754986, + "learning_rate": 8.315920984973134e-06, + "loss": 0.6036, "step": 12052 }, { - "epoch": 2.5273642273013213, - "grad_norm": 4.102793797323526, - "learning_rate": 1.2739201491129793e-06, - "loss": 0.1697, + "epoch": 1.701920361479808, + "grad_norm": 4.409133754091182, + "learning_rate": 8.314418324753274e-06, + "loss": 0.6299, "step": 12053 }, { - "epoch": 2.5275739148668483, - "grad_norm": 3.5724096703256745, - "learning_rate": 1.2728146434219945e-06, - "loss": 0.1179, + "epoch": 1.7020615645297938, + "grad_norm": 3.7511346204498572, + "learning_rate": 8.312915703705603e-06, + "loss": 0.6291, "step": 12054 }, { - "epoch": 2.527783602432376, - "grad_norm": 4.669263104624574, - "learning_rate": 1.2717095850143957e-06, - "loss": 0.163, + "epoch": 1.7022027675797797, + "grad_norm": 3.1977204581923235, + "learning_rate": 8.311413121865044e-06, + "loss": 0.4962, "step": 12055 }, { - "epoch": 2.527993289997903, - "grad_norm": 4.6980404046320885, - "learning_rate": 1.2706049739468173e-06, - "loss": 0.1585, + "epoch": 1.7023439706297656, + "grad_norm": 3.6153937999220846, + "learning_rate": 8.309910579266514e-06, + "loss": 0.5574, "step": 12056 }, { - "epoch": 2.5282029775634305, - "grad_norm": 3.466171322114327, - "learning_rate": 1.2695008102758745e-06, - "loss": 0.1099, + "epoch": 1.7024851736797515, + "grad_norm": 3.672048776242605, + "learning_rate": 8.308408075944935e-06, + "loss": 0.594, "step": 12057 }, { - "epoch": 2.528412665128958, - "grad_norm": 3.8391463490694577, - "learning_rate": 1.2683970940581592e-06, - "loss": 0.1324, + "epoch": 1.7026263767297374, + "grad_norm": 4.491399068546746, + "learning_rate": 8.306905611935221e-06, + "loss": 0.5801, "step": 12058 }, { - "epoch": 2.528622352694485, - "grad_norm": 3.974302202206801, - "learning_rate": 1.2672938253502376e-06, - "loss": 0.1326, + "epoch": 1.7027675797797233, + "grad_norm": 3.3535772981552103, + "learning_rate": 8.305403187272288e-06, + "loss": 0.5142, "step": 12059 }, { - "epoch": 2.5288320402600126, - "grad_norm": 2.9930757205397143, - "learning_rate": 1.2661910042086511e-06, - "loss": 0.09, + "epoch": 1.7029087828297091, + "grad_norm": 3.666192198898361, + "learning_rate": 8.303900801991052e-06, + "loss": 0.5106, "step": 12060 }, { - "epoch": 2.5290417278255397, - "grad_norm": 4.293084581468155, - "learning_rate": 1.2650886306899257e-06, - "loss": 0.1623, + "epoch": 1.703049985879695, + "grad_norm": 2.8242289968525878, + "learning_rate": 8.302398456126429e-06, + "loss": 0.3964, "step": 12061 }, { - "epoch": 2.5292514153910672, - "grad_norm": 4.316655061744194, - "learning_rate": 1.2639867048505561e-06, - "loss": 0.1454, + "epoch": 1.703191188929681, + "grad_norm": 4.164194688050874, + "learning_rate": 8.300896149713334e-06, + "loss": 0.6733, "step": 12062 }, { - "epoch": 2.5294611029565948, - "grad_norm": 4.052800192011256, - "learning_rate": 1.2628852267470214e-06, - "loss": 0.1489, + "epoch": 1.7033323919796668, + "grad_norm": 2.6959153575720403, + "learning_rate": 8.299393882786679e-06, + "loss": 0.4173, "step": 12063 }, { - "epoch": 2.529670790522122, - "grad_norm": 4.266121938206998, - "learning_rate": 1.2617841964357735e-06, - "loss": 0.1304, + "epoch": 1.7034735950296527, + "grad_norm": 4.0850340507385585, + "learning_rate": 8.297891655381375e-06, + "loss": 0.6352, "step": 12064 }, { - "epoch": 2.5298804780876494, - "grad_norm": 4.010083714910853, - "learning_rate": 1.2606836139732381e-06, - "loss": 0.1256, + "epoch": 1.7036147980796386, + "grad_norm": 3.376835139144042, + "learning_rate": 8.296389467532338e-06, + "loss": 0.5652, "step": 12065 }, { - "epoch": 2.530090165653177, - "grad_norm": 5.1010578544502465, - "learning_rate": 1.2595834794158278e-06, - "loss": 0.1878, + "epoch": 1.7037560011296244, + "grad_norm": 3.7693602300000935, + "learning_rate": 8.294887319274467e-06, + "loss": 0.5357, "step": 12066 }, { - "epoch": 2.530299853218704, - "grad_norm": 4.626352693511925, - "learning_rate": 1.2584837928199234e-06, - "loss": 0.1639, + "epoch": 1.7038972041796103, + "grad_norm": 3.376901775109604, + "learning_rate": 8.29338521064268e-06, + "loss": 0.5715, "step": 12067 }, { - "epoch": 2.5305095407842315, - "grad_norm": 4.109030503348539, - "learning_rate": 1.257384554241884e-06, - "loss": 0.1417, + "epoch": 1.7040384072295962, + "grad_norm": 3.518801575256679, + "learning_rate": 8.291883141671888e-06, + "loss": 0.5103, "step": 12068 }, { - "epoch": 2.530719228349759, - "grad_norm": 4.345874135370887, - "learning_rate": 1.2562857637380487e-06, - "loss": 0.1285, + "epoch": 1.704179610279582, + "grad_norm": 3.2649482779591725, + "learning_rate": 8.290381112396989e-06, + "loss": 0.4803, "step": 12069 }, { - "epoch": 2.530928915915286, - "grad_norm": 3.897247403588684, - "learning_rate": 1.2551874213647363e-06, - "loss": 0.127, + "epoch": 1.704320813329568, + "grad_norm": 3.7257827447448, + "learning_rate": 8.288879122852897e-06, + "loss": 0.6265, "step": 12070 }, { - "epoch": 2.5311386034808137, - "grad_norm": 5.046250749443973, - "learning_rate": 1.254089527178235e-06, - "loss": 0.178, + "epoch": 1.7044620163795536, + "grad_norm": 3.8864217531436775, + "learning_rate": 8.287377173074515e-06, + "loss": 0.643, "step": 12071 }, { - "epoch": 2.531348291046341, - "grad_norm": 3.923858141060383, - "learning_rate": 1.2529920812348129e-06, - "loss": 0.1153, + "epoch": 1.7046032194295395, + "grad_norm": 2.6843198184670163, + "learning_rate": 8.285875263096746e-06, + "loss": 0.4418, "step": 12072 }, { - "epoch": 2.5315579786118683, - "grad_norm": 4.857445168353048, - "learning_rate": 1.2518950835907196e-06, - "loss": 0.1494, + "epoch": 1.7047444224795254, + "grad_norm": 3.3659497245912977, + "learning_rate": 8.284373392954496e-06, + "loss": 0.5174, "step": 12073 }, { - "epoch": 2.531767666177396, - "grad_norm": 3.876565023185596, - "learning_rate": 1.2507985343021734e-06, - "loss": 0.1229, + "epoch": 1.7048856255295113, + "grad_norm": 4.965912136104793, + "learning_rate": 8.282871562682673e-06, + "loss": 0.6999, "step": 12074 }, { - "epoch": 2.5319773537429233, - "grad_norm": 4.286467944324081, - "learning_rate": 1.2497024334253794e-06, - "loss": 0.1652, + "epoch": 1.7050268285794972, + "grad_norm": 3.016330326272305, + "learning_rate": 8.28136977231617e-06, + "loss": 0.4263, "step": 12075 }, { - "epoch": 2.5321870413084504, - "grad_norm": 5.175026986386072, - "learning_rate": 1.2486067810165103e-06, - "loss": 0.1579, + "epoch": 1.705168031629483, + "grad_norm": 3.532192561985221, + "learning_rate": 8.27986802188989e-06, + "loss": 0.6173, "step": 12076 }, { - "epoch": 2.532396728873978, - "grad_norm": 5.199711775349933, - "learning_rate": 1.2475115771317247e-06, - "loss": 0.1594, + "epoch": 1.705309234679469, + "grad_norm": 4.4643229228843175, + "learning_rate": 8.278366311438735e-06, + "loss": 0.6838, "step": 12077 }, { - "epoch": 2.532606416439505, - "grad_norm": 3.231076942436497, - "learning_rate": 1.2464168218271488e-06, - "loss": 0.1059, + "epoch": 1.7054504377294548, + "grad_norm": 4.213501917480052, + "learning_rate": 8.276864640997602e-06, + "loss": 0.499, "step": 12078 }, { - "epoch": 2.5328161040050325, - "grad_norm": 4.880412867082489, - "learning_rate": 1.2453225151588955e-06, - "loss": 0.1153, + "epoch": 1.7055916407794407, + "grad_norm": 3.1710108834049353, + "learning_rate": 8.275363010601392e-06, + "loss": 0.5628, "step": 12079 }, { - "epoch": 2.5330257915705596, - "grad_norm": 4.1682662607423895, - "learning_rate": 1.2442286571830463e-06, - "loss": 0.1456, + "epoch": 1.7057328438294266, + "grad_norm": 4.700198323306438, + "learning_rate": 8.273861420285e-06, + "loss": 0.6558, "step": 12080 }, { - "epoch": 2.533235479136087, - "grad_norm": 4.932167973036506, - "learning_rate": 1.2431352479556635e-06, - "loss": 0.1463, + "epoch": 1.7058740468794125, + "grad_norm": 3.608002308278103, + "learning_rate": 8.272359870083321e-06, + "loss": 0.5464, "step": 12081 }, { - "epoch": 2.5334451667016147, - "grad_norm": 3.106961178091361, - "learning_rate": 1.2420422875327876e-06, - "loss": 0.137, + "epoch": 1.7060152499293983, + "grad_norm": 3.458487830224624, + "learning_rate": 8.27085836003126e-06, + "loss": 0.5832, "step": 12082 }, { - "epoch": 2.5336548542671418, - "grad_norm": 4.5231630360642185, - "learning_rate": 1.2409497759704381e-06, - "loss": 0.1101, + "epoch": 1.7061564529793842, + "grad_norm": 4.19363310732357, + "learning_rate": 8.269356890163698e-06, + "loss": 0.5265, "step": 12083 }, { - "epoch": 2.5338645418326693, - "grad_norm": 3.9527462531187547, - "learning_rate": 1.2398577133245993e-06, - "loss": 0.1304, + "epoch": 1.7062976560293701, + "grad_norm": 3.827286089545369, + "learning_rate": 8.267855460515536e-06, + "loss": 0.6019, "step": 12084 }, { - "epoch": 2.534074229398197, - "grad_norm": 3.710421360035279, - "learning_rate": 1.2387660996512475e-06, - "loss": 0.1222, + "epoch": 1.706438859079356, + "grad_norm": 3.521000897084492, + "learning_rate": 8.266354071121665e-06, + "loss": 0.5756, "step": 12085 }, { - "epoch": 2.534283916963724, - "grad_norm": 4.980933200086237, - "learning_rate": 1.23767493500633e-06, - "loss": 0.1441, + "epoch": 1.7065800621293419, + "grad_norm": 3.7811564502788824, + "learning_rate": 8.264852722016974e-06, + "loss": 0.6296, "step": 12086 }, { - "epoch": 2.5344936045292514, - "grad_norm": 4.464371317468486, - "learning_rate": 1.23658421944577e-06, - "loss": 0.1289, + "epoch": 1.7067212651793278, + "grad_norm": 3.3938452950977003, + "learning_rate": 8.263351413236359e-06, + "loss": 0.6497, "step": 12087 }, { - "epoch": 2.534703292094779, - "grad_norm": 4.246290248094093, - "learning_rate": 1.235493953025466e-06, - "loss": 0.0971, + "epoch": 1.7068624682293136, + "grad_norm": 3.6560818787316047, + "learning_rate": 8.261850144814707e-06, + "loss": 0.5346, "step": 12088 }, { - "epoch": 2.534912979660306, - "grad_norm": 3.75869695858341, - "learning_rate": 1.2344041358013004e-06, - "loss": 0.1259, + "epoch": 1.7070036712792995, + "grad_norm": 3.677929389546445, + "learning_rate": 8.260348916786907e-06, + "loss": 0.5612, "step": 12089 }, { - "epoch": 2.5351226672258336, - "grad_norm": 3.9317376316210533, - "learning_rate": 1.2333147678291246e-06, - "loss": 0.1158, + "epoch": 1.7071448743292854, + "grad_norm": 4.051830010039964, + "learning_rate": 8.258847729187845e-06, + "loss": 0.6617, "step": 12090 }, { - "epoch": 2.535332354791361, - "grad_norm": 5.026491155850308, - "learning_rate": 1.2322258491647732e-06, - "loss": 0.1732, + "epoch": 1.7072860773792713, + "grad_norm": 3.432787356197747, + "learning_rate": 8.257346582052414e-06, + "loss": 0.5598, "step": 12091 }, { - "epoch": 2.535542042356888, - "grad_norm": 4.072500172156615, - "learning_rate": 1.2311373798640536e-06, - "loss": 0.1289, + "epoch": 1.7074272804292572, + "grad_norm": 3.4563040551317394, + "learning_rate": 8.255845475415493e-06, + "loss": 0.5517, "step": 12092 }, { - "epoch": 2.5357517299224157, - "grad_norm": 3.6114886183997132, - "learning_rate": 1.2300493599827512e-06, - "loss": 0.1215, + "epoch": 1.707568483479243, + "grad_norm": 3.5320357396688524, + "learning_rate": 8.254344409311972e-06, + "loss": 0.5488, "step": 12093 }, { - "epoch": 2.5359614174879432, - "grad_norm": 4.924318047779965, - "learning_rate": 1.228961789576628e-06, - "loss": 0.1841, + "epoch": 1.707709686529229, + "grad_norm": 3.147980133731133, + "learning_rate": 8.252843383776731e-06, + "loss": 0.541, "step": 12094 }, { - "epoch": 2.5361711050534703, - "grad_norm": 3.660489197343683, - "learning_rate": 1.2278746687014298e-06, - "loss": 0.1366, + "epoch": 1.7078508895792148, + "grad_norm": 2.7469826274699467, + "learning_rate": 8.251342398844654e-06, + "loss": 0.4586, "step": 12095 }, { - "epoch": 2.536380792618998, - "grad_norm": 3.075210888560592, - "learning_rate": 1.2267879974128682e-06, - "loss": 0.1183, + "epoch": 1.7079920926292007, + "grad_norm": 3.0709922517883053, + "learning_rate": 8.249841454550626e-06, + "loss": 0.4501, "step": 12096 }, { - "epoch": 2.536590480184525, - "grad_norm": 4.842141918057536, - "learning_rate": 1.2257017757666357e-06, - "loss": 0.1768, + "epoch": 1.7081332956791866, + "grad_norm": 3.872673651572887, + "learning_rate": 8.248340550929527e-06, + "loss": 0.5906, "step": 12097 }, { - "epoch": 2.5368001677500525, - "grad_norm": 4.5117673317039975, - "learning_rate": 1.2246160038184086e-06, - "loss": 0.172, + "epoch": 1.7082744987291725, + "grad_norm": 3.9838360671146806, + "learning_rate": 8.246839688016235e-06, + "loss": 0.7099, "step": 12098 }, { - "epoch": 2.5370098553155795, - "grad_norm": 3.2133546925982905, - "learning_rate": 1.223530681623829e-06, - "loss": 0.1076, + "epoch": 1.7084157017791584, + "grad_norm": 4.2997128062141075, + "learning_rate": 8.245338865845638e-06, + "loss": 0.7455, "step": 12099 }, { - "epoch": 2.537219542881107, - "grad_norm": 4.121729677409907, - "learning_rate": 1.2224458092385249e-06, - "loss": 0.1068, + "epoch": 1.7085569048291442, + "grad_norm": 3.426254077382975, + "learning_rate": 8.243838084452603e-06, + "loss": 0.4575, "step": 12100 }, { - "epoch": 2.5374292304466346, - "grad_norm": 3.1666248258739556, - "learning_rate": 1.2213613867180984e-06, - "loss": 0.1027, + "epoch": 1.7086981078791301, + "grad_norm": 3.7480365668054887, + "learning_rate": 8.242337343872012e-06, + "loss": 0.556, "step": 12101 }, { - "epoch": 2.5376389180121617, - "grad_norm": 3.9782939538031443, - "learning_rate": 1.220277414118124e-06, - "loss": 0.1402, + "epoch": 1.708839310929116, + "grad_norm": 3.839889287703012, + "learning_rate": 8.240836644138743e-06, + "loss": 0.6454, "step": 12102 }, { - "epoch": 2.537848605577689, - "grad_norm": 4.721290246767434, - "learning_rate": 1.2191938914941604e-06, - "loss": 0.1643, + "epoch": 1.708980513979102, + "grad_norm": 3.504475924167166, + "learning_rate": 8.239335985287669e-06, + "loss": 0.5728, "step": 12103 }, { - "epoch": 2.5380582931432167, - "grad_norm": 3.7179590897315387, - "learning_rate": 1.218110818901742e-06, - "loss": 0.1446, + "epoch": 1.7091217170290878, + "grad_norm": 3.8067273471746277, + "learning_rate": 8.237835367353668e-06, + "loss": 0.5528, "step": 12104 }, { - "epoch": 2.538267980708744, - "grad_norm": 3.887924404743961, - "learning_rate": 1.2170281963963726e-06, - "loss": 0.133, + "epoch": 1.7092629200790737, + "grad_norm": 3.960033390955305, + "learning_rate": 8.236334790371612e-06, + "loss": 0.7281, "step": 12105 }, { - "epoch": 2.5384776682742713, - "grad_norm": 3.842412955451362, - "learning_rate": 1.2159460240335418e-06, - "loss": 0.1034, + "epoch": 1.7094041231290595, + "grad_norm": 4.221282115117411, + "learning_rate": 8.234834254376375e-06, + "loss": 0.586, "step": 12106 }, { - "epoch": 2.538687355839799, - "grad_norm": 3.248488634442362, - "learning_rate": 1.2148643018687134e-06, - "loss": 0.0961, + "epoch": 1.7095453261790454, + "grad_norm": 3.534784188061318, + "learning_rate": 8.233333759402823e-06, + "loss": 0.5233, "step": 12107 }, { - "epoch": 2.538897043405326, - "grad_norm": 2.800003844753062, - "learning_rate": 1.2137830299573273e-06, - "loss": 0.1049, + "epoch": 1.7096865292290313, + "grad_norm": 3.7869981817278062, + "learning_rate": 8.231833305485841e-06, + "loss": 0.6853, "step": 12108 }, { - "epoch": 2.5391067309708535, - "grad_norm": 3.9780201446554218, - "learning_rate": 1.212702208354798e-06, - "loss": 0.1589, + "epoch": 1.7098277322790172, + "grad_norm": 3.2844084763260684, + "learning_rate": 8.230332892660282e-06, + "loss": 0.6304, "step": 12109 }, { - "epoch": 2.539316418536381, - "grad_norm": 4.836320944876949, - "learning_rate": 1.2116218371165222e-06, - "loss": 0.1555, + "epoch": 1.709968935329003, + "grad_norm": 5.002310325414373, + "learning_rate": 8.228832520961023e-06, + "loss": 0.6713, "step": 12110 }, { - "epoch": 2.539526106101908, - "grad_norm": 3.4471527807202156, - "learning_rate": 1.2105419162978681e-06, - "loss": 0.1064, + "epoch": 1.710110138378989, + "grad_norm": 4.713410233599082, + "learning_rate": 8.227332190422931e-06, + "loss": 0.6872, "step": 12111 }, { - "epoch": 2.5397357936674356, - "grad_norm": 3.2225002560285274, - "learning_rate": 1.2094624459541882e-06, - "loss": 0.1312, + "epoch": 1.7102513414289748, + "grad_norm": 3.286573245850408, + "learning_rate": 8.225831901080874e-06, + "loss": 0.5107, "step": 12112 }, { - "epoch": 2.539945481232963, - "grad_norm": 4.685250277471805, - "learning_rate": 1.2083834261408046e-06, - "loss": 0.1749, + "epoch": 1.7103925444789607, + "grad_norm": 4.595493875791277, + "learning_rate": 8.224331652969717e-06, + "loss": 0.6759, "step": 12113 }, { - "epoch": 2.5401551687984902, - "grad_norm": 3.822468710446993, - "learning_rate": 1.2073048569130152e-06, - "loss": 0.1239, + "epoch": 1.7105337475289466, + "grad_norm": 2.781788048847269, + "learning_rate": 8.222831446124327e-06, + "loss": 0.4307, "step": 12114 }, { - "epoch": 2.5403648563640178, - "grad_norm": 4.116738197839818, - "learning_rate": 1.2062267383261039e-06, - "loss": 0.1265, + "epoch": 1.7106749505789325, + "grad_norm": 3.8196337329699155, + "learning_rate": 8.221331280579564e-06, + "loss": 0.5458, "step": 12115 }, { - "epoch": 2.540574543929545, - "grad_norm": 5.412632927717294, - "learning_rate": 1.2051490704353252e-06, - "loss": 0.1638, + "epoch": 1.7108161536289184, + "grad_norm": 3.1943440336277584, + "learning_rate": 8.2198311563703e-06, + "loss": 0.4822, "step": 12116 }, { - "epoch": 2.5407842314950724, - "grad_norm": 6.344457023928824, - "learning_rate": 1.2040718532959106e-06, - "loss": 0.1685, + "epoch": 1.7109573566789043, + "grad_norm": 3.546531341180148, + "learning_rate": 8.218331073531385e-06, + "loss": 0.5193, "step": 12117 }, { - "epoch": 2.5409939190605995, - "grad_norm": 3.1100604668164613, - "learning_rate": 1.2029950869630669e-06, - "loss": 0.1045, + "epoch": 1.7110985597288901, + "grad_norm": 3.9259198236937567, + "learning_rate": 8.216831032097689e-06, + "loss": 0.7017, "step": 12118 }, { - "epoch": 2.541203606626127, - "grad_norm": 4.019892926529375, - "learning_rate": 1.2019187714919856e-06, - "loss": 0.1236, + "epoch": 1.711239762778876, + "grad_norm": 3.4020336498043635, + "learning_rate": 8.215331032104069e-06, + "loss": 0.5081, "step": 12119 }, { - "epoch": 2.5414132941916545, - "grad_norm": 3.6330324999787353, - "learning_rate": 1.2008429069378237e-06, - "loss": 0.12, + "epoch": 1.711380965828862, + "grad_norm": 2.9410025017694172, + "learning_rate": 8.213831073585385e-06, + "loss": 0.462, "step": 12120 }, { - "epoch": 2.5416229817571816, - "grad_norm": 3.963197008644183, - "learning_rate": 1.1997674933557268e-06, - "loss": 0.1182, + "epoch": 1.7115221688788478, + "grad_norm": 2.9289316138982557, + "learning_rate": 8.212331156576494e-06, + "loss": 0.4996, "step": 12121 }, { - "epoch": 2.541832669322709, - "grad_norm": 4.492945230785115, - "learning_rate": 1.1986925308008092e-06, - "loss": 0.1607, + "epoch": 1.7116633719288337, + "grad_norm": 3.9403214989180726, + "learning_rate": 8.210831281112257e-06, + "loss": 0.5537, "step": 12122 }, { - "epoch": 2.5420423568882367, - "grad_norm": 4.552721754242347, - "learning_rate": 1.1976180193281618e-06, - "loss": 0.1298, + "epoch": 1.7118045749788195, + "grad_norm": 3.0106434552851646, + "learning_rate": 8.209331447227527e-06, + "loss": 0.6173, "step": 12123 }, { - "epoch": 2.5422520444537637, - "grad_norm": 4.630313105402612, - "learning_rate": 1.1965439589928585e-06, - "loss": 0.1133, + "epoch": 1.7119457780288054, + "grad_norm": 3.6693322858761617, + "learning_rate": 8.207831654957162e-06, + "loss": 0.5414, "step": 12124 }, { - "epoch": 2.5424617320192913, - "grad_norm": 3.703271664132475, - "learning_rate": 1.1954703498499499e-06, - "loss": 0.101, + "epoch": 1.7120869810787913, + "grad_norm": 3.2299484171198496, + "learning_rate": 8.206331904336018e-06, + "loss": 0.5561, "step": 12125 }, { - "epoch": 2.542671419584819, - "grad_norm": 3.426792288780992, - "learning_rate": 1.1943971919544528e-06, - "loss": 0.1219, + "epoch": 1.7122281841287772, + "grad_norm": 3.8857129556178727, + "learning_rate": 8.204832195398941e-06, + "loss": 0.6388, "step": 12126 }, { - "epoch": 2.542881107150346, - "grad_norm": 4.877799172989464, - "learning_rate": 1.1933244853613723e-06, - "loss": 0.1536, + "epoch": 1.712369387178763, + "grad_norm": 3.806817808003596, + "learning_rate": 8.20333252818079e-06, + "loss": 0.5558, "step": 12127 }, { - "epoch": 2.5430907947158734, - "grad_norm": 3.6176260210292006, - "learning_rate": 1.1922522301256878e-06, - "loss": 0.1422, + "epoch": 1.712510590228749, + "grad_norm": 3.7137945755281407, + "learning_rate": 8.201832902716416e-06, + "loss": 0.4898, "step": 12128 }, { - "epoch": 2.543300482281401, - "grad_norm": 4.322031373852679, - "learning_rate": 1.191180426302353e-06, - "loss": 0.121, + "epoch": 1.7126517932787348, + "grad_norm": 4.109023340479056, + "learning_rate": 8.200333319040667e-06, + "loss": 0.5986, "step": 12129 }, { - "epoch": 2.543510169846928, - "grad_norm": 3.6169406834211695, - "learning_rate": 1.1901090739462972e-06, - "loss": 0.1128, + "epoch": 1.7127929963287207, + "grad_norm": 4.054893392640815, + "learning_rate": 8.198833777188396e-06, + "loss": 0.6688, "step": 12130 }, { - "epoch": 2.5437198574124555, - "grad_norm": 3.9027743174794405, - "learning_rate": 1.1890381731124335e-06, - "loss": 0.1409, + "epoch": 1.7129341993787066, + "grad_norm": 3.9183425907148917, + "learning_rate": 8.19733427719445e-06, + "loss": 0.5966, "step": 12131 }, { - "epoch": 2.543929544977983, - "grad_norm": 4.355749160780657, - "learning_rate": 1.1879677238556441e-06, - "loss": 0.1397, + "epoch": 1.7130754024286925, + "grad_norm": 2.996937512998745, + "learning_rate": 8.195834819093677e-06, + "loss": 0.504, "step": 12132 }, { - "epoch": 2.54413923254351, - "grad_norm": 4.913415519397946, - "learning_rate": 1.1868977262307934e-06, - "loss": 0.1789, + "epoch": 1.7132166054786784, + "grad_norm": 3.526809740074254, + "learning_rate": 8.194335402920926e-06, + "loss": 0.552, "step": 12133 }, { - "epoch": 2.5443489201090377, - "grad_norm": 5.227240550887243, - "learning_rate": 1.1858281802927207e-06, - "loss": 0.1592, + "epoch": 1.7133578085286643, + "grad_norm": 3.2563473316884326, + "learning_rate": 8.192836028711036e-06, + "loss": 0.4956, "step": 12134 }, { - "epoch": 2.5445586076745648, - "grad_norm": 3.613132505834895, - "learning_rate": 1.1847590860962387e-06, - "loss": 0.1316, + "epoch": 1.7134990115786501, + "grad_norm": 3.801827567299866, + "learning_rate": 8.19133669649886e-06, + "loss": 0.555, "step": 12135 }, { - "epoch": 2.5447682952400923, - "grad_norm": 3.5862282596674095, - "learning_rate": 1.1836904436961427e-06, - "loss": 0.1332, + "epoch": 1.713640214628636, + "grad_norm": 3.635986046368952, + "learning_rate": 8.189837406319233e-06, + "loss": 0.4912, "step": 12136 }, { - "epoch": 2.5449779828056194, - "grad_norm": 4.509062968467781, - "learning_rate": 1.1826222531472042e-06, - "loss": 0.1508, + "epoch": 1.713781417678622, + "grad_norm": 3.3138475747002674, + "learning_rate": 8.18833815820701e-06, + "loss": 0.5174, "step": 12137 }, { - "epoch": 2.545187670371147, - "grad_norm": 4.084265435634725, - "learning_rate": 1.1815545145041684e-06, - "loss": 0.0946, + "epoch": 1.7139226207286078, + "grad_norm": 4.249289845465241, + "learning_rate": 8.186838952197019e-06, + "loss": 0.5674, "step": 12138 }, { - "epoch": 2.5453973579366744, - "grad_norm": 3.3336203354876597, - "learning_rate": 1.1804872278217572e-06, - "loss": 0.09, + "epoch": 1.7140638237785937, + "grad_norm": 3.282852163807536, + "learning_rate": 8.18533978832411e-06, + "loss": 0.4974, "step": 12139 }, { - "epoch": 2.5456070455022015, - "grad_norm": 4.291465095902692, - "learning_rate": 1.179420393154672e-06, - "loss": 0.1372, + "epoch": 1.7142050268285796, + "grad_norm": 3.870769621176407, + "learning_rate": 8.183840666623123e-06, + "loss": 0.6881, "step": 12140 }, { - "epoch": 2.545816733067729, - "grad_norm": 3.8620663893691805, - "learning_rate": 1.1783540105575919e-06, - "loss": 0.1385, + "epoch": 1.7143462298785654, + "grad_norm": 3.750560031335267, + "learning_rate": 8.18234158712889e-06, + "loss": 0.5684, "step": 12141 }, { - "epoch": 2.5460264206332566, - "grad_norm": 3.8750848466434524, - "learning_rate": 1.1772880800851693e-06, - "loss": 0.1278, + "epoch": 1.7144874329285513, + "grad_norm": 4.053014601195905, + "learning_rate": 8.18084254987626e-06, + "loss": 0.5868, "step": 12142 }, { - "epoch": 2.5462361081987837, - "grad_norm": 4.490364689655418, - "learning_rate": 1.176222601792032e-06, - "loss": 0.108, + "epoch": 1.7146286359785372, + "grad_norm": 3.551553934584082, + "learning_rate": 8.179343554900058e-06, + "loss": 0.5523, "step": 12143 }, { - "epoch": 2.546445795764311, - "grad_norm": 5.810093459382098, - "learning_rate": 1.1751575757327938e-06, - "loss": 0.1535, + "epoch": 1.714769839028523, + "grad_norm": 4.491036406895923, + "learning_rate": 8.177844602235128e-06, + "loss": 0.6575, "step": 12144 }, { - "epoch": 2.5466554833298387, - "grad_norm": 4.040677427863925, - "learning_rate": 1.1740930019620344e-06, - "loss": 0.1357, + "epoch": 1.714911042078509, + "grad_norm": 3.034906255915081, + "learning_rate": 8.176345691916301e-06, + "loss": 0.5007, "step": 12145 }, { - "epoch": 2.546865170895366, - "grad_norm": 4.856363106439736, - "learning_rate": 1.173028880534316e-06, - "loss": 0.1551, + "epoch": 1.7150522451284949, + "grad_norm": 3.936583464549599, + "learning_rate": 8.174846823978412e-06, + "loss": 0.6016, "step": 12146 }, { - "epoch": 2.5470748584608933, - "grad_norm": 4.619290062541604, - "learning_rate": 1.171965211504178e-06, - "loss": 0.1591, + "epoch": 1.7151934481784807, + "grad_norm": 3.5754729594935517, + "learning_rate": 8.173347998456297e-06, + "loss": 0.5849, "step": 12147 }, { - "epoch": 2.547284546026421, - "grad_norm": 3.1866159584941838, - "learning_rate": 1.1709019949261324e-06, - "loss": 0.105, + "epoch": 1.7153346512284666, + "grad_norm": 4.149353547153928, + "learning_rate": 8.171849215384786e-06, + "loss": 0.6609, "step": 12148 }, { - "epoch": 2.547494233591948, - "grad_norm": 3.8019816413326537, - "learning_rate": 1.1698392308546747e-06, - "loss": 0.1136, + "epoch": 1.7154758542784525, + "grad_norm": 3.816472918127026, + "learning_rate": 8.170350474798707e-06, + "loss": 0.649, "step": 12149 }, { - "epoch": 2.5477039211574755, - "grad_norm": 4.153000596744503, - "learning_rate": 1.1687769193442723e-06, - "loss": 0.116, + "epoch": 1.7156170573284384, + "grad_norm": 3.4280514357370793, + "learning_rate": 8.168851776732897e-06, + "loss": 0.5663, "step": 12150 }, { - "epoch": 2.547913608723003, - "grad_norm": 4.438509064105294, - "learning_rate": 1.167715060449367e-06, - "loss": 0.1697, + "epoch": 1.7157582603784243, + "grad_norm": 3.6173490166521423, + "learning_rate": 8.167353121222179e-06, + "loss": 0.5239, "step": 12151 }, { - "epoch": 2.54812329628853, - "grad_norm": 5.281248480874893, - "learning_rate": 1.1666536542243845e-06, - "loss": 0.1282, + "epoch": 1.7158994634284102, + "grad_norm": 4.253521523913439, + "learning_rate": 8.16585450830138e-06, + "loss": 0.5368, "step": 12152 }, { - "epoch": 2.5483329838540576, - "grad_norm": 4.527590737916612, - "learning_rate": 1.1655927007237245e-06, - "loss": 0.1192, + "epoch": 1.716040666478396, + "grad_norm": 4.244863967749043, + "learning_rate": 8.164355938005332e-06, + "loss": 0.5973, "step": 12153 }, { - "epoch": 2.5485426714195847, - "grad_norm": 4.2638813752373474, - "learning_rate": 1.1645322000017611e-06, - "loss": 0.1312, + "epoch": 1.716181869528382, + "grad_norm": 3.8748081115052817, + "learning_rate": 8.162857410368859e-06, + "loss": 0.6155, "step": 12154 }, { - "epoch": 2.548752358985112, - "grad_norm": 4.775688618058503, - "learning_rate": 1.1634721521128444e-06, - "loss": 0.1444, + "epoch": 1.7163230725783678, + "grad_norm": 3.8257371082469103, + "learning_rate": 8.161358925426786e-06, + "loss": 0.5558, "step": 12155 }, { - "epoch": 2.5489620465506393, - "grad_norm": 6.646884400612622, - "learning_rate": 1.1624125571113075e-06, - "loss": 0.1615, + "epoch": 1.7164642756283537, + "grad_norm": 3.6584318333380854, + "learning_rate": 8.159860483213938e-06, + "loss": 0.5269, "step": 12156 }, { - "epoch": 2.549171734116167, - "grad_norm": 3.667316037949729, - "learning_rate": 1.161353415051455e-06, - "loss": 0.1395, + "epoch": 1.7166054786783396, + "grad_norm": 3.092629331673926, + "learning_rate": 8.158362083765139e-06, + "loss": 0.5034, "step": 12157 }, { - "epoch": 2.5493814216816943, - "grad_norm": 4.5606721567798285, - "learning_rate": 1.1602947259875707e-06, - "loss": 0.1377, + "epoch": 1.7167466817283255, + "grad_norm": 3.6774075867857627, + "learning_rate": 8.15686372711521e-06, + "loss": 0.6203, "step": 12158 }, { - "epoch": 2.5495911092472214, - "grad_norm": 4.8617299094134125, - "learning_rate": 1.1592364899739127e-06, - "loss": 0.1427, + "epoch": 1.7168878847783113, + "grad_norm": 4.227954583628856, + "learning_rate": 8.155365413298972e-06, + "loss": 0.484, "step": 12159 }, { - "epoch": 2.549800796812749, - "grad_norm": 4.074640328585298, - "learning_rate": 1.1581787070647177e-06, - "loss": 0.1475, + "epoch": 1.7170290878282972, + "grad_norm": 3.434216604324574, + "learning_rate": 8.153867142351242e-06, + "loss": 0.5147, "step": 12160 }, { - "epoch": 2.5500104843782765, - "grad_norm": 4.661976486560679, - "learning_rate": 1.1571213773141976e-06, - "loss": 0.1389, + "epoch": 1.717170290878283, + "grad_norm": 4.067747650666089, + "learning_rate": 8.152368914306846e-06, + "loss": 0.5967, "step": 12161 }, { - "epoch": 2.5502201719438036, - "grad_norm": 4.080649597937662, - "learning_rate": 1.1560645007765492e-06, - "loss": 0.1506, + "epoch": 1.717311493928269, + "grad_norm": 3.074072588046501, + "learning_rate": 8.150870729200595e-06, + "loss": 0.5234, "step": 12162 }, { - "epoch": 2.550429859509331, - "grad_norm": 4.26948789915272, - "learning_rate": 1.1550080775059302e-06, - "loss": 0.154, + "epoch": 1.7174526969782549, + "grad_norm": 3.800746424640453, + "learning_rate": 8.149372587067313e-06, + "loss": 0.5936, "step": 12163 }, { - "epoch": 2.5506395470748586, - "grad_norm": 4.09343523487981, - "learning_rate": 1.1539521075564874e-06, - "loss": 0.1558, + "epoch": 1.7175939000282407, + "grad_norm": 3.844190459457351, + "learning_rate": 8.147874487941809e-06, + "loss": 0.6711, "step": 12164 }, { - "epoch": 2.5508492346403857, - "grad_norm": 5.117742504350601, - "learning_rate": 1.1528965909823443e-06, - "loss": 0.13, + "epoch": 1.7177351030782266, + "grad_norm": 3.9560662085909204, + "learning_rate": 8.146376431858904e-06, + "loss": 0.5657, "step": 12165 }, { - "epoch": 2.5510589222059132, - "grad_norm": 3.3540629526650645, - "learning_rate": 1.151841527837596e-06, - "loss": 0.1015, + "epoch": 1.7178763061282125, + "grad_norm": 3.0361422510309146, + "learning_rate": 8.144878418853412e-06, + "loss": 0.4404, "step": 12166 }, { - "epoch": 2.5512686097714408, - "grad_norm": 4.435219058861561, - "learning_rate": 1.1507869181763131e-06, - "loss": 0.1565, + "epoch": 1.7180175091781984, + "grad_norm": 3.611330695746184, + "learning_rate": 8.143380448960145e-06, + "loss": 0.5732, "step": 12167 }, { - "epoch": 2.551478297336968, - "grad_norm": 5.601116834718981, - "learning_rate": 1.14973276205255e-06, - "loss": 0.1488, + "epoch": 1.7181587122281843, + "grad_norm": 4.423159504551577, + "learning_rate": 8.141882522213913e-06, + "loss": 0.661, "step": 12168 }, { - "epoch": 2.5516879849024954, - "grad_norm": 3.9945961107232746, - "learning_rate": 1.1486790595203324e-06, - "loss": 0.1186, + "epoch": 1.7182999152781702, + "grad_norm": 4.237863380851219, + "learning_rate": 8.140384638649526e-06, + "loss": 0.628, "step": 12169 }, { - "epoch": 2.551897672468023, - "grad_norm": 5.422935968288645, - "learning_rate": 1.1476258106336658e-06, - "loss": 0.1851, + "epoch": 1.718441118328156, + "grad_norm": 3.407136614865714, + "learning_rate": 8.1388867983018e-06, + "loss": 0.509, "step": 12170 }, { - "epoch": 2.55210736003355, - "grad_norm": 3.768837945187381, - "learning_rate": 1.1465730154465304e-06, - "loss": 0.1171, + "epoch": 1.718582321378142, + "grad_norm": 2.6554550088819107, + "learning_rate": 8.13738900120554e-06, + "loss": 0.3896, "step": 12171 }, { - "epoch": 2.5523170475990775, - "grad_norm": 4.774831157902279, - "learning_rate": 1.1455206740128822e-06, - "loss": 0.1569, + "epoch": 1.7187235244281278, + "grad_norm": 3.6975887047241605, + "learning_rate": 8.135891247395554e-06, + "loss": 0.5816, "step": 12172 }, { - "epoch": 2.5525267351646046, - "grad_norm": 5.162741883923306, - "learning_rate": 1.1444687863866556e-06, - "loss": 0.1473, + "epoch": 1.7188647274781135, + "grad_norm": 4.986008878103458, + "learning_rate": 8.13439353690665e-06, + "loss": 0.6702, "step": 12173 }, { - "epoch": 2.552736422730132, - "grad_norm": 4.466673355371275, - "learning_rate": 1.1434173526217652e-06, - "loss": 0.1723, + "epoch": 1.7190059305280994, + "grad_norm": 2.885638809655849, + "learning_rate": 8.132895869773638e-06, + "loss": 0.4377, "step": 12174 }, { - "epoch": 2.552946110295659, - "grad_norm": 4.600500063498581, - "learning_rate": 1.1423663727720968e-06, - "loss": 0.1406, + "epoch": 1.7191471335780852, + "grad_norm": 3.6411908937846045, + "learning_rate": 8.13139824603132e-06, + "loss": 0.5092, "step": 12175 }, { - "epoch": 2.5531557978611867, - "grad_norm": 3.1679131878414806, - "learning_rate": 1.141315846891512e-06, - "loss": 0.1024, + "epoch": 1.7192883366280711, + "grad_norm": 3.3225901546561256, + "learning_rate": 8.129900665714498e-06, + "loss": 0.5362, "step": 12176 }, { - "epoch": 2.5533654854267143, - "grad_norm": 4.55054469783078, - "learning_rate": 1.1402657750338563e-06, - "loss": 0.1567, + "epoch": 1.719429539678057, + "grad_norm": 3.870434208267787, + "learning_rate": 8.128403128857975e-06, + "loss": 0.6169, "step": 12177 }, { - "epoch": 2.5535751729922413, - "grad_norm": 3.6596099490095693, - "learning_rate": 1.1392161572529447e-06, - "loss": 0.1109, + "epoch": 1.719570742728043, + "grad_norm": 3.10875241153497, + "learning_rate": 8.126905635496557e-06, + "loss": 0.4896, "step": 12178 }, { - "epoch": 2.553784860557769, - "grad_norm": 4.326735151682969, - "learning_rate": 1.1381669936025752e-06, - "loss": 0.1238, + "epoch": 1.7197119457780288, + "grad_norm": 3.891532963967446, + "learning_rate": 8.125408185665042e-06, + "loss": 0.5825, "step": 12179 }, { - "epoch": 2.5539945481232964, - "grad_norm": 4.698726728378975, - "learning_rate": 1.1371182841365181e-06, - "loss": 0.1501, + "epoch": 1.7198531488280147, + "grad_norm": 4.121449673029451, + "learning_rate": 8.123910779398233e-06, + "loss": 0.6444, "step": 12180 }, { - "epoch": 2.5542042356888235, - "grad_norm": 4.62920152682113, - "learning_rate": 1.136070028908518e-06, - "loss": 0.142, + "epoch": 1.7199943518780005, + "grad_norm": 3.178926346655412, + "learning_rate": 8.122413416730924e-06, + "loss": 0.4653, "step": 12181 }, { - "epoch": 2.554413923254351, - "grad_norm": 4.708000937078805, - "learning_rate": 1.1350222279723034e-06, - "loss": 0.1473, + "epoch": 1.7201355549279864, + "grad_norm": 2.85923260405976, + "learning_rate": 8.120916097697918e-06, + "loss": 0.4963, "step": 12182 }, { - "epoch": 2.5546236108198785, - "grad_norm": 3.394441095714444, - "learning_rate": 1.1339748813815777e-06, - "loss": 0.1157, + "epoch": 1.7202767579779723, + "grad_norm": 3.574173476979432, + "learning_rate": 8.119418822334012e-06, + "loss": 0.5874, "step": 12183 }, { - "epoch": 2.5548332983854056, - "grad_norm": 6.067154093817903, - "learning_rate": 1.1329279891900136e-06, - "loss": 0.1245, + "epoch": 1.7204179610279582, + "grad_norm": 2.7183218053107105, + "learning_rate": 8.117921590674002e-06, + "loss": 0.3856, "step": 12184 }, { - "epoch": 2.555042985950933, - "grad_norm": 4.518632434887595, - "learning_rate": 1.1318815514512693e-06, - "loss": 0.152, + "epoch": 1.720559164077944, + "grad_norm": 4.421908563295283, + "learning_rate": 8.116424402752679e-06, + "loss": 0.609, "step": 12185 }, { - "epoch": 2.5552526735164607, - "grad_norm": 4.916971049978362, - "learning_rate": 1.1308355682189787e-06, - "loss": 0.1526, + "epoch": 1.72070036712793, + "grad_norm": 3.7065062414041186, + "learning_rate": 8.114927258604837e-06, + "loss": 0.6704, "step": 12186 }, { - "epoch": 2.5554623610819878, - "grad_norm": 4.108214294436182, - "learning_rate": 1.1297900395467476e-06, - "loss": 0.1436, + "epoch": 1.7208415701779158, + "grad_norm": 3.751844731557451, + "learning_rate": 8.113430158265273e-06, + "loss": 0.619, "step": 12187 }, { - "epoch": 2.5556720486475153, - "grad_norm": 3.965589090580005, - "learning_rate": 1.128744965488159e-06, - "loss": 0.1286, + "epoch": 1.7209827732279017, + "grad_norm": 3.9804235848732907, + "learning_rate": 8.111933101768779e-06, + "loss": 0.6074, "step": 12188 }, { - "epoch": 2.555881736213043, - "grad_norm": 4.645654937237945, - "learning_rate": 1.1277003460967794e-06, - "loss": 0.1335, + "epoch": 1.7211239762778876, + "grad_norm": 3.077736258206469, + "learning_rate": 8.110436089150141e-06, + "loss": 0.5151, "step": 12189 }, { - "epoch": 2.55609142377857, - "grad_norm": 4.019856458206179, - "learning_rate": 1.1266561814261445e-06, - "loss": 0.1522, + "epoch": 1.7212651793278735, + "grad_norm": 3.052250760966108, + "learning_rate": 8.108939120444154e-06, + "loss": 0.4722, "step": 12190 }, { - "epoch": 2.5563011113440974, - "grad_norm": 4.660677481605267, - "learning_rate": 1.1256124715297711e-06, - "loss": 0.1362, + "epoch": 1.7214063823778594, + "grad_norm": 3.68651662490432, + "learning_rate": 8.107442195685607e-06, + "loss": 0.585, "step": 12191 }, { - "epoch": 2.5565107989096245, - "grad_norm": 4.876085646494219, - "learning_rate": 1.124569216461151e-06, - "loss": 0.169, + "epoch": 1.7215475854278453, + "grad_norm": 3.571257340141153, + "learning_rate": 8.105945314909287e-06, + "loss": 0.5229, "step": 12192 }, { - "epoch": 2.556720486475152, - "grad_norm": 4.122476044013269, - "learning_rate": 1.1235264162737503e-06, - "loss": 0.1372, + "epoch": 1.7216887884778311, + "grad_norm": 4.08641754134329, + "learning_rate": 8.104448478149978e-06, + "loss": 0.5884, "step": 12193 }, { - "epoch": 2.556930174040679, - "grad_norm": 4.0528665743947965, - "learning_rate": 1.1224840710210161e-06, - "loss": 0.1081, + "epoch": 1.721829991527817, + "grad_norm": 4.045181988121996, + "learning_rate": 8.102951685442466e-06, + "loss": 0.6772, "step": 12194 }, { - "epoch": 2.5571398616062067, - "grad_norm": 4.019532730094852, - "learning_rate": 1.1214421807563726e-06, - "loss": 0.1217, + "epoch": 1.721971194577803, + "grad_norm": 3.849027603738308, + "learning_rate": 8.101454936821538e-06, + "loss": 0.5483, "step": 12195 }, { - "epoch": 2.557349549171734, - "grad_norm": 3.5413099709455973, - "learning_rate": 1.1204007455332154e-06, - "loss": 0.1075, + "epoch": 1.7221123976277888, + "grad_norm": 3.2420325570681845, + "learning_rate": 8.099958232321978e-06, + "loss": 0.4969, "step": 12196 }, { - "epoch": 2.5575592367372613, - "grad_norm": 5.388968882960073, - "learning_rate": 1.1193597654049204e-06, - "loss": 0.1746, + "epoch": 1.7222536006777747, + "grad_norm": 3.9568091341176594, + "learning_rate": 8.098461571978568e-06, + "loss": 0.6268, "step": 12197 }, { - "epoch": 2.557768924302789, - "grad_norm": 3.946131583797869, - "learning_rate": 1.1183192404248388e-06, - "loss": 0.1282, + "epoch": 1.7223948037277605, + "grad_norm": 3.724914901107439, + "learning_rate": 8.09696495582609e-06, + "loss": 0.6149, "step": 12198 }, { - "epoch": 2.5579786118683163, - "grad_norm": 5.113424632449606, - "learning_rate": 1.1172791706463038e-06, - "loss": 0.1532, + "epoch": 1.7225360067777464, + "grad_norm": 3.7612845298481394, + "learning_rate": 8.095468383899325e-06, + "loss": 0.6019, "step": 12199 }, { - "epoch": 2.5581882994338434, - "grad_norm": 4.214236422415214, - "learning_rate": 1.1162395561226168e-06, - "loss": 0.1298, + "epoch": 1.7226772098277323, + "grad_norm": 4.874308144556847, + "learning_rate": 8.093971856233051e-06, + "loss": 0.7785, "step": 12200 }, { - "epoch": 2.558397986999371, - "grad_norm": 4.206197625949368, - "learning_rate": 1.115200396907059e-06, - "loss": 0.1476, + "epoch": 1.7228184128777182, + "grad_norm": 3.414374388096351, + "learning_rate": 8.092475372862053e-06, + "loss": 0.5765, "step": 12201 }, { - "epoch": 2.5586076745648985, - "grad_norm": 4.713390017189801, - "learning_rate": 1.114161693052893e-06, - "loss": 0.1489, + "epoch": 1.722959615927704, + "grad_norm": 3.507195475531474, + "learning_rate": 8.0909789338211e-06, + "loss": 0.6245, "step": 12202 }, { - "epoch": 2.5588173621304255, - "grad_norm": 4.192222933383105, - "learning_rate": 1.1131234446133498e-06, - "loss": 0.1418, + "epoch": 1.72310081897769, + "grad_norm": 3.075922119038128, + "learning_rate": 8.089482539144969e-06, + "loss": 0.4874, "step": 12203 }, { - "epoch": 2.559027049695953, - "grad_norm": 5.337746146445536, - "learning_rate": 1.1120856516416455e-06, - "loss": 0.1692, + "epoch": 1.7232420220276758, + "grad_norm": 3.6150831567255186, + "learning_rate": 8.087986188868441e-06, + "loss": 0.6373, "step": 12204 }, { - "epoch": 2.5592367372614806, - "grad_norm": 4.7023551210195285, - "learning_rate": 1.1110483141909667e-06, - "loss": 0.1312, + "epoch": 1.7233832250776617, + "grad_norm": 5.305551573919046, + "learning_rate": 8.086489883026289e-06, + "loss": 0.6291, "step": 12205 }, { - "epoch": 2.5594464248270077, - "grad_norm": 3.778900162503842, - "learning_rate": 1.1100114323144772e-06, - "loss": 0.113, + "epoch": 1.7235244281276476, + "grad_norm": 3.434006304427154, + "learning_rate": 8.084993621653283e-06, + "loss": 0.5147, "step": 12206 }, { - "epoch": 2.559656112392535, - "grad_norm": 5.119940671582861, - "learning_rate": 1.1089750060653216e-06, - "loss": 0.1881, + "epoch": 1.7236656311776333, + "grad_norm": 3.887252616656358, + "learning_rate": 8.083497404784201e-06, + "loss": 0.588, "step": 12207 }, { - "epoch": 2.5598657999580627, - "grad_norm": 3.8437742240024204, - "learning_rate": 1.107939035496619e-06, - "loss": 0.1068, + "epoch": 1.7238068342276192, + "grad_norm": 3.7938974416963327, + "learning_rate": 8.08200123245381e-06, + "loss": 0.5923, "step": 12208 }, { - "epoch": 2.56007548752359, - "grad_norm": 4.278520482989656, - "learning_rate": 1.106903520661461e-06, - "loss": 0.1406, + "epoch": 1.723948037277605, + "grad_norm": 3.9916202014473225, + "learning_rate": 8.080505104696888e-06, + "loss": 0.5968, "step": 12209 }, { - "epoch": 2.5602851750891173, - "grad_norm": 3.412520794337883, - "learning_rate": 1.1058684616129212e-06, - "loss": 0.1085, + "epoch": 1.724089240327591, + "grad_norm": 2.910236642107398, + "learning_rate": 8.079009021548193e-06, + "loss": 0.3988, "step": 12210 }, { - "epoch": 2.5604948626546444, - "grad_norm": 3.6774329992886505, - "learning_rate": 1.1048338584040519e-06, - "loss": 0.1357, + "epoch": 1.7242304433775768, + "grad_norm": 3.599685527091173, + "learning_rate": 8.0775129830425e-06, + "loss": 0.5523, "step": 12211 }, { - "epoch": 2.560704550220172, - "grad_norm": 3.9448686457466104, - "learning_rate": 1.103799711087874e-06, - "loss": 0.1328, + "epoch": 1.7243716464275627, + "grad_norm": 3.553002712567692, + "learning_rate": 8.076016989214572e-06, + "loss": 0.6312, "step": 12212 }, { - "epoch": 2.560914237785699, - "grad_norm": 5.905435880051503, - "learning_rate": 1.1027660197173894e-06, - "loss": 0.1606, + "epoch": 1.7245128494775486, + "grad_norm": 3.7422758875627724, + "learning_rate": 8.07452104009918e-06, + "loss": 0.6952, "step": 12213 }, { - "epoch": 2.5611239253512266, - "grad_norm": 3.0940281823139086, - "learning_rate": 1.1017327843455794e-06, - "loss": 0.095, + "epoch": 1.7246540525275345, + "grad_norm": 3.593462025981845, + "learning_rate": 8.07302513573109e-06, + "loss": 0.5626, "step": 12214 }, { - "epoch": 2.561333612916754, - "grad_norm": 4.072362399838126, - "learning_rate": 1.1007000050253958e-06, - "loss": 0.0924, + "epoch": 1.7247952555775203, + "grad_norm": 3.7338387602964254, + "learning_rate": 8.071529276145058e-06, + "loss": 0.6589, "step": 12215 }, { - "epoch": 2.561543300482281, - "grad_norm": 4.1515451672202595, - "learning_rate": 1.099667681809774e-06, - "loss": 0.1333, + "epoch": 1.7249364586275062, + "grad_norm": 3.590416260315603, + "learning_rate": 8.070033461375857e-06, + "loss": 0.565, "step": 12216 }, { - "epoch": 2.5617529880478087, - "grad_norm": 4.208427931873919, - "learning_rate": 1.0986358147516207e-06, - "loss": 0.1257, + "epoch": 1.725077661677492, + "grad_norm": 2.620999421055926, + "learning_rate": 8.068537691458245e-06, + "loss": 0.4334, "step": 12217 }, { - "epoch": 2.5619626756133362, - "grad_norm": 5.05182675256152, - "learning_rate": 1.0976044039038181e-06, - "loss": 0.18, + "epoch": 1.725218864727478, + "grad_norm": 3.625520189382327, + "learning_rate": 8.067041966426984e-06, + "loss": 0.5656, "step": 12218 }, { - "epoch": 2.5621723631788633, - "grad_norm": 4.015620119489866, - "learning_rate": 1.0965734493192293e-06, - "loss": 0.119, + "epoch": 1.7253600677774639, + "grad_norm": 3.783579599330728, + "learning_rate": 8.065546286316831e-06, + "loss": 0.6062, "step": 12219 }, { - "epoch": 2.562382050744391, - "grad_norm": 3.9258383541556316, - "learning_rate": 1.0955429510506966e-06, - "loss": 0.1142, + "epoch": 1.7255012708274498, + "grad_norm": 4.306797541216445, + "learning_rate": 8.064050651162546e-06, + "loss": 0.5288, "step": 12220 }, { - "epoch": 2.5625917383099184, - "grad_norm": 4.6704836551966915, - "learning_rate": 1.0945129091510308e-06, - "loss": 0.1177, + "epoch": 1.7256424738774356, + "grad_norm": 3.388195268447762, + "learning_rate": 8.06255506099889e-06, + "loss": 0.4538, "step": 12221 }, { - "epoch": 2.5628014258754455, - "grad_norm": 3.693553465065929, - "learning_rate": 1.0934833236730235e-06, - "loss": 0.1128, + "epoch": 1.7257836769274215, + "grad_norm": 4.201347192704246, + "learning_rate": 8.061059515860616e-06, + "loss": 0.7752, "step": 12222 }, { - "epoch": 2.563011113440973, - "grad_norm": 4.291241429776918, - "learning_rate": 1.0924541946694444e-06, - "loss": 0.1356, + "epoch": 1.7259248799774074, + "grad_norm": 3.9111571200621924, + "learning_rate": 8.059564015782482e-06, + "loss": 0.5626, "step": 12223 }, { - "epoch": 2.5632208010065005, - "grad_norm": 3.559320233172159, - "learning_rate": 1.091425522193037e-06, - "loss": 0.1146, + "epoch": 1.7260660830273933, + "grad_norm": 3.76524335082086, + "learning_rate": 8.058068560799241e-06, + "loss": 0.5256, "step": 12224 }, { - "epoch": 2.5634304885720276, - "grad_norm": 4.704572678966715, - "learning_rate": 1.0903973062965212e-06, - "loss": 0.1563, + "epoch": 1.7262072860773792, + "grad_norm": 4.9649659984590535, + "learning_rate": 8.05657315094565e-06, + "loss": 0.6715, "step": 12225 }, { - "epoch": 2.563640176137555, - "grad_norm": 4.144404446116386, - "learning_rate": 1.0893695470325994e-06, - "loss": 0.1301, + "epoch": 1.726348489127365, + "grad_norm": 3.0642175156289473, + "learning_rate": 8.05507778625646e-06, + "loss": 0.4551, "step": 12226 }, { - "epoch": 2.5638498637030827, - "grad_norm": 3.935310163786143, - "learning_rate": 1.0883422444539393e-06, - "loss": 0.1316, + "epoch": 1.726489692177351, + "grad_norm": 3.526888362696003, + "learning_rate": 8.053582466766423e-06, + "loss": 0.5123, "step": 12227 }, { - "epoch": 2.5640595512686097, - "grad_norm": 4.017918351176443, - "learning_rate": 1.087315398613198e-06, - "loss": 0.1503, + "epoch": 1.7266308952273368, + "grad_norm": 5.064137671008736, + "learning_rate": 8.052087192510285e-06, + "loss": 0.8105, "step": 12228 }, { - "epoch": 2.5642692388341373, - "grad_norm": 4.518043726409943, - "learning_rate": 1.086289009563002e-06, - "loss": 0.134, + "epoch": 1.7267720982773227, + "grad_norm": 4.630414867952721, + "learning_rate": 8.0505919635228e-06, + "loss": 0.7968, "step": 12229 }, { - "epoch": 2.5644789263996643, - "grad_norm": 3.79619339797227, - "learning_rate": 1.0852630773559514e-06, - "loss": 0.1245, + "epoch": 1.7269133013273086, + "grad_norm": 3.3879606359060763, + "learning_rate": 8.04909677983872e-06, + "loss": 0.4738, "step": 12230 }, { - "epoch": 2.564688613965192, - "grad_norm": 3.7546005057328546, - "learning_rate": 1.08423760204463e-06, - "loss": 0.1236, + "epoch": 1.7270545043772945, + "grad_norm": 3.9782570775956607, + "learning_rate": 8.047601641492784e-06, + "loss": 0.6933, "step": 12231 }, { - "epoch": 2.564898301530719, - "grad_norm": 4.071772273456543, - "learning_rate": 1.0832125836815965e-06, - "loss": 0.1223, + "epoch": 1.7271957074272803, + "grad_norm": 3.623446529655996, + "learning_rate": 8.046106548519743e-06, + "loss": 0.5441, "step": 12232 }, { - "epoch": 2.5651079890962465, - "grad_norm": 3.872014189381969, - "learning_rate": 1.0821880223193837e-06, - "loss": 0.134, + "epoch": 1.7273369104772662, + "grad_norm": 3.4763148097066545, + "learning_rate": 8.044611500954344e-06, + "loss": 0.5492, "step": 12233 }, { - "epoch": 2.565317676661774, - "grad_norm": 3.194526472651351, - "learning_rate": 1.0811639180105006e-06, - "loss": 0.1125, + "epoch": 1.7274781135272521, + "grad_norm": 3.7734385018433123, + "learning_rate": 8.043116498831328e-06, + "loss": 0.5965, "step": 12234 }, { - "epoch": 2.565527364227301, - "grad_norm": 3.702729304993627, - "learning_rate": 1.0801402708074371e-06, - "loss": 0.1153, + "epoch": 1.727619316577238, + "grad_norm": 5.27261544030686, + "learning_rate": 8.041621542185442e-06, + "loss": 0.8437, "step": 12235 }, { - "epoch": 2.5657370517928286, - "grad_norm": 5.725080956473324, - "learning_rate": 1.079117080762654e-06, - "loss": 0.197, + "epoch": 1.7277605196272239, + "grad_norm": 3.39506498132273, + "learning_rate": 8.040126631051425e-06, + "loss": 0.5913, "step": 12236 }, { - "epoch": 2.565946739358356, - "grad_norm": 3.8026105029586894, - "learning_rate": 1.0780943479285944e-06, - "loss": 0.0891, + "epoch": 1.7279017226772098, + "grad_norm": 3.0154087166645396, + "learning_rate": 8.038631765464016e-06, + "loss": 0.4831, "step": 12237 }, { - "epoch": 2.5661564269238832, - "grad_norm": 3.6374062786869104, - "learning_rate": 1.0770720723576734e-06, - "loss": 0.1262, + "epoch": 1.7280429257271956, + "grad_norm": 3.9602428805395347, + "learning_rate": 8.037136945457959e-06, + "loss": 0.8279, "step": 12238 }, { - "epoch": 2.5663661144894108, - "grad_norm": 4.014396149426894, - "learning_rate": 1.0760502541022832e-06, - "loss": 0.1004, + "epoch": 1.7281841287771815, + "grad_norm": 3.09684090788484, + "learning_rate": 8.035642171067992e-06, + "loss": 0.5233, "step": 12239 }, { - "epoch": 2.5665758020549383, - "grad_norm": 5.320708273599552, - "learning_rate": 1.0750288932147957e-06, - "loss": 0.1724, + "epoch": 1.7283253318271674, + "grad_norm": 3.805726032756525, + "learning_rate": 8.034147442328852e-06, + "loss": 0.6807, "step": 12240 }, { - "epoch": 2.5667854896204654, - "grad_norm": 4.322144374254234, - "learning_rate": 1.0740079897475609e-06, - "loss": 0.1601, + "epoch": 1.7284665348771533, + "grad_norm": 3.3237179438899593, + "learning_rate": 8.032652759275276e-06, + "loss": 0.5801, "step": 12241 }, { - "epoch": 2.566995177185993, - "grad_norm": 4.1033927148071685, - "learning_rate": 1.0729875437528937e-06, - "loss": 0.1251, + "epoch": 1.7286077379271392, + "grad_norm": 3.425530020948634, + "learning_rate": 8.031158121942001e-06, + "loss": 0.5666, "step": 12242 }, { - "epoch": 2.5672048647515204, - "grad_norm": 3.6541957358274852, - "learning_rate": 1.0719675552830977e-06, - "loss": 0.138, + "epoch": 1.728748940977125, + "grad_norm": 3.873430501135082, + "learning_rate": 8.029663530363763e-06, + "loss": 0.6271, "step": 12243 }, { - "epoch": 2.5674145523170475, - "grad_norm": 4.2540995389288065, - "learning_rate": 1.0709480243904514e-06, - "loss": 0.1332, + "epoch": 1.728890144027111, + "grad_norm": 3.676871430132923, + "learning_rate": 8.028168984575292e-06, + "loss": 0.6457, "step": 12244 }, { - "epoch": 2.567624239882575, - "grad_norm": 5.301371938737536, - "learning_rate": 1.069928951127206e-06, - "loss": 0.1603, + "epoch": 1.7290313470770968, + "grad_norm": 3.0676100948401634, + "learning_rate": 8.026674484611321e-06, + "loss": 0.4898, "step": 12245 }, { - "epoch": 2.5678339274481026, - "grad_norm": 4.56346530533326, - "learning_rate": 1.0689103355455887e-06, - "loss": 0.1339, + "epoch": 1.7291725501270827, + "grad_norm": 3.5734219914097354, + "learning_rate": 8.025180030506584e-06, + "loss": 0.7054, "step": 12246 }, { - "epoch": 2.5680436150136297, - "grad_norm": 4.690190320853709, - "learning_rate": 1.067892177697808e-06, - "loss": 0.1705, + "epoch": 1.7293137531770686, + "grad_norm": 3.903938404793958, + "learning_rate": 8.023685622295809e-06, + "loss": 0.5416, "step": 12247 }, { - "epoch": 2.568253302579157, - "grad_norm": 4.791196674976329, - "learning_rate": 1.066874477636043e-06, - "loss": 0.1866, + "epoch": 1.7294549562270545, + "grad_norm": 3.385259919057092, + "learning_rate": 8.022191260013727e-06, + "loss": 0.4163, "step": 12248 }, { - "epoch": 2.5684629901446843, - "grad_norm": 3.529413524804673, - "learning_rate": 1.0658572354124574e-06, - "loss": 0.1407, + "epoch": 1.7295961592770404, + "grad_norm": 2.99959604340233, + "learning_rate": 8.020696943695065e-06, + "loss": 0.4735, "step": 12249 }, { - "epoch": 2.568672677710212, - "grad_norm": 4.4444206608379195, - "learning_rate": 1.0648404510791832e-06, - "loss": 0.1312, + "epoch": 1.7297373623270262, + "grad_norm": 3.5629449555996784, + "learning_rate": 8.019202673374554e-06, + "loss": 0.5286, "step": 12250 }, { - "epoch": 2.568882365275739, - "grad_norm": 4.10217111465742, - "learning_rate": 1.063824124688332e-06, - "loss": 0.1349, + "epoch": 1.7298785653770121, + "grad_norm": 3.020364585083156, + "learning_rate": 8.017708449086916e-06, + "loss": 0.4624, "step": 12251 }, { - "epoch": 2.5690920528412664, - "grad_norm": 5.416873948388693, - "learning_rate": 1.062808256291993e-06, - "loss": 0.1667, + "epoch": 1.730019768426998, + "grad_norm": 4.263244098164861, + "learning_rate": 8.01621427086688e-06, + "loss": 0.6008, "step": 12252 }, { - "epoch": 2.569301740406794, - "grad_norm": 4.193222655873493, - "learning_rate": 1.0617928459422345e-06, - "loss": 0.1358, + "epoch": 1.7301609714769839, + "grad_norm": 2.9861079764136367, + "learning_rate": 8.014720138749166e-06, + "loss": 0.4721, "step": 12253 }, { - "epoch": 2.569511427972321, - "grad_norm": 3.342166632981273, - "learning_rate": 1.060777893691094e-06, - "loss": 0.1009, + "epoch": 1.7303021745269698, + "grad_norm": 3.037958425612766, + "learning_rate": 8.013226052768498e-06, + "loss": 0.4973, "step": 12254 }, { - "epoch": 2.5697211155378485, - "grad_norm": 3.553759240487575, - "learning_rate": 1.0597633995905887e-06, - "loss": 0.1199, + "epoch": 1.7304433775769557, + "grad_norm": 3.177595732084706, + "learning_rate": 8.011732012959596e-06, + "loss": 0.4774, "step": 12255 }, { - "epoch": 2.569930803103376, - "grad_norm": 3.375077053990747, - "learning_rate": 1.058749363692718e-06, - "loss": 0.1234, + "epoch": 1.7305845806269415, + "grad_norm": 4.1008592476658485, + "learning_rate": 8.010238019357185e-06, + "loss": 0.5575, "step": 12256 }, { - "epoch": 2.570140490668903, - "grad_norm": 3.6939890540088736, - "learning_rate": 1.0577357860494475e-06, - "loss": 0.1194, + "epoch": 1.7307257836769274, + "grad_norm": 3.0706694362789597, + "learning_rate": 8.008744071995987e-06, + "loss": 0.4586, "step": 12257 }, { - "epoch": 2.5703501782344307, - "grad_norm": 3.920686872268463, - "learning_rate": 1.0567226667127306e-06, - "loss": 0.0996, + "epoch": 1.7308669867269133, + "grad_norm": 3.3933264570107813, + "learning_rate": 8.00725017091071e-06, + "loss": 0.5053, "step": 12258 }, { - "epoch": 2.570559865799958, - "grad_norm": 4.526284188267804, - "learning_rate": 1.0557100057344871e-06, - "loss": 0.1646, + "epoch": 1.7310081897768992, + "grad_norm": 3.942766825970233, + "learning_rate": 8.005756316136083e-06, + "loss": 0.6197, "step": 12259 }, { - "epoch": 2.5707695533654853, - "grad_norm": 4.077610365346558, - "learning_rate": 1.0546978031666177e-06, - "loss": 0.1617, + "epoch": 1.731149392826885, + "grad_norm": 2.6765244404896733, + "learning_rate": 8.004262507706819e-06, + "loss": 0.346, "step": 12260 }, { - "epoch": 2.570979240931013, - "grad_norm": 4.556265039249083, - "learning_rate": 1.0536860590610009e-06, - "loss": 0.1324, + "epoch": 1.731290595876871, + "grad_norm": 3.4399286566659795, + "learning_rate": 8.002768745657632e-06, + "loss": 0.4843, "step": 12261 }, { - "epoch": 2.5711889284965403, - "grad_norm": 3.7457350186463954, - "learning_rate": 1.0526747734694908e-06, - "loss": 0.1504, + "epoch": 1.7314317989268568, + "grad_norm": 3.1918969918617055, + "learning_rate": 8.001275030023234e-06, + "loss": 0.4269, "step": 12262 }, { - "epoch": 2.5713986160620674, - "grad_norm": 3.8296243671396617, - "learning_rate": 1.0516639464439182e-06, - "loss": 0.1423, + "epoch": 1.7315730019768427, + "grad_norm": 4.600125060250842, + "learning_rate": 7.999781360838342e-06, + "loss": 0.6863, "step": 12263 }, { - "epoch": 2.571608303627595, - "grad_norm": 4.285112907804235, - "learning_rate": 1.050653578036086e-06, - "loss": 0.1323, + "epoch": 1.7317142050268286, + "grad_norm": 3.847966620955857, + "learning_rate": 7.998287738137669e-06, + "loss": 0.487, "step": 12264 }, { - "epoch": 2.5718179911931225, - "grad_norm": 6.78428986667062, - "learning_rate": 1.0496436682977807e-06, - "loss": 0.1788, + "epoch": 1.7318554080768145, + "grad_norm": 3.836696575294387, + "learning_rate": 7.996794161955921e-06, + "loss": 0.4876, "step": 12265 }, { - "epoch": 2.5720276787586496, - "grad_norm": 4.808252569237714, - "learning_rate": 1.048634217280764e-06, - "loss": 0.1827, + "epoch": 1.7319966111268004, + "grad_norm": 4.0118424865781455, + "learning_rate": 7.995300632327816e-06, + "loss": 0.4952, "step": 12266 }, { - "epoch": 2.572237366324177, - "grad_norm": 3.1081839596677767, - "learning_rate": 1.0476252250367647e-06, - "loss": 0.0751, + "epoch": 1.7321378141767863, + "grad_norm": 4.245363982826849, + "learning_rate": 7.993807149288053e-06, + "loss": 0.572, "step": 12267 }, { - "epoch": 2.572447053889704, - "grad_norm": 5.2551302669926745, - "learning_rate": 1.0466166916175003e-06, - "loss": 0.1568, + "epoch": 1.7322790172267721, + "grad_norm": 3.4529183617002683, + "learning_rate": 7.99231371287135e-06, + "loss": 0.5066, "step": 12268 }, { - "epoch": 2.5726567414552317, - "grad_norm": 4.567603992500484, - "learning_rate": 1.0456086170746615e-06, - "loss": 0.1528, + "epoch": 1.732420220276758, + "grad_norm": 3.5250300523353606, + "learning_rate": 7.99082032311241e-06, + "loss": 0.5158, "step": 12269 }, { - "epoch": 2.572866429020759, - "grad_norm": 4.920386645666056, - "learning_rate": 1.0446010014599116e-06, - "loss": 0.168, + "epoch": 1.732561423326744, + "grad_norm": 3.6279822389578245, + "learning_rate": 7.989326980045937e-06, + "loss": 0.5948, "step": 12270 }, { - "epoch": 2.5730761165862863, - "grad_norm": 4.14092376919901, - "learning_rate": 1.0435938448248906e-06, - "loss": 0.1486, + "epoch": 1.7327026263767298, + "grad_norm": 3.94906311511895, + "learning_rate": 7.987833683706637e-06, + "loss": 0.6903, "step": 12271 }, { - "epoch": 2.573285804151814, - "grad_norm": 3.389975600827999, - "learning_rate": 1.0425871472212211e-06, - "loss": 0.1034, + "epoch": 1.7328438294267157, + "grad_norm": 4.078626684325957, + "learning_rate": 7.98634043412921e-06, + "loss": 0.5793, "step": 12272 }, { - "epoch": 2.573495491717341, - "grad_norm": 3.810779613429181, - "learning_rate": 1.0415809087004936e-06, - "loss": 0.103, + "epoch": 1.7329850324767015, + "grad_norm": 3.6157947825926033, + "learning_rate": 7.984847231348363e-06, + "loss": 0.5574, "step": 12273 }, { - "epoch": 2.5737051792828685, - "grad_norm": 4.147369823663081, - "learning_rate": 1.0405751293142851e-06, - "loss": 0.1617, + "epoch": 1.7331262355266874, + "grad_norm": 3.2365441168173015, + "learning_rate": 7.983354075398797e-06, + "loss": 0.5132, "step": 12274 }, { - "epoch": 2.573914866848396, - "grad_norm": 3.5918291470590544, - "learning_rate": 1.0395698091141405e-06, - "loss": 0.1301, + "epoch": 1.7332674385766733, + "grad_norm": 3.783482920836712, + "learning_rate": 7.98186096631521e-06, + "loss": 0.5376, "step": 12275 }, { - "epoch": 2.574124554413923, - "grad_norm": 5.702992779361755, - "learning_rate": 1.0385649481515814e-06, - "loss": 0.2085, + "epoch": 1.7334086416266592, + "grad_norm": 4.510950147147192, + "learning_rate": 7.980367904132303e-06, + "loss": 0.6456, "step": 12276 }, { - "epoch": 2.5743342419794506, - "grad_norm": 3.4956848584711424, - "learning_rate": 1.0375605464781125e-06, - "loss": 0.1432, + "epoch": 1.733549844676645, + "grad_norm": 3.877354506538229, + "learning_rate": 7.978874888884777e-06, + "loss": 0.6097, "step": 12277 }, { - "epoch": 2.574543929544978, - "grad_norm": 4.6571068116420475, - "learning_rate": 1.0365566041452114e-06, - "loss": 0.1495, + "epoch": 1.733691047726631, + "grad_norm": 4.167590620925349, + "learning_rate": 7.977381920607324e-06, + "loss": 0.5913, "step": 12278 }, { - "epoch": 2.574753617110505, - "grad_norm": 4.936938095977498, - "learning_rate": 1.0355531212043312e-06, - "loss": 0.1333, + "epoch": 1.7338322507766168, + "grad_norm": 3.5721782283903636, + "learning_rate": 7.97588899933464e-06, + "loss": 0.5914, "step": 12279 }, { - "epoch": 2.5749633046760327, - "grad_norm": 4.785161220976136, - "learning_rate": 1.034550097706899e-06, - "loss": 0.1607, + "epoch": 1.7339734538266027, + "grad_norm": 4.076474059302508, + "learning_rate": 7.97439612510142e-06, + "loss": 0.6127, "step": 12280 }, { - "epoch": 2.5751729922415603, - "grad_norm": 4.802038718101305, - "learning_rate": 1.0335475337043266e-06, - "loss": 0.1736, + "epoch": 1.7341146568765886, + "grad_norm": 4.399695408243882, + "learning_rate": 7.972903297942361e-06, + "loss": 0.6075, "step": 12281 }, { - "epoch": 2.5753826798070873, - "grad_norm": 4.615253150500586, - "learning_rate": 1.032545429247992e-06, - "loss": 0.1694, + "epoch": 1.7342558599265745, + "grad_norm": 3.4154928638674287, + "learning_rate": 7.971410517892155e-06, + "loss": 0.5369, "step": 12282 }, { - "epoch": 2.575592367372615, - "grad_norm": 5.385899732711564, - "learning_rate": 1.0315437843892595e-06, - "loss": 0.2194, + "epoch": 1.7343970629765604, + "grad_norm": 3.203158479025877, + "learning_rate": 7.969917784985493e-06, + "loss": 0.4545, "step": 12283 }, { - "epoch": 2.5758020549381424, - "grad_norm": 4.670532837635864, - "learning_rate": 1.0305425991794638e-06, - "loss": 0.1502, + "epoch": 1.7345382660265463, + "grad_norm": 3.9318904239073995, + "learning_rate": 7.968425099257062e-06, + "loss": 0.7079, "step": 12284 }, { - "epoch": 2.5760117425036695, - "grad_norm": 5.528930059006826, - "learning_rate": 1.0295418736699137e-06, - "loss": 0.1756, + "epoch": 1.7346794690765321, + "grad_norm": 3.8422767264503106, + "learning_rate": 7.966932460741557e-06, + "loss": 0.583, "step": 12285 }, { - "epoch": 2.576221430069197, - "grad_norm": 3.6247727438738417, - "learning_rate": 1.0285416079119036e-06, - "loss": 0.1202, + "epoch": 1.734820672126518, + "grad_norm": 3.46944674871209, + "learning_rate": 7.965439869473664e-06, + "loss": 0.5556, "step": 12286 }, { - "epoch": 2.576431117634724, - "grad_norm": 4.327164974280052, - "learning_rate": 1.0275418019566962e-06, - "loss": 0.1117, + "epoch": 1.734961875176504, + "grad_norm": 3.3739363239272655, + "learning_rate": 7.96394732548807e-06, + "loss": 0.477, "step": 12287 }, { - "epoch": 2.5766408052002516, - "grad_norm": 5.33326951502281, - "learning_rate": 1.0265424558555314e-06, - "loss": 0.1522, + "epoch": 1.7351030782264898, + "grad_norm": 3.8110579664551705, + "learning_rate": 7.96245482881946e-06, + "loss": 0.5566, "step": 12288 }, { - "epoch": 2.576850492765779, - "grad_norm": 4.883478154967185, - "learning_rate": 1.0255435696596294e-06, - "loss": 0.2121, + "epoch": 1.7352442812764757, + "grad_norm": 3.809238411332083, + "learning_rate": 7.960962379502516e-06, + "loss": 0.5784, "step": 12289 }, { - "epoch": 2.5770601803313062, - "grad_norm": 4.216356297517448, - "learning_rate": 1.024545143420187e-06, - "loss": 0.1219, + "epoch": 1.7353854843264616, + "grad_norm": 3.510142243588005, + "learning_rate": 7.95946997757193e-06, + "loss": 0.5417, "step": 12290 }, { - "epoch": 2.5772698678968338, - "grad_norm": 4.424947280455243, - "learning_rate": 1.023547177188373e-06, - "loss": 0.149, + "epoch": 1.7355266873764474, + "grad_norm": 3.5286695784832562, + "learning_rate": 7.957977623062379e-06, + "loss": 0.5101, "step": 12291 }, { - "epoch": 2.577479555462361, - "grad_norm": 4.420572162774648, - "learning_rate": 1.0225496710153337e-06, - "loss": 0.1287, + "epoch": 1.7356678904264333, + "grad_norm": 3.2894451457279605, + "learning_rate": 7.956485316008545e-06, + "loss": 0.4857, "step": 12292 }, { - "epoch": 2.5776892430278884, - "grad_norm": 4.611145224532912, - "learning_rate": 1.0215526249521957e-06, - "loss": 0.1359, + "epoch": 1.7358090934764192, + "grad_norm": 3.184568229054848, + "learning_rate": 7.95499305644511e-06, + "loss": 0.5463, "step": 12293 }, { - "epoch": 2.577898930593416, - "grad_norm": 3.6589471589406437, - "learning_rate": 1.0205560390500557e-06, - "loss": 0.1239, + "epoch": 1.735950296526405, + "grad_norm": 3.6959208738156804, + "learning_rate": 7.953500844406758e-06, + "loss": 0.6844, "step": 12294 }, { - "epoch": 2.578108618158943, - "grad_norm": 3.4525076688009344, - "learning_rate": 1.0195599133599955e-06, - "loss": 0.1111, + "epoch": 1.736091499576391, + "grad_norm": 3.373305534261933, + "learning_rate": 7.95200867992816e-06, + "loss": 0.4855, "step": 12295 }, { - "epoch": 2.5783183057244705, - "grad_norm": 6.611862239505533, - "learning_rate": 1.018564247933065e-06, - "loss": 0.1724, + "epoch": 1.7362327026263769, + "grad_norm": 3.21993280665393, + "learning_rate": 7.950516563043994e-06, + "loss": 0.5964, "step": 12296 }, { - "epoch": 2.578527993289998, - "grad_norm": 4.123234539355565, - "learning_rate": 1.0175690428202923e-06, - "loss": 0.152, + "epoch": 1.7363739056763627, + "grad_norm": 3.516494198029777, + "learning_rate": 7.949024493788938e-06, + "loss": 0.4791, "step": 12297 }, { - "epoch": 2.578737680855525, - "grad_norm": 4.1703687536714416, - "learning_rate": 1.0165742980726857e-06, - "loss": 0.1642, + "epoch": 1.7365151087263486, + "grad_norm": 2.866289877521798, + "learning_rate": 7.947532472197668e-06, + "loss": 0.3723, "step": 12298 }, { - "epoch": 2.5789473684210527, - "grad_norm": 3.9378480112927687, - "learning_rate": 1.0155800137412287e-06, - "loss": 0.1306, + "epoch": 1.7366563117763345, + "grad_norm": 3.5286222145144577, + "learning_rate": 7.946040498304857e-06, + "loss": 0.4949, "step": 12299 }, { - "epoch": 2.57915705598658, - "grad_norm": 4.524950133211574, - "learning_rate": 1.0145861898768794e-06, - "loss": 0.1629, + "epoch": 1.7367975148263204, + "grad_norm": 3.9486269367211313, + "learning_rate": 7.944548572145178e-06, + "loss": 0.6941, "step": 12300 }, { - "epoch": 2.5793667435521073, - "grad_norm": 4.2750388273436934, - "learning_rate": 1.0135928265305695e-06, - "loss": 0.1238, + "epoch": 1.7369387178763063, + "grad_norm": 3.7737242937431392, + "learning_rate": 7.9430566937533e-06, + "loss": 0.5685, "step": 12301 }, { - "epoch": 2.579576431117635, - "grad_norm": 5.15976927076516, - "learning_rate": 1.0125999237532159e-06, - "loss": 0.1354, + "epoch": 1.7370799209262922, + "grad_norm": 4.260221950896501, + "learning_rate": 7.941564863163899e-06, + "loss": 0.5547, "step": 12302 }, { - "epoch": 2.5797861186831623, - "grad_norm": 3.6394099688880814, - "learning_rate": 1.0116074815957044e-06, - "loss": 0.1509, + "epoch": 1.737221123976278, + "grad_norm": 3.452874788743592, + "learning_rate": 7.940073080411643e-06, + "loss": 0.5751, "step": 12303 }, { - "epoch": 2.5799958062486894, - "grad_norm": 4.993492256314699, - "learning_rate": 1.010615500108897e-06, - "loss": 0.1544, + "epoch": 1.737362327026264, + "grad_norm": 3.330685658869131, + "learning_rate": 7.938581345531197e-06, + "loss": 0.5395, "step": 12304 }, { - "epoch": 2.580205493814217, - "grad_norm": 3.7232198597526147, - "learning_rate": 1.009623979343638e-06, - "loss": 0.1122, + "epoch": 1.7375035300762498, + "grad_norm": 3.645365460990179, + "learning_rate": 7.93708965855723e-06, + "loss": 0.6058, "step": 12305 }, { - "epoch": 2.580415181379744, - "grad_norm": 4.037771377627534, - "learning_rate": 1.0086329193507405e-06, - "loss": 0.1027, + "epoch": 1.7376447331262357, + "grad_norm": 3.073263168403292, + "learning_rate": 7.935598019524406e-06, + "loss": 0.567, "step": 12306 }, { - "epoch": 2.5806248689452715, - "grad_norm": 6.990721620577069, - "learning_rate": 1.0076423201810036e-06, - "loss": 0.1271, + "epoch": 1.7377859361762216, + "grad_norm": 3.8088501374111416, + "learning_rate": 7.934106428467397e-06, + "loss": 0.5371, "step": 12307 }, { - "epoch": 2.580834556510799, - "grad_norm": 4.1410746106497935, - "learning_rate": 1.0066521818851938e-06, - "loss": 0.1276, + "epoch": 1.7379271392262075, + "grad_norm": 3.1999865097594413, + "learning_rate": 7.932614885420859e-06, + "loss": 0.5054, "step": 12308 }, { - "epoch": 2.581044244076326, - "grad_norm": 4.774771339537855, - "learning_rate": 1.0056625045140566e-06, - "loss": 0.1472, + "epoch": 1.7380683422761931, + "grad_norm": 4.15971483965218, + "learning_rate": 7.931123390419458e-06, + "loss": 0.5387, "step": 12309 }, { - "epoch": 2.5812539316418537, - "grad_norm": 3.6652852329458665, - "learning_rate": 1.0046732881183152e-06, - "loss": 0.1173, + "epoch": 1.738209545326179, + "grad_norm": 3.606674944923736, + "learning_rate": 7.929631943497858e-06, + "loss": 0.5649, "step": 12310 }, { - "epoch": 2.5814636192073808, - "grad_norm": 3.266901874208994, - "learning_rate": 1.003684532748671e-06, - "loss": 0.139, + "epoch": 1.7383507483761649, + "grad_norm": 3.4011682593208175, + "learning_rate": 7.928140544690719e-06, + "loss": 0.6067, "step": 12311 }, { - "epoch": 2.5816733067729083, - "grad_norm": 3.171937295050627, - "learning_rate": 1.0026962384557993e-06, - "loss": 0.0748, + "epoch": 1.7384919514261508, + "grad_norm": 4.738748730683039, + "learning_rate": 7.926649194032699e-06, + "loss": 0.6545, "step": 12312 }, { - "epoch": 2.581882994338436, - "grad_norm": 3.891274431280533, - "learning_rate": 1.001708405290347e-06, - "loss": 0.1602, + "epoch": 1.7386331544761366, + "grad_norm": 4.004762251212013, + "learning_rate": 7.925157891558455e-06, + "loss": 0.5294, "step": 12313 }, { - "epoch": 2.582092681903963, - "grad_norm": 4.587337410199784, - "learning_rate": 1.0007210333029483e-06, - "loss": 0.1533, + "epoch": 1.7387743575261225, + "grad_norm": 3.6087246256532475, + "learning_rate": 7.923666637302643e-06, + "loss": 0.5755, "step": 12314 }, { - "epoch": 2.5823023694694904, - "grad_norm": 3.2107477587422926, - "learning_rate": 9.997341225442026e-07, - "loss": 0.0839, + "epoch": 1.7389155605761084, + "grad_norm": 3.868696021497168, + "learning_rate": 7.922175431299922e-06, + "loss": 0.6617, "step": 12315 }, { - "epoch": 2.582512057035018, - "grad_norm": 3.635218908219605, - "learning_rate": 9.98747673064696e-07, - "loss": 0.1458, + "epoch": 1.7390567636260943, + "grad_norm": 4.074657425117152, + "learning_rate": 7.920684273584949e-06, + "loss": 0.6987, "step": 12316 }, { - "epoch": 2.582721744600545, - "grad_norm": 3.3544491051596212, - "learning_rate": 9.97761684914983e-07, - "loss": 0.0909, + "epoch": 1.7391979666760802, + "grad_norm": 3.6695572768126796, + "learning_rate": 7.919193164192374e-06, + "loss": 0.6106, "step": 12317 }, { - "epoch": 2.5829314321660726, - "grad_norm": 5.018317903454361, - "learning_rate": 9.967761581455948e-07, - "loss": 0.1633, + "epoch": 1.739339169726066, + "grad_norm": 3.229760717080736, + "learning_rate": 7.91770210315685e-06, + "loss": 0.4719, "step": 12318 }, { - "epoch": 2.5831411197316, - "grad_norm": 5.542974511527306, - "learning_rate": 9.957910928070436e-07, - "loss": 0.1783, + "epoch": 1.739480372776052, + "grad_norm": 3.588585125261572, + "learning_rate": 7.916211090513037e-06, + "loss": 0.572, "step": 12319 }, { - "epoch": 2.583350807297127, - "grad_norm": 5.166646153995886, - "learning_rate": 9.9480648894982e-07, - "loss": 0.1569, + "epoch": 1.7396215758260378, + "grad_norm": 3.8831475524331966, + "learning_rate": 7.914720126295572e-06, + "loss": 0.6116, "step": 12320 }, { - "epoch": 2.5835604948626547, - "grad_norm": 3.0579825928907676, - "learning_rate": 9.93822346624379e-07, - "loss": 0.1201, + "epoch": 1.7397627788760237, + "grad_norm": 4.00616999922467, + "learning_rate": 7.913229210539113e-06, + "loss": 0.5617, "step": 12321 }, { - "epoch": 2.5837701824281822, - "grad_norm": 5.0069304113773585, - "learning_rate": 9.928386658811629e-07, - "loss": 0.0922, + "epoch": 1.7399039819260096, + "grad_norm": 3.6380298624212393, + "learning_rate": 7.911738343278303e-06, + "loss": 0.6849, "step": 12322 }, { - "epoch": 2.5839798699937093, - "grad_norm": 4.820975851444204, - "learning_rate": 9.918554467705877e-07, - "loss": 0.1669, + "epoch": 1.7400451849759955, + "grad_norm": 3.4289387645820333, + "learning_rate": 7.910247524547793e-06, + "loss": 0.5663, "step": 12323 }, { - "epoch": 2.584189557559237, - "grad_norm": 3.336565209904173, - "learning_rate": 9.908726893430475e-07, - "loss": 0.111, + "epoch": 1.7401863880259814, + "grad_norm": 3.4743933179940227, + "learning_rate": 7.908756754382228e-06, + "loss": 0.5575, "step": 12324 }, { - "epoch": 2.5843992451247644, - "grad_norm": 4.531203786433443, - "learning_rate": 9.898903936489046e-07, - "loss": 0.1486, + "epoch": 1.7403275910759672, + "grad_norm": 3.3365012277165125, + "learning_rate": 7.907266032816254e-06, + "loss": 0.565, "step": 12325 }, { - "epoch": 2.5846089326902915, - "grad_norm": 4.778731013707345, - "learning_rate": 9.889085597385062e-07, - "loss": 0.1316, + "epoch": 1.7404687941259531, + "grad_norm": 3.8360519938743716, + "learning_rate": 7.905775359884514e-06, + "loss": 0.6907, "step": 12326 }, { - "epoch": 2.584818620255819, - "grad_norm": 3.461830449944108, - "learning_rate": 9.879271876621743e-07, - "loss": 0.1098, + "epoch": 1.740609997175939, + "grad_norm": 3.6741361140004134, + "learning_rate": 7.904284735621648e-06, + "loss": 0.5443, "step": 12327 }, { - "epoch": 2.585028307821346, - "grad_norm": 4.587505915874927, - "learning_rate": 9.869462774702054e-07, - "loss": 0.1163, + "epoch": 1.7407512002259249, + "grad_norm": 3.5222500025388084, + "learning_rate": 7.902794160062303e-06, + "loss": 0.5869, "step": 12328 }, { - "epoch": 2.5852379953868736, - "grad_norm": 5.625875259226333, - "learning_rate": 9.859658292128704e-07, - "loss": 0.1676, + "epoch": 1.7408924032759108, + "grad_norm": 4.316278890500796, + "learning_rate": 7.901303633241115e-06, + "loss": 0.6863, "step": 12329 }, { - "epoch": 2.5854476829524007, - "grad_norm": 8.017736701930426, - "learning_rate": 9.849858429404223e-07, - "loss": 0.1161, + "epoch": 1.7410336063258967, + "grad_norm": 4.237230454234031, + "learning_rate": 7.899813155192723e-06, + "loss": 0.7047, "step": 12330 }, { - "epoch": 2.585657370517928, - "grad_norm": 3.573495855668602, - "learning_rate": 9.840063187030836e-07, - "loss": 0.1138, + "epoch": 1.7411748093758825, + "grad_norm": 3.3224374764895765, + "learning_rate": 7.898322725951768e-06, + "loss": 0.5892, "step": 12331 }, { - "epoch": 2.5858670580834557, - "grad_norm": 3.4503796062193133, - "learning_rate": 9.8302725655106e-07, - "loss": 0.0979, + "epoch": 1.7413160124258684, + "grad_norm": 3.8412073499102792, + "learning_rate": 7.896832345552882e-06, + "loss": 0.672, "step": 12332 }, { - "epoch": 2.586076745648983, - "grad_norm": 4.621692222897732, - "learning_rate": 9.820486565345288e-07, - "loss": 0.1685, + "epoch": 1.7414572154758543, + "grad_norm": 3.3120997495042865, + "learning_rate": 7.895342014030706e-06, + "loss": 0.4388, "step": 12333 }, { - "epoch": 2.5862864332145103, - "grad_norm": 4.726241924314743, - "learning_rate": 9.81070518703643e-07, - "loss": 0.14, + "epoch": 1.7415984185258402, + "grad_norm": 3.566704976149142, + "learning_rate": 7.893851731419872e-06, + "loss": 0.6121, "step": 12334 }, { - "epoch": 2.586496120780038, - "grad_norm": 4.494331973336337, - "learning_rate": 9.800928431085366e-07, - "loss": 0.142, + "epoch": 1.741739621575826, + "grad_norm": 3.8738259805703534, + "learning_rate": 7.892361497755013e-06, + "loss": 0.5813, "step": 12335 }, { - "epoch": 2.586705808345565, - "grad_norm": 4.267128428153658, - "learning_rate": 9.791156297993165e-07, - "loss": 0.1631, + "epoch": 1.741880824625812, + "grad_norm": 3.1056172410959446, + "learning_rate": 7.890871313070768e-06, + "loss": 0.494, "step": 12336 }, { - "epoch": 2.5869154959110925, - "grad_norm": 3.83347913844968, - "learning_rate": 9.78138878826067e-07, - "loss": 0.1095, + "epoch": 1.7420220276757978, + "grad_norm": 3.947035380131418, + "learning_rate": 7.889381177401758e-06, + "loss": 0.5936, "step": 12337 }, { - "epoch": 2.58712518347662, - "grad_norm": 4.911332265194813, - "learning_rate": 9.77162590238846e-07, - "loss": 0.182, + "epoch": 1.7421632307257837, + "grad_norm": 4.301704831686292, + "learning_rate": 7.88789109078262e-06, + "loss": 0.6153, "step": 12338 }, { - "epoch": 2.587334871042147, - "grad_norm": 4.350936330948426, - "learning_rate": 9.761867640876932e-07, - "loss": 0.1395, + "epoch": 1.7423044337757696, + "grad_norm": 3.179182520795745, + "learning_rate": 7.886401053247982e-06, + "loss": 0.5162, "step": 12339 }, { - "epoch": 2.5875445586076746, - "grad_norm": 3.988771498604097, - "learning_rate": 9.752114004226176e-07, - "loss": 0.1337, + "epoch": 1.7424456368257555, + "grad_norm": 2.949479357994074, + "learning_rate": 7.884911064832466e-06, + "loss": 0.4034, "step": 12340 }, { - "epoch": 2.587754246173202, - "grad_norm": 5.569465467641354, - "learning_rate": 9.742364992936126e-07, - "loss": 0.1452, + "epoch": 1.7425868398757414, + "grad_norm": 3.656064398150352, + "learning_rate": 7.883421125570705e-06, + "loss": 0.5672, "step": 12341 }, { - "epoch": 2.5879639337387292, - "grad_norm": 3.6471334386970233, - "learning_rate": 9.732620607506415e-07, - "loss": 0.1285, + "epoch": 1.7427280429257272, + "grad_norm": 3.0039954588497304, + "learning_rate": 7.881931235497324e-06, + "loss": 0.4362, "step": 12342 }, { - "epoch": 2.5881736213042568, - "grad_norm": 5.384274756106724, - "learning_rate": 9.722880848436444e-07, - "loss": 0.1293, + "epoch": 1.742869245975713, + "grad_norm": 3.57816052300096, + "learning_rate": 7.880441394646947e-06, + "loss": 0.6384, "step": 12343 }, { - "epoch": 2.5883833088697843, - "grad_norm": 4.488561480088919, - "learning_rate": 9.713145716225404e-07, - "loss": 0.1369, + "epoch": 1.7430104490256988, + "grad_norm": 3.598478295490384, + "learning_rate": 7.878951603054195e-06, + "loss": 0.6154, "step": 12344 }, { - "epoch": 2.5885929964353114, - "grad_norm": 4.471419853164163, - "learning_rate": 9.70341521137227e-07, - "loss": 0.1469, + "epoch": 1.7431516520756847, + "grad_norm": 3.0525034275813905, + "learning_rate": 7.877461860753697e-06, + "loss": 0.541, "step": 12345 }, { - "epoch": 2.588802684000839, - "grad_norm": 4.847982399486932, - "learning_rate": 9.693689334375688e-07, - "loss": 0.1433, + "epoch": 1.7432928551256706, + "grad_norm": 3.2114003288540327, + "learning_rate": 7.875972167780063e-06, + "loss": 0.4746, "step": 12346 }, { - "epoch": 2.589012371566366, - "grad_norm": 3.7620133454695317, - "learning_rate": 9.683968085734152e-07, - "loss": 0.1224, + "epoch": 1.7434340581756564, + "grad_norm": 3.4013373130005826, + "learning_rate": 7.87448252416792e-06, + "loss": 0.6067, "step": 12347 }, { - "epoch": 2.5892220591318935, - "grad_norm": 3.2875661580763658, - "learning_rate": 9.674251465945917e-07, - "loss": 0.1143, + "epoch": 1.7435752612256423, + "grad_norm": 3.351396816533248, + "learning_rate": 7.872992929951886e-06, + "loss": 0.5184, "step": 12348 }, { - "epoch": 2.5894317466974206, - "grad_norm": 3.9115075483117043, - "learning_rate": 9.664539475508961e-07, - "loss": 0.1311, + "epoch": 1.7437164642756282, + "grad_norm": 3.560096971177, + "learning_rate": 7.871503385166575e-06, + "loss": 0.4978, "step": 12349 }, { - "epoch": 2.589641434262948, - "grad_norm": 4.343426686907019, - "learning_rate": 9.65483211492102e-07, - "loss": 0.1538, + "epoch": 1.743857667325614, + "grad_norm": 2.920457301641549, + "learning_rate": 7.870013889846608e-06, + "loss": 0.4201, "step": 12350 }, { - "epoch": 2.5898511218284757, - "grad_norm": 4.62105227687183, - "learning_rate": 9.64512938467964e-07, - "loss": 0.1285, + "epoch": 1.7439988703756, + "grad_norm": 3.6718460401748185, + "learning_rate": 7.868524444026599e-06, + "loss": 0.5081, "step": 12351 }, { - "epoch": 2.5900608093940027, - "grad_norm": 3.7838127011193947, - "learning_rate": 9.635431285282071e-07, - "loss": 0.1186, + "epoch": 1.7441400734255859, + "grad_norm": 3.7099225855011904, + "learning_rate": 7.867035047741159e-06, + "loss": 0.4773, "step": 12352 }, { - "epoch": 2.5902704969595303, - "grad_norm": 4.176735639346055, - "learning_rate": 9.6257378172254e-07, - "loss": 0.1485, + "epoch": 1.7442812764755717, + "grad_norm": 4.019298564382873, + "learning_rate": 7.865545701024909e-06, + "loss": 0.6674, "step": 12353 }, { - "epoch": 2.590480184525058, - "grad_norm": 4.227462236275158, - "learning_rate": 9.616048981006409e-07, - "loss": 0.1398, + "epoch": 1.7444224795255576, + "grad_norm": 3.9919315067912393, + "learning_rate": 7.86405640391245e-06, + "loss": 0.6395, "step": 12354 }, { - "epoch": 2.590689872090585, - "grad_norm": 4.2016849265512715, - "learning_rate": 9.606364777121656e-07, - "loss": 0.1512, + "epoch": 1.7445636825755435, + "grad_norm": 3.864375827067851, + "learning_rate": 7.8625671564384e-06, + "loss": 0.5734, "step": 12355 }, { - "epoch": 2.5908995596561124, - "grad_norm": 3.050513212011917, - "learning_rate": 9.59668520606749e-07, - "loss": 0.1074, + "epoch": 1.7447048856255294, + "grad_norm": 4.496668351196489, + "learning_rate": 7.861077958637365e-06, + "loss": 0.7376, "step": 12356 }, { - "epoch": 2.59110924722164, - "grad_norm": 5.191146736102522, - "learning_rate": 9.587010268340013e-07, - "loss": 0.1552, + "epoch": 1.7448460886755153, + "grad_norm": 3.7312405891850133, + "learning_rate": 7.859588810543954e-06, + "loss": 0.6316, "step": 12357 }, { - "epoch": 2.591318934787167, - "grad_norm": 4.291988724326341, - "learning_rate": 9.577339964435084e-07, - "loss": 0.132, + "epoch": 1.7449872917255012, + "grad_norm": 4.256561724193676, + "learning_rate": 7.858099712192774e-06, + "loss": 0.6544, "step": 12358 }, { - "epoch": 2.5915286223526945, - "grad_norm": 3.433029391661557, - "learning_rate": 9.567674294848294e-07, - "loss": 0.11, + "epoch": 1.745128494775487, + "grad_norm": 2.9174758539217875, + "learning_rate": 7.856610663618433e-06, + "loss": 0.4834, "step": 12359 }, { - "epoch": 2.591738309918222, - "grad_norm": 2.8292189033213, - "learning_rate": 9.558013260075049e-07, - "loss": 0.1062, + "epoch": 1.745269697825473, + "grad_norm": 3.2666706788577127, + "learning_rate": 7.855121664855535e-06, + "loss": 0.6432, "step": 12360 }, { - "epoch": 2.591947997483749, - "grad_norm": 4.0476764291349765, - "learning_rate": 9.548356860610486e-07, - "loss": 0.1146, + "epoch": 1.7454109008754588, + "grad_norm": 3.6525343795097487, + "learning_rate": 7.853632715938681e-06, + "loss": 0.5931, "step": 12361 }, { - "epoch": 2.5921576850492767, - "grad_norm": 3.401370098423267, - "learning_rate": 9.53870509694952e-07, - "loss": 0.1213, + "epoch": 1.7455521039254447, + "grad_norm": 2.985662491137568, + "learning_rate": 7.852143816902483e-06, + "loss": 0.3859, "step": 12362 }, { - "epoch": 2.592367372614804, - "grad_norm": 4.292439880066373, - "learning_rate": 9.529057969586819e-07, - "loss": 0.1383, + "epoch": 1.7456933069754306, + "grad_norm": 3.2167960396533264, + "learning_rate": 7.850654967781528e-06, + "loss": 0.3888, "step": 12363 }, { - "epoch": 2.5925770601803313, - "grad_norm": 3.969880981773326, - "learning_rate": 9.519415479016792e-07, - "loss": 0.1363, + "epoch": 1.7458345100254165, + "grad_norm": 3.161667660721012, + "learning_rate": 7.849166168610424e-06, + "loss": 0.4959, "step": 12364 }, { - "epoch": 2.592786747745859, - "grad_norm": 4.634108141293236, - "learning_rate": 9.509777625733674e-07, - "loss": 0.1, + "epoch": 1.7459757130754023, + "grad_norm": 4.706689960674808, + "learning_rate": 7.847677419423771e-06, + "loss": 0.6461, "step": 12365 }, { - "epoch": 2.592996435311386, - "grad_norm": 3.8278376763399, - "learning_rate": 9.500144410231393e-07, - "loss": 0.099, + "epoch": 1.7461169161253882, + "grad_norm": 3.822588998727122, + "learning_rate": 7.846188720256162e-06, + "loss": 0.5328, "step": 12366 }, { - "epoch": 2.5932061228769134, - "grad_norm": 2.8536345560223273, - "learning_rate": 9.490515833003666e-07, - "loss": 0.0887, + "epoch": 1.746258119175374, + "grad_norm": 3.73248783854724, + "learning_rate": 7.844700071142199e-06, + "loss": 0.4458, "step": 12367 }, { - "epoch": 2.5934158104424405, - "grad_norm": 3.8841582914818025, - "learning_rate": 9.480891894543975e-07, - "loss": 0.1327, + "epoch": 1.74639932222536, + "grad_norm": 3.599584735453198, + "learning_rate": 7.843211472116476e-06, + "loss": 0.5464, "step": 12368 }, { - "epoch": 2.593625498007968, - "grad_norm": 4.258583417054837, - "learning_rate": 9.471272595345593e-07, - "loss": 0.1438, + "epoch": 1.7465405252753459, + "grad_norm": 4.020897616261642, + "learning_rate": 7.841722923213588e-06, + "loss": 0.7149, "step": 12369 }, { - "epoch": 2.5938351855734956, - "grad_norm": 3.931479907221189, - "learning_rate": 9.461657935901502e-07, - "loss": 0.1666, + "epoch": 1.7466817283253318, + "grad_norm": 3.5873575416194647, + "learning_rate": 7.84023442446813e-06, + "loss": 0.519, "step": 12370 }, { - "epoch": 2.5940448731390227, - "grad_norm": 3.829509438421232, - "learning_rate": 9.452047916704466e-07, - "loss": 0.1088, + "epoch": 1.7468229313753176, + "grad_norm": 3.6164550347190683, + "learning_rate": 7.838745975914685e-06, + "loss": 0.5589, "step": 12371 }, { - "epoch": 2.59425456070455, - "grad_norm": 4.69056035375459, - "learning_rate": 9.442442538247032e-07, - "loss": 0.1538, + "epoch": 1.7469641344253035, + "grad_norm": 3.260271711054987, + "learning_rate": 7.837257577587853e-06, + "loss": 0.4767, "step": 12372 }, { - "epoch": 2.5944642482700777, - "grad_norm": 5.926750805684561, - "learning_rate": 9.432841801021453e-07, - "loss": 0.1447, + "epoch": 1.7471053374752894, + "grad_norm": 3.3743143240128006, + "learning_rate": 7.83576922952222e-06, + "loss": 0.5715, "step": 12373 }, { - "epoch": 2.594673935835605, - "grad_norm": 5.4662174324588335, - "learning_rate": 9.423245705519845e-07, - "loss": 0.1932, + "epoch": 1.7472465405252753, + "grad_norm": 4.713378204797731, + "learning_rate": 7.834280931752375e-06, + "loss": 0.9426, "step": 12374 }, { - "epoch": 2.5948836234011323, - "grad_norm": 4.349322831822157, - "learning_rate": 9.413654252233995e-07, - "loss": 0.1582, + "epoch": 1.7473877435752612, + "grad_norm": 4.309944586408834, + "learning_rate": 7.832792684312906e-06, + "loss": 0.5755, "step": 12375 }, { - "epoch": 2.59509331096666, - "grad_norm": 4.201727018760815, - "learning_rate": 9.404067441655451e-07, - "loss": 0.1341, + "epoch": 1.747528946625247, + "grad_norm": 4.197338238469789, + "learning_rate": 7.8313044872384e-06, + "loss": 0.4115, "step": 12376 }, { - "epoch": 2.595302998532187, - "grad_norm": 2.768546211643465, - "learning_rate": 9.394485274275578e-07, - "loss": 0.0868, + "epoch": 1.747670149675233, + "grad_norm": 4.027456567216389, + "learning_rate": 7.829816340563442e-06, + "loss": 0.5859, "step": 12377 }, { - "epoch": 2.5955126860977145, - "grad_norm": 4.541204083028868, - "learning_rate": 9.384907750585504e-07, - "loss": 0.1391, + "epoch": 1.7478113527252188, + "grad_norm": 3.2214342329578365, + "learning_rate": 7.828328244322612e-06, + "loss": 0.5247, "step": 12378 }, { - "epoch": 2.595722373663242, - "grad_norm": 3.9930183684231673, - "learning_rate": 9.375334871076081e-07, - "loss": 0.1312, + "epoch": 1.7479525557752047, + "grad_norm": 4.315903548684153, + "learning_rate": 7.826840198550502e-06, + "loss": 0.7698, "step": 12379 }, { - "epoch": 2.595932061228769, - "grad_norm": 4.659188083864193, - "learning_rate": 9.365766636237894e-07, - "loss": 0.0996, + "epoch": 1.7480937588251906, + "grad_norm": 3.7478556416410194, + "learning_rate": 7.825352203281682e-06, + "loss": 0.6152, "step": 12380 }, { - "epoch": 2.5961417487942966, - "grad_norm": 4.915618001560934, - "learning_rate": 9.356203046561396e-07, - "loss": 0.1794, + "epoch": 1.7482349618751765, + "grad_norm": 3.3887610738317826, + "learning_rate": 7.823864258550737e-06, + "loss": 0.5404, "step": 12381 }, { - "epoch": 2.596351436359824, - "grad_norm": 3.7286374243684497, - "learning_rate": 9.346644102536695e-07, - "loss": 0.1158, + "epoch": 1.7483761649251623, + "grad_norm": 3.2784850387918087, + "learning_rate": 7.822376364392248e-06, + "loss": 0.4762, "step": 12382 }, { - "epoch": 2.596561123925351, - "grad_norm": 4.860765299376346, - "learning_rate": 9.337089804653699e-07, - "loss": 0.1788, + "epoch": 1.7485173679751482, + "grad_norm": 4.983852373183085, + "learning_rate": 7.820888520840792e-06, + "loss": 0.7832, "step": 12383 }, { - "epoch": 2.5967708114908787, - "grad_norm": 3.875729838813462, - "learning_rate": 9.327540153402092e-07, - "loss": 0.1374, + "epoch": 1.7486585710251341, + "grad_norm": 3.8465093684948175, + "learning_rate": 7.819400727930947e-06, + "loss": 0.5706, "step": 12384 }, { - "epoch": 2.596980499056406, - "grad_norm": 4.869701193710267, - "learning_rate": 9.317995149271342e-07, - "loss": 0.1449, + "epoch": 1.74879977407512, + "grad_norm": 3.1149277383497225, + "learning_rate": 7.817912985697285e-06, + "loss": 0.4687, "step": 12385 }, { - "epoch": 2.5971901866219333, - "grad_norm": 4.197722150672071, - "learning_rate": 9.30845479275062e-07, - "loss": 0.1553, + "epoch": 1.7489409771251059, + "grad_norm": 3.233405587984255, + "learning_rate": 7.816425294174385e-06, + "loss": 0.5024, "step": 12386 }, { - "epoch": 2.5973998741874604, - "grad_norm": 5.244335472138452, - "learning_rate": 9.298919084328861e-07, - "loss": 0.1667, + "epoch": 1.7490821801750918, + "grad_norm": 2.974922456001024, + "learning_rate": 7.81493765339682e-06, + "loss": 0.4442, "step": 12387 }, { - "epoch": 2.597609561752988, - "grad_norm": 5.631474119833839, - "learning_rate": 9.289388024494839e-07, - "loss": 0.172, + "epoch": 1.7492233832250776, + "grad_norm": 2.9463989654925804, + "learning_rate": 7.813450063399158e-06, + "loss": 0.4865, "step": 12388 }, { - "epoch": 2.5978192493185155, - "grad_norm": 4.833733407258562, - "learning_rate": 9.279861613736985e-07, - "loss": 0.1447, + "epoch": 1.7493645862750635, + "grad_norm": 3.272557570518201, + "learning_rate": 7.811962524215969e-06, + "loss": 0.5506, "step": 12389 }, { - "epoch": 2.5980289368840426, - "grad_norm": 4.373315789365453, - "learning_rate": 9.270339852543598e-07, - "loss": 0.1529, + "epoch": 1.7495057893250494, + "grad_norm": 2.851310692924613, + "learning_rate": 7.81047503588183e-06, + "loss": 0.4693, "step": 12390 }, { - "epoch": 2.59823862444957, - "grad_norm": 3.4214431413838233, - "learning_rate": 9.260822741402652e-07, - "loss": 0.1207, + "epoch": 1.7496469923750353, + "grad_norm": 3.857979085930518, + "learning_rate": 7.808987598431303e-06, + "loss": 0.576, "step": 12391 }, { - "epoch": 2.5984483120150976, - "grad_norm": 4.946652803292614, - "learning_rate": 9.251310280801895e-07, - "loss": 0.1349, + "epoch": 1.7497881954250212, + "grad_norm": 3.477501051022583, + "learning_rate": 7.807500211898959e-06, + "loss": 0.4537, "step": 12392 }, { - "epoch": 2.5986579995806247, - "grad_norm": 4.043580326065742, - "learning_rate": 9.241802471228889e-07, - "loss": 0.1337, + "epoch": 1.749929398475007, + "grad_norm": 3.841066769323869, + "learning_rate": 7.806012876319363e-06, + "loss": 0.6522, "step": 12393 }, { - "epoch": 2.5988676871461522, - "grad_norm": 4.528488112469475, - "learning_rate": 9.232299313170922e-07, - "loss": 0.1345, + "epoch": 1.750070601524993, + "grad_norm": 4.4022426387169595, + "learning_rate": 7.804525591727081e-06, + "loss": 0.597, "step": 12394 }, { - "epoch": 2.5990773747116798, - "grad_norm": 4.327430887308288, - "learning_rate": 9.222800807115062e-07, - "loss": 0.1019, + "epoch": 1.7502118045749788, + "grad_norm": 3.1189317036371524, + "learning_rate": 7.803038358156674e-06, + "loss": 0.4532, "step": 12395 }, { - "epoch": 2.599287062277207, - "grad_norm": 5.17068866271806, - "learning_rate": 9.213306953548085e-07, - "loss": 0.1661, + "epoch": 1.7503530076249647, + "grad_norm": 3.0483550107122, + "learning_rate": 7.801551175642715e-06, + "loss": 0.4644, "step": 12396 }, { - "epoch": 2.5994967498427344, - "grad_norm": 3.5983958628270947, - "learning_rate": 9.203817752956601e-07, - "loss": 0.1242, + "epoch": 1.7504942106749506, + "grad_norm": 3.740861349048714, + "learning_rate": 7.800064044219748e-06, + "loss": 0.5674, "step": 12397 }, { - "epoch": 2.599706437408262, - "grad_norm": 5.728291676075231, - "learning_rate": 9.194333205826911e-07, - "loss": 0.1451, + "epoch": 1.7506354137249365, + "grad_norm": 3.183118901989265, + "learning_rate": 7.798576963922347e-06, + "loss": 0.453, "step": 12398 }, { - "epoch": 2.599916124973789, - "grad_norm": 3.245235570748017, - "learning_rate": 9.18485331264517e-07, - "loss": 0.1171, + "epoch": 1.7507766167749224, + "grad_norm": 3.5555690871886783, + "learning_rate": 7.797089934785064e-06, + "loss": 0.5352, "step": 12399 }, { - "epoch": 2.6001258125393165, - "grad_norm": 4.471708132896414, - "learning_rate": 9.175378073897201e-07, - "loss": 0.1254, + "epoch": 1.7509178198249082, + "grad_norm": 3.214463952819515, + "learning_rate": 7.79560295684246e-06, + "loss": 0.4298, "step": 12400 }, { - "epoch": 2.600335500104844, - "grad_norm": 3.108410809324801, - "learning_rate": 9.165907490068615e-07, - "loss": 0.1036, + "epoch": 1.7510590228748941, + "grad_norm": 3.6092818374244424, + "learning_rate": 7.794116030129092e-06, + "loss": 0.498, "step": 12401 }, { - "epoch": 2.600545187670371, - "grad_norm": 3.8026375521905784, - "learning_rate": 9.156441561644814e-07, - "loss": 0.1171, + "epoch": 1.75120022592488, + "grad_norm": 3.481068837760789, + "learning_rate": 7.792629154679514e-06, + "loss": 0.5789, "step": 12402 }, { - "epoch": 2.6007548752358987, - "grad_norm": 5.045848602372193, - "learning_rate": 9.146980289110974e-07, - "loss": 0.1558, + "epoch": 1.7513414289748659, + "grad_norm": 3.320025410895974, + "learning_rate": 7.791142330528282e-06, + "loss": 0.4826, "step": 12403 }, { - "epoch": 2.6009645628014257, - "grad_norm": 3.949997308953114, - "learning_rate": 9.137523672951942e-07, - "loss": 0.1273, + "epoch": 1.7514826320248518, + "grad_norm": 4.287026252117888, + "learning_rate": 7.789655557709951e-06, + "loss": 0.64, "step": 12404 }, { - "epoch": 2.6011742503669533, - "grad_norm": 3.7647474361271924, - "learning_rate": 9.128071713652409e-07, - "loss": 0.1194, + "epoch": 1.7516238350748377, + "grad_norm": 3.5418558294109657, + "learning_rate": 7.788168836259068e-06, + "loss": 0.512, "step": 12405 }, { - "epoch": 2.6013839379324804, - "grad_norm": 3.7728221583604156, - "learning_rate": 9.118624411696831e-07, - "loss": 0.0999, + "epoch": 1.7517650381248235, + "grad_norm": 3.7013688031577687, + "learning_rate": 7.786682166210184e-06, + "loss": 0.5077, "step": 12406 }, { - "epoch": 2.601593625498008, - "grad_norm": 4.050426188940153, - "learning_rate": 9.109181767569374e-07, - "loss": 0.1195, + "epoch": 1.7519062411748094, + "grad_norm": 4.844171770141107, + "learning_rate": 7.785195547597852e-06, + "loss": 0.757, "step": 12407 }, { - "epoch": 2.6018033130635354, - "grad_norm": 3.560871521697155, - "learning_rate": 9.099743781753989e-07, - "loss": 0.1239, + "epoch": 1.7520474442247953, + "grad_norm": 3.9827763643644905, + "learning_rate": 7.78370898045662e-06, + "loss": 0.6008, "step": 12408 }, { - "epoch": 2.6020130006290625, - "grad_norm": 4.5823091385496095, - "learning_rate": 9.090310454734397e-07, - "loss": 0.1534, + "epoch": 1.7521886472747812, + "grad_norm": 3.576850432309614, + "learning_rate": 7.782222464821029e-06, + "loss": 0.4929, "step": 12409 }, { - "epoch": 2.60222268819459, - "grad_norm": 4.783878513355387, - "learning_rate": 9.080881786994056e-07, - "loss": 0.146, + "epoch": 1.752329850324767, + "grad_norm": 3.013653370376524, + "learning_rate": 7.780736000725636e-06, + "loss": 0.4692, "step": 12410 }, { - "epoch": 2.6024323757601175, - "grad_norm": 3.619155984823816, - "learning_rate": 9.071457779016235e-07, - "loss": 0.1148, + "epoch": 1.752471053374753, + "grad_norm": 3.8339708949039113, + "learning_rate": 7.779249588204978e-06, + "loss": 0.5544, "step": 12411 }, { - "epoch": 2.6026420633256446, - "grad_norm": 4.638483909010977, - "learning_rate": 9.062038431283904e-07, - "loss": 0.1649, + "epoch": 1.7526122564247388, + "grad_norm": 3.7851784337776384, + "learning_rate": 7.7777632272936e-06, + "loss": 0.5443, "step": 12412 }, { - "epoch": 2.602851750891172, - "grad_norm": 3.8700880135710323, - "learning_rate": 9.052623744279821e-07, - "loss": 0.1296, + "epoch": 1.7527534594747247, + "grad_norm": 4.184582800669715, + "learning_rate": 7.77627691802605e-06, + "loss": 0.6184, "step": 12413 }, { - "epoch": 2.6030614384566997, - "grad_norm": 5.579768659617796, - "learning_rate": 9.043213718486499e-07, - "loss": 0.1646, + "epoch": 1.7528946625247106, + "grad_norm": 4.336026424260855, + "learning_rate": 7.774790660436857e-06, + "loss": 0.6384, "step": 12414 }, { - "epoch": 2.6032711260222268, - "grad_norm": 4.695633423052056, - "learning_rate": 9.033808354386254e-07, - "loss": 0.1278, + "epoch": 1.7530358655746965, + "grad_norm": 4.114409127227056, + "learning_rate": 7.773304454560572e-06, + "loss": 0.5843, "step": 12415 }, { - "epoch": 2.6034808135877543, - "grad_norm": 3.5694107387026333, - "learning_rate": 9.024407652461109e-07, - "loss": 0.1067, + "epoch": 1.7531770686246824, + "grad_norm": 4.826660550456006, + "learning_rate": 7.771818300431729e-06, + "loss": 0.5815, "step": 12416 }, { - "epoch": 2.603690501153282, - "grad_norm": 4.241945709666966, - "learning_rate": 9.015011613192837e-07, - "loss": 0.1389, + "epoch": 1.7533182716746682, + "grad_norm": 3.3396370008846237, + "learning_rate": 7.770332198084865e-06, + "loss": 0.4707, "step": 12417 }, { - "epoch": 2.603900188718809, - "grad_norm": 5.160062312585765, - "learning_rate": 9.005620237063039e-07, - "loss": 0.1396, + "epoch": 1.7534594747246541, + "grad_norm": 3.0058608180821498, + "learning_rate": 7.768846147554517e-06, + "loss": 0.4896, "step": 12418 }, { - "epoch": 2.6041098762843364, - "grad_norm": 4.413744077337538, - "learning_rate": 8.996233524553011e-07, - "loss": 0.1828, + "epoch": 1.75360067777464, + "grad_norm": 3.794863819078056, + "learning_rate": 7.767360148875224e-06, + "loss": 0.6703, "step": 12419 }, { - "epoch": 2.604319563849864, - "grad_norm": 5.503388522295018, - "learning_rate": 8.986851476143876e-07, - "loss": 0.1681, + "epoch": 1.753741880824626, + "grad_norm": 3.749540162153938, + "learning_rate": 7.765874202081516e-06, + "loss": 0.6648, "step": 12420 }, { - "epoch": 2.604529251415391, - "grad_norm": 4.384169036915855, - "learning_rate": 8.977474092316451e-07, - "loss": 0.1366, + "epoch": 1.7538830838746118, + "grad_norm": 4.024943912211173, + "learning_rate": 7.764388307207929e-06, + "loss": 0.5399, "step": 12421 }, { - "epoch": 2.6047389389809186, - "grad_norm": 4.654877196466101, - "learning_rate": 8.96810137355133e-07, - "loss": 0.1479, + "epoch": 1.7540242869245977, + "grad_norm": 3.8522656584107984, + "learning_rate": 7.76290246428899e-06, + "loss": 0.6598, "step": 12422 }, { - "epoch": 2.6049486265464457, - "grad_norm": 4.512400556318743, - "learning_rate": 8.958733320328905e-07, - "loss": 0.1757, + "epoch": 1.7541654899745835, + "grad_norm": 4.302520506927567, + "learning_rate": 7.761416673359228e-06, + "loss": 0.646, "step": 12423 }, { - "epoch": 2.605158314111973, - "grad_norm": 3.2754901763946016, - "learning_rate": 8.949369933129337e-07, - "loss": 0.103, + "epoch": 1.7543066930245694, + "grad_norm": 3.2521125943511775, + "learning_rate": 7.75993093445318e-06, + "loss": 0.4286, "step": 12424 }, { - "epoch": 2.6053680016775003, - "grad_norm": 4.38492029252869, - "learning_rate": 8.940011212432442e-07, - "loss": 0.1485, + "epoch": 1.7544478960745553, + "grad_norm": 3.727592637949972, + "learning_rate": 7.758445247605369e-06, + "loss": 0.619, "step": 12425 }, { - "epoch": 2.605577689243028, - "grad_norm": 4.649245406715256, - "learning_rate": 8.930657158717904e-07, - "loss": 0.1422, + "epoch": 1.7545890991245412, + "grad_norm": 3.9465865827172655, + "learning_rate": 7.75695961285032e-06, + "loss": 0.694, "step": 12426 }, { - "epoch": 2.6057873768085553, - "grad_norm": 4.18344797086107, - "learning_rate": 8.92130777246516e-07, - "loss": 0.1659, + "epoch": 1.754730302174527, + "grad_norm": 3.5876326317937304, + "learning_rate": 7.755474030222564e-06, + "loss": 0.5902, "step": 12427 }, { - "epoch": 2.6059970643740824, - "grad_norm": 3.346866309431157, - "learning_rate": 8.911963054153361e-07, - "loss": 0.1166, + "epoch": 1.754871505224513, + "grad_norm": 4.006412893091304, + "learning_rate": 7.753988499756621e-06, + "loss": 0.578, "step": 12428 }, { - "epoch": 2.60620675193961, - "grad_norm": 4.2906166017233796, - "learning_rate": 8.902623004261424e-07, - "loss": 0.1462, + "epoch": 1.7550127082744988, + "grad_norm": 3.26647581513503, + "learning_rate": 7.752503021487016e-06, + "loss": 0.5125, "step": 12429 }, { - "epoch": 2.6064164395051375, - "grad_norm": 5.793009929066904, - "learning_rate": 8.893287623268077e-07, - "loss": 0.2097, + "epoch": 1.7551539113244847, + "grad_norm": 4.827549325208341, + "learning_rate": 7.75101759544827e-06, + "loss": 0.6756, "step": 12430 }, { - "epoch": 2.6066261270706645, - "grad_norm": 3.5577772408429493, - "learning_rate": 8.883956911651736e-07, - "loss": 0.1532, + "epoch": 1.7552951143744706, + "grad_norm": 3.1948763108323797, + "learning_rate": 7.749532221674904e-06, + "loss": 0.4933, "step": 12431 }, { - "epoch": 2.606835814636192, - "grad_norm": 4.462176542416087, - "learning_rate": 8.874630869890644e-07, - "loss": 0.1426, + "epoch": 1.7554363174244565, + "grad_norm": 3.938548724410424, + "learning_rate": 7.748046900201438e-06, + "loss": 0.6477, "step": 12432 }, { - "epoch": 2.6070455022017196, - "grad_norm": 6.613579030776804, - "learning_rate": 8.865309498462782e-07, - "loss": 0.2295, + "epoch": 1.7555775204744424, + "grad_norm": 3.78848357955434, + "learning_rate": 7.746561631062386e-06, + "loss": 0.6098, "step": 12433 }, { - "epoch": 2.6072551897672467, - "grad_norm": 4.006961337434616, - "learning_rate": 8.855992797845847e-07, - "loss": 0.1384, + "epoch": 1.7557187235244283, + "grad_norm": 3.7595967713038756, + "learning_rate": 7.745076414292272e-06, + "loss": 0.6302, "step": 12434 }, { - "epoch": 2.607464877332774, - "grad_norm": 4.765849714961155, - "learning_rate": 8.846680768517368e-07, - "loss": 0.1457, + "epoch": 1.7558599265744141, + "grad_norm": 2.9910045865051034, + "learning_rate": 7.743591249925604e-06, + "loss": 0.4828, "step": 12435 }, { - "epoch": 2.6076745648983017, - "grad_norm": 4.105845866749517, - "learning_rate": 8.837373410954619e-07, - "loss": 0.1446, + "epoch": 1.7560011296244, + "grad_norm": 4.205834549567604, + "learning_rate": 7.742106137996905e-06, + "loss": 0.6327, "step": 12436 }, { - "epoch": 2.607884252463829, - "grad_norm": 3.455321983963246, - "learning_rate": 8.828070725634586e-07, - "loss": 0.117, + "epoch": 1.756142332674386, + "grad_norm": 3.363653112547713, + "learning_rate": 7.740621078540684e-06, + "loss": 0.4779, "step": 12437 }, { - "epoch": 2.6080939400293564, - "grad_norm": 5.790581351749775, - "learning_rate": 8.81877271303404e-07, - "loss": 0.1644, + "epoch": 1.7562835357243718, + "grad_norm": 3.7202910017397106, + "learning_rate": 7.739136071591455e-06, + "loss": 0.4958, "step": 12438 }, { - "epoch": 2.608303627594884, - "grad_norm": 3.9772472301459705, - "learning_rate": 8.80947937362957e-07, - "loss": 0.1214, + "epoch": 1.7564247387743577, + "grad_norm": 3.184843695095462, + "learning_rate": 7.737651117183725e-06, + "loss": 0.4968, "step": 12439 }, { - "epoch": 2.608513315160411, - "grad_norm": 4.219530312022978, - "learning_rate": 8.800190707897416e-07, - "loss": 0.1482, + "epoch": 1.7565659418243436, + "grad_norm": 3.6441293252089277, + "learning_rate": 7.736166215352004e-06, + "loss": 0.6185, "step": 12440 }, { - "epoch": 2.6087230027259385, - "grad_norm": 5.5839233088214755, - "learning_rate": 8.790906716313685e-07, - "loss": 0.1486, + "epoch": 1.7567071448743294, + "grad_norm": 3.7396841070723146, + "learning_rate": 7.734681366130805e-06, + "loss": 0.5923, "step": 12441 }, { - "epoch": 2.6089326902914656, - "grad_norm": 3.9812559393177094, - "learning_rate": 8.781627399354176e-07, - "loss": 0.137, + "epoch": 1.7568483479243153, + "grad_norm": 3.5747049117362897, + "learning_rate": 7.733196569554632e-06, + "loss": 0.5441, "step": 12442 }, { - "epoch": 2.609142377856993, - "grad_norm": 4.326362108177064, - "learning_rate": 8.772352757494462e-07, - "loss": 0.1336, + "epoch": 1.7569895509743012, + "grad_norm": 3.616448822988445, + "learning_rate": 7.73171182565799e-06, + "loss": 0.5794, "step": 12443 }, { - "epoch": 2.60935206542252, - "grad_norm": 4.791230319862026, - "learning_rate": 8.76308279120991e-07, - "loss": 0.1724, + "epoch": 1.757130754024287, + "grad_norm": 4.158406021438606, + "learning_rate": 7.730227134475388e-06, + "loss": 0.5254, "step": 12444 }, { - "epoch": 2.6095617529880477, - "grad_norm": 3.0948779613655124, - "learning_rate": 8.753817500975581e-07, - "loss": 0.1006, + "epoch": 1.7572719570742728, + "grad_norm": 3.7260402433423048, + "learning_rate": 7.728742496041326e-06, + "loss": 0.5396, "step": 12445 }, { - "epoch": 2.6097714405535752, - "grad_norm": 3.581805385345101, - "learning_rate": 8.744556887266387e-07, - "loss": 0.1366, + "epoch": 1.7574131601242586, + "grad_norm": 4.131829241296021, + "learning_rate": 7.727257910390309e-06, + "loss": 0.5744, "step": 12446 }, { - "epoch": 2.6099811281191023, - "grad_norm": 4.6527008048543825, - "learning_rate": 8.735300950556913e-07, - "loss": 0.1407, + "epoch": 1.7575543631742445, + "grad_norm": 3.161312591002527, + "learning_rate": 7.725773377556838e-06, + "loss": 0.5048, "step": 12447 }, { - "epoch": 2.61019081568463, - "grad_norm": 3.829342947287962, - "learning_rate": 8.72604969132157e-07, - "loss": 0.1378, + "epoch": 1.7576955662242304, + "grad_norm": 3.9437560228453687, + "learning_rate": 7.724288897575409e-06, + "loss": 0.5525, "step": 12448 }, { - "epoch": 2.6104005032501574, - "grad_norm": 4.096005873798838, - "learning_rate": 8.7168031100345e-07, - "loss": 0.1303, + "epoch": 1.7578367692742163, + "grad_norm": 3.0303829478932482, + "learning_rate": 7.722804470480523e-06, + "loss": 0.4279, "step": 12449 }, { - "epoch": 2.6106101908156845, - "grad_norm": 4.516398020101538, - "learning_rate": 8.707561207169557e-07, - "loss": 0.1563, + "epoch": 1.7579779723242022, + "grad_norm": 3.234638917294806, + "learning_rate": 7.721320096306679e-06, + "loss": 0.5012, "step": 12450 }, { - "epoch": 2.610819878381212, - "grad_norm": 4.939284191470693, - "learning_rate": 8.698323983200452e-07, - "loss": 0.1592, + "epoch": 1.758119175374188, + "grad_norm": 3.273380982501268, + "learning_rate": 7.71983577508837e-06, + "loss": 0.4477, "step": 12451 }, { - "epoch": 2.6110295659467395, - "grad_norm": 5.964570621914853, - "learning_rate": 8.689091438600605e-07, - "loss": 0.198, + "epoch": 1.758260378424174, + "grad_norm": 2.9510704001089776, + "learning_rate": 7.718351506860092e-06, + "loss": 0.4267, "step": 12452 }, { - "epoch": 2.6112392535122666, - "grad_norm": 4.124011358812057, - "learning_rate": 8.679863573843206e-07, - "loss": 0.1313, + "epoch": 1.7584015814741598, + "grad_norm": 3.7286723360227847, + "learning_rate": 7.716867291656341e-06, + "loss": 0.5682, "step": 12453 }, { - "epoch": 2.611448941077794, - "grad_norm": 4.582397874529155, - "learning_rate": 8.670640389401164e-07, - "loss": 0.1341, + "epoch": 1.7585427845241457, + "grad_norm": 3.6130512085527635, + "learning_rate": 7.71538312951161e-06, + "loss": 0.5478, "step": 12454 }, { - "epoch": 2.6116586286433217, - "grad_norm": 4.775223860432403, - "learning_rate": 8.661421885747212e-07, - "loss": 0.1781, + "epoch": 1.7586839875741316, + "grad_norm": 3.754924627022415, + "learning_rate": 7.713899020460389e-06, + "loss": 0.6228, "step": 12455 }, { - "epoch": 2.6118683162088487, - "grad_norm": 5.23509432187901, - "learning_rate": 8.652208063353795e-07, - "loss": 0.1237, + "epoch": 1.7588251906241175, + "grad_norm": 4.006916714402932, + "learning_rate": 7.712414964537167e-06, + "loss": 0.5613, "step": 12456 }, { - "epoch": 2.6120780037743763, - "grad_norm": 5.143009841000892, - "learning_rate": 8.64299892269318e-07, - "loss": 0.1729, + "epoch": 1.7589663936741033, + "grad_norm": 3.593531592735026, + "learning_rate": 7.710930961776428e-06, + "loss": 0.5732, "step": 12457 }, { - "epoch": 2.612287691339904, - "grad_norm": 4.09327804630886, - "learning_rate": 8.6337944642373e-07, - "loss": 0.1272, + "epoch": 1.7591075967240892, + "grad_norm": 3.454797259548896, + "learning_rate": 7.70944701221267e-06, + "loss": 0.477, "step": 12458 }, { - "epoch": 2.612497378905431, - "grad_norm": 3.9598890836619622, - "learning_rate": 8.624594688457921e-07, - "loss": 0.1353, + "epoch": 1.7592487997740751, + "grad_norm": 3.40887114553142, + "learning_rate": 7.707963115880372e-06, + "loss": 0.5769, "step": 12459 }, { - "epoch": 2.6127070664709584, - "grad_norm": 4.51636056535437, - "learning_rate": 8.615399595826535e-07, - "loss": 0.1312, + "epoch": 1.759390002824061, + "grad_norm": 3.540560189162941, + "learning_rate": 7.706479272814024e-06, + "loss": 0.6118, "step": 12460 }, { - "epoch": 2.6129167540364855, - "grad_norm": 3.5740134690305867, - "learning_rate": 8.606209186814463e-07, - "loss": 0.0903, + "epoch": 1.7595312058740469, + "grad_norm": 4.086662815632813, + "learning_rate": 7.704995483048104e-06, + "loss": 0.623, "step": 12461 }, { - "epoch": 2.613126441602013, - "grad_norm": 3.084324205276457, - "learning_rate": 8.597023461892639e-07, - "loss": 0.0935, + "epoch": 1.7596724089240328, + "grad_norm": 3.5864495388759794, + "learning_rate": 7.703511746617098e-06, + "loss": 0.7263, "step": 12462 }, { - "epoch": 2.61333612916754, - "grad_norm": 4.359064753941047, - "learning_rate": 8.5878424215319e-07, - "loss": 0.1419, + "epoch": 1.7598136119740186, + "grad_norm": 3.1412525923311447, + "learning_rate": 7.70202806355549e-06, + "loss": 0.404, "step": 12463 }, { - "epoch": 2.6135458167330676, - "grad_norm": 4.774579910488419, - "learning_rate": 8.57866606620279e-07, - "loss": 0.1353, + "epoch": 1.7599548150240045, + "grad_norm": 4.1705673588870935, + "learning_rate": 7.700544433897757e-06, + "loss": 0.6693, "step": 12464 }, { - "epoch": 2.613755504298595, - "grad_norm": 3.750609031161766, - "learning_rate": 8.569494396375622e-07, - "loss": 0.1125, + "epoch": 1.7600960180739904, + "grad_norm": 4.25484100255558, + "learning_rate": 7.699060857678379e-06, + "loss": 0.7164, "step": 12465 }, { - "epoch": 2.6139651918641222, - "grad_norm": 3.2346064639082424, - "learning_rate": 8.56032741252042e-07, - "loss": 0.0885, + "epoch": 1.7602372211239763, + "grad_norm": 3.818364759082351, + "learning_rate": 7.697577334931828e-06, + "loss": 0.6078, "step": 12466 }, { - "epoch": 2.6141748794296498, - "grad_norm": 4.276055853770689, - "learning_rate": 8.551165115107052e-07, - "loss": 0.1365, + "epoch": 1.7603784241739622, + "grad_norm": 3.479626639121365, + "learning_rate": 7.69609386569259e-06, + "loss": 0.5779, "step": 12467 }, { - "epoch": 2.6143845669951773, - "grad_norm": 3.8059509768508955, - "learning_rate": 8.542007504605054e-07, - "loss": 0.1393, + "epoch": 1.760519627223948, + "grad_norm": 3.0343303614358996, + "learning_rate": 7.694610449995133e-06, + "loss": 0.4539, "step": 12468 }, { - "epoch": 2.6145942545607044, - "grad_norm": 3.6922418189855772, - "learning_rate": 8.532854581483829e-07, - "loss": 0.1143, + "epoch": 1.760660830273934, + "grad_norm": 3.4071469460333357, + "learning_rate": 7.693127087873934e-06, + "loss": 0.5078, "step": 12469 }, { - "epoch": 2.614803942126232, - "grad_norm": 4.2704346072824615, - "learning_rate": 8.523706346212435e-07, - "loss": 0.1346, + "epoch": 1.7608020333239198, + "grad_norm": 3.695379526051402, + "learning_rate": 7.691643779363465e-06, + "loss": 0.55, "step": 12470 }, { - "epoch": 2.6150136296917594, - "grad_norm": 3.8199798360161026, - "learning_rate": 8.51456279925974e-07, - "loss": 0.1222, + "epoch": 1.7609432363739057, + "grad_norm": 3.2366026101224827, + "learning_rate": 7.6901605244982e-06, + "loss": 0.4449, "step": 12471 }, { - "epoch": 2.6152233172572865, - "grad_norm": 8.225860711129755, - "learning_rate": 8.505423941094371e-07, - "loss": 0.1786, + "epoch": 1.7610844394238916, + "grad_norm": 3.3483212117843726, + "learning_rate": 7.688677323312608e-06, + "loss": 0.5006, "step": 12472 }, { - "epoch": 2.615433004822814, - "grad_norm": 3.803775858172037, - "learning_rate": 8.49628977218474e-07, - "loss": 0.1052, + "epoch": 1.7612256424738775, + "grad_norm": 3.138517254846151, + "learning_rate": 7.687194175841153e-06, + "loss": 0.4817, "step": 12473 }, { - "epoch": 2.6156426923883416, - "grad_norm": 4.596830418451265, - "learning_rate": 8.487160292998952e-07, - "loss": 0.1323, + "epoch": 1.7613668455238634, + "grad_norm": 3.873700100256128, + "learning_rate": 7.685711082118306e-06, + "loss": 0.6783, "step": 12474 }, { - "epoch": 2.6158523799538687, - "grad_norm": 5.650414174904719, - "learning_rate": 8.478035504004911e-07, - "loss": 0.2002, + "epoch": 1.7615080485738492, + "grad_norm": 4.0994443523852855, + "learning_rate": 7.684228042178536e-06, + "loss": 0.5511, "step": 12475 }, { - "epoch": 2.616062067519396, - "grad_norm": 4.73651913267565, - "learning_rate": 8.468915405670297e-07, - "loss": 0.1895, + "epoch": 1.7616492516238351, + "grad_norm": 2.9669050754286834, + "learning_rate": 7.682745056056307e-06, + "loss": 0.4713, "step": 12476 }, { - "epoch": 2.6162717550849237, - "grad_norm": 5.242246273403459, - "learning_rate": 8.459799998462503e-07, - "loss": 0.1698, + "epoch": 1.761790454673821, + "grad_norm": 3.276796391040959, + "learning_rate": 7.681262123786083e-06, + "loss": 0.5239, "step": 12477 }, { - "epoch": 2.616481442650451, - "grad_norm": 4.070876270443945, - "learning_rate": 8.450689282848756e-07, - "loss": 0.1311, + "epoch": 1.7619316577238069, + "grad_norm": 4.588210055670645, + "learning_rate": 7.679779245402321e-06, + "loss": 0.6622, "step": 12478 }, { - "epoch": 2.6166911302159783, - "grad_norm": 4.769807613842107, - "learning_rate": 8.44158325929596e-07, - "loss": 0.1574, + "epoch": 1.7620728607737925, + "grad_norm": 4.860233168534756, + "learning_rate": 7.67829642093949e-06, + "loss": 0.7755, "step": 12479 }, { - "epoch": 2.6169008177815054, - "grad_norm": 3.9581053882038018, - "learning_rate": 8.432481928270808e-07, - "loss": 0.1196, + "epoch": 1.7622140638237784, + "grad_norm": 2.918906494675368, + "learning_rate": 7.67681365043205e-06, + "loss": 0.4447, "step": 12480 }, { - "epoch": 2.617110505347033, - "grad_norm": 4.475019044180424, - "learning_rate": 8.423385290239771e-07, - "loss": 0.1407, + "epoch": 1.7623552668737643, + "grad_norm": 3.000341926525375, + "learning_rate": 7.675330933914456e-06, + "loss": 0.4517, "step": 12481 }, { - "epoch": 2.61732019291256, - "grad_norm": 5.1372302667320575, - "learning_rate": 8.4142933456691e-07, - "loss": 0.1281, + "epoch": 1.7624964699237502, + "grad_norm": 3.556254236340234, + "learning_rate": 7.673848271421166e-06, + "loss": 0.4879, "step": 12482 }, { - "epoch": 2.6175298804780875, - "grad_norm": 2.9060303425745007, - "learning_rate": 8.405206095024709e-07, - "loss": 0.0918, + "epoch": 1.762637672973736, + "grad_norm": 3.4892057606616347, + "learning_rate": 7.672365662986636e-06, + "loss": 0.4889, "step": 12483 }, { - "epoch": 2.617739568043615, - "grad_norm": 4.251609892652858, - "learning_rate": 8.396123538772372e-07, - "loss": 0.1608, + "epoch": 1.762778876023722, + "grad_norm": 3.352287111773432, + "learning_rate": 7.670883108645326e-06, + "loss": 0.5165, "step": 12484 }, { - "epoch": 2.617949255609142, - "grad_norm": 4.84092217200265, - "learning_rate": 8.387045677377604e-07, - "loss": 0.1572, + "epoch": 1.7629200790737078, + "grad_norm": 3.9138987733594335, + "learning_rate": 7.669400608431686e-06, + "loss": 0.6818, "step": 12485 }, { - "epoch": 2.6181589431746697, - "grad_norm": 4.940211935234428, - "learning_rate": 8.377972511305632e-07, - "loss": 0.1551, + "epoch": 1.7630612821236937, + "grad_norm": 3.721553392858244, + "learning_rate": 7.667918162380166e-06, + "loss": 0.4745, "step": 12486 }, { - "epoch": 2.618368630740197, - "grad_norm": 4.760245197568732, - "learning_rate": 8.368904041021464e-07, - "loss": 0.1664, + "epoch": 1.7632024851736796, + "grad_norm": 3.433281503553015, + "learning_rate": 7.666435770525224e-06, + "loss": 0.5461, "step": 12487 }, { - "epoch": 2.6185783183057243, - "grad_norm": 4.781642444834001, - "learning_rate": 8.359840266989905e-07, - "loss": 0.1219, + "epoch": 1.7633436882236655, + "grad_norm": 4.657290225582665, + "learning_rate": 7.664953432901306e-06, + "loss": 0.7559, "step": 12488 }, { - "epoch": 2.618788005871252, - "grad_norm": 3.9956764061989447, - "learning_rate": 8.350781189675461e-07, - "loss": 0.1309, + "epoch": 1.7634848912736514, + "grad_norm": 4.438244327797083, + "learning_rate": 7.663471149542865e-06, + "loss": 0.6731, "step": 12489 }, { - "epoch": 2.6189976934367794, - "grad_norm": 4.060513848445209, - "learning_rate": 8.34172680954245e-07, - "loss": 0.1454, + "epoch": 1.7636260943236373, + "grad_norm": 3.2846179751617237, + "learning_rate": 7.661988920484344e-06, + "loss": 0.5256, "step": 12490 }, { - "epoch": 2.6192073810023064, - "grad_norm": 4.118826553694415, - "learning_rate": 8.332677127054923e-07, - "loss": 0.1438, + "epoch": 1.7637672973736231, + "grad_norm": 3.743108081404323, + "learning_rate": 7.66050674576019e-06, + "loss": 0.595, "step": 12491 }, { - "epoch": 2.619417068567834, - "grad_norm": 3.677533333769006, - "learning_rate": 8.323632142676663e-07, - "loss": 0.1232, + "epoch": 1.763908500423609, + "grad_norm": 3.9667407457509607, + "learning_rate": 7.659024625404845e-06, + "loss": 0.619, "step": 12492 }, { - "epoch": 2.6196267561333615, - "grad_norm": 3.850450374530168, - "learning_rate": 8.314591856871257e-07, - "loss": 0.1265, + "epoch": 1.764049703473595, + "grad_norm": 3.3926561400859105, + "learning_rate": 7.65754255945276e-06, + "loss": 0.6196, "step": 12493 }, { - "epoch": 2.6198364436988886, - "grad_norm": 4.431314143124674, - "learning_rate": 8.305556270102055e-07, - "loss": 0.1307, + "epoch": 1.7641909065235808, + "grad_norm": 3.797393127518146, + "learning_rate": 7.656060547938375e-06, + "loss": 0.6706, "step": 12494 }, { - "epoch": 2.620046131264416, - "grad_norm": 4.108188412282608, - "learning_rate": 8.296525382832121e-07, - "loss": 0.1716, + "epoch": 1.7643321095735667, + "grad_norm": 3.1753276106854296, + "learning_rate": 7.654578590896128e-06, + "loss": 0.4997, "step": 12495 }, { - "epoch": 2.6202558188299436, - "grad_norm": 5.462928380927774, - "learning_rate": 8.287499195524307e-07, - "loss": 0.1609, + "epoch": 1.7644733126235526, + "grad_norm": 3.6983842960646776, + "learning_rate": 7.653096688360465e-06, + "loss": 0.5968, "step": 12496 }, { - "epoch": 2.6204655063954707, - "grad_norm": 3.5094391061153773, - "learning_rate": 8.278477708641242e-07, - "loss": 0.1134, + "epoch": 1.7646145156735384, + "grad_norm": 3.6418490432114052, + "learning_rate": 7.651614840365822e-06, + "loss": 0.6449, "step": 12497 }, { - "epoch": 2.6206751939609982, - "grad_norm": 5.346461282040452, - "learning_rate": 8.269460922645267e-07, - "loss": 0.1701, + "epoch": 1.7647557187235243, + "grad_norm": 4.200852459256813, + "learning_rate": 7.650133046946633e-06, + "loss": 0.756, "step": 12498 }, { - "epoch": 2.6208848815265253, - "grad_norm": 4.146829504603081, - "learning_rate": 8.260448837998525e-07, - "loss": 0.1485, + "epoch": 1.7648969217735102, + "grad_norm": 3.463497769193341, + "learning_rate": 7.64865130813734e-06, + "loss": 0.5321, "step": 12499 }, { - "epoch": 2.621094569092053, - "grad_norm": 4.262607551820511, - "learning_rate": 8.251441455162901e-07, - "loss": 0.1106, + "epoch": 1.765038124823496, + "grad_norm": 5.15969524879268, + "learning_rate": 7.64716962397237e-06, + "loss": 0.9924, "step": 12500 }, { - "epoch": 2.62130425665758, - "grad_norm": 3.2909705199283388, - "learning_rate": 8.242438774600003e-07, - "loss": 0.131, + "epoch": 1.765179327873482, + "grad_norm": 3.6127014030402576, + "learning_rate": 7.645687994486165e-06, + "loss": 0.623, "step": 12501 }, { - "epoch": 2.6215139442231075, - "grad_norm": 4.875331850173637, - "learning_rate": 8.233440796771275e-07, - "loss": 0.1744, + "epoch": 1.7653205309234679, + "grad_norm": 3.677239976045142, + "learning_rate": 7.644206419713155e-06, + "loss": 0.5312, "step": 12502 }, { - "epoch": 2.621723631788635, - "grad_norm": 3.632698854596682, - "learning_rate": 8.224447522137901e-07, - "loss": 0.1221, + "epoch": 1.7654617339734537, + "grad_norm": 3.983191264827294, + "learning_rate": 7.64272489968777e-06, + "loss": 0.6314, "step": 12503 }, { - "epoch": 2.621933319354162, - "grad_norm": 3.7479758677085226, - "learning_rate": 8.215458951160727e-07, - "loss": 0.1246, + "epoch": 1.7656029370234396, + "grad_norm": 3.0828536063773755, + "learning_rate": 7.641243434444439e-06, + "loss": 0.5338, "step": 12504 }, { - "epoch": 2.6221430069196896, - "grad_norm": 4.204097612004744, - "learning_rate": 8.20647508430048e-07, - "loss": 0.1624, + "epoch": 1.7657441400734255, + "grad_norm": 3.4281820809601933, + "learning_rate": 7.639762024017592e-06, + "loss": 0.5823, "step": 12505 }, { - "epoch": 2.622352694485217, - "grad_norm": 3.1665800207515074, - "learning_rate": 8.197495922017595e-07, - "loss": 0.1014, + "epoch": 1.7658853431234114, + "grad_norm": 3.0254029177888198, + "learning_rate": 7.63828066844166e-06, + "loss": 0.437, "step": 12506 }, { - "epoch": 2.622562382050744, - "grad_norm": 4.042634420346649, - "learning_rate": 8.188521464772292e-07, - "loss": 0.1383, + "epoch": 1.7660265461733973, + "grad_norm": 3.4728798278346225, + "learning_rate": 7.636799367751062e-06, + "loss": 0.4968, "step": 12507 }, { - "epoch": 2.6227720696162717, - "grad_norm": 3.500808733611115, - "learning_rate": 8.179551713024481e-07, - "loss": 0.1146, + "epoch": 1.7661677492233832, + "grad_norm": 3.7957023052909213, + "learning_rate": 7.635318121980228e-06, + "loss": 0.6929, "step": 12508 }, { - "epoch": 2.6229817571817993, - "grad_norm": 4.929193474972663, - "learning_rate": 8.170586667233893e-07, - "loss": 0.1407, + "epoch": 1.766308952273369, + "grad_norm": 3.446410238644995, + "learning_rate": 7.633836931163575e-06, + "loss": 0.5711, "step": 12509 }, { - "epoch": 2.6231914447473264, - "grad_norm": 4.319142756884631, - "learning_rate": 8.161626327860017e-07, - "loss": 0.1345, + "epoch": 1.766450155323355, + "grad_norm": 4.313189330047758, + "learning_rate": 7.632355795335533e-06, + "loss": 0.6865, "step": 12510 }, { - "epoch": 2.623401132312854, - "grad_norm": 4.650440397641079, - "learning_rate": 8.152670695362086e-07, - "loss": 0.1279, + "epoch": 1.7665913583733408, + "grad_norm": 3.6680780157159347, + "learning_rate": 7.63087471453052e-06, + "loss": 0.5883, "step": 12511 }, { - "epoch": 2.6236108198783814, - "grad_norm": 4.6412856363463195, - "learning_rate": 8.143719770199065e-07, - "loss": 0.2197, + "epoch": 1.7667325614233267, + "grad_norm": 4.3449302535295615, + "learning_rate": 7.629393688782954e-06, + "loss": 0.559, "step": 12512 }, { - "epoch": 2.6238205074439085, - "grad_norm": 4.545146468358653, - "learning_rate": 8.134773552829745e-07, - "loss": 0.1401, + "epoch": 1.7668737644733126, + "grad_norm": 3.6368062360221143, + "learning_rate": 7.627912718127257e-06, + "loss": 0.5448, "step": 12513 }, { - "epoch": 2.624030195009436, - "grad_norm": 5.120914083900178, - "learning_rate": 8.125832043712589e-07, - "loss": 0.1554, + "epoch": 1.7670149675232985, + "grad_norm": 3.4341223475847227, + "learning_rate": 7.6264318025978455e-06, + "loss": 0.5146, "step": 12514 }, { - "epoch": 2.6242398825749635, - "grad_norm": 4.378461140513062, - "learning_rate": 8.116895243305911e-07, - "loss": 0.1472, + "epoch": 1.7671561705732843, + "grad_norm": 2.6058425808405503, + "learning_rate": 7.6249509422291325e-06, + "loss": 0.3774, "step": 12515 }, { - "epoch": 2.6244495701404906, - "grad_norm": 5.6040548756185995, - "learning_rate": 8.10796315206771e-07, - "loss": 0.1384, + "epoch": 1.7672973736232702, + "grad_norm": 3.5900926502805297, + "learning_rate": 7.623470137055533e-06, + "loss": 0.5576, "step": 12516 }, { - "epoch": 2.624659257706018, - "grad_norm": 5.939318288629242, - "learning_rate": 8.099035770455755e-07, - "loss": 0.1552, + "epoch": 1.767438576673256, + "grad_norm": 4.077587604592495, + "learning_rate": 7.621989387111459e-06, + "loss": 0.6274, "step": 12517 }, { - "epoch": 2.6248689452715452, - "grad_norm": 4.564307909350276, - "learning_rate": 8.090113098927621e-07, - "loss": 0.1674, + "epoch": 1.767579779723242, + "grad_norm": 3.1653100221081285, + "learning_rate": 7.620508692431327e-06, + "loss": 0.5186, "step": 12518 }, { - "epoch": 2.6250786328370728, - "grad_norm": 4.137203442560623, - "learning_rate": 8.081195137940612e-07, - "loss": 0.1049, + "epoch": 1.7677209827732279, + "grad_norm": 3.6051823481068803, + "learning_rate": 7.619028053049545e-06, + "loss": 0.5296, "step": 12519 }, { - "epoch": 2.6252883204026, - "grad_norm": 6.779648545852885, - "learning_rate": 8.072281887951772e-07, - "loss": 0.175, + "epoch": 1.7678621858232137, + "grad_norm": 4.492520168642713, + "learning_rate": 7.617547469000524e-06, + "loss": 0.8, "step": 12520 }, { - "epoch": 2.6254980079681274, - "grad_norm": 4.414722299635095, - "learning_rate": 8.063373349417913e-07, - "loss": 0.1508, + "epoch": 1.7680033888731996, + "grad_norm": 3.82134154485563, + "learning_rate": 7.616066940318667e-06, + "loss": 0.4863, "step": 12521 }, { - "epoch": 2.625707695533655, - "grad_norm": 3.3749954709407874, - "learning_rate": 8.054469522795649e-07, - "loss": 0.1081, + "epoch": 1.7681445919231855, + "grad_norm": 4.08631481197251, + "learning_rate": 7.6145864670383884e-06, + "loss": 0.6602, "step": 12522 }, { - "epoch": 2.625917383099182, - "grad_norm": 3.617552737021408, - "learning_rate": 8.045570408541281e-07, - "loss": 0.1029, + "epoch": 1.7682857949731714, + "grad_norm": 3.7572633724594025, + "learning_rate": 7.613106049194092e-06, + "loss": 0.6828, "step": 12523 }, { - "epoch": 2.6261270706647095, - "grad_norm": 4.251296366565555, - "learning_rate": 8.036676007110911e-07, - "loss": 0.147, + "epoch": 1.7684269980231573, + "grad_norm": 3.0669350542553135, + "learning_rate": 7.611625686820177e-06, + "loss": 0.4887, "step": 12524 }, { - "epoch": 2.626336758230237, - "grad_norm": 4.465259091723742, - "learning_rate": 8.027786318960406e-07, - "loss": 0.1379, + "epoch": 1.7685682010731432, + "grad_norm": 3.0469922818845876, + "learning_rate": 7.610145379951051e-06, + "loss": 0.5176, "step": 12525 }, { - "epoch": 2.626546445795764, - "grad_norm": 4.769412162924134, - "learning_rate": 8.01890134454536e-07, - "loss": 0.1581, + "epoch": 1.768709404123129, + "grad_norm": 3.4268071974597567, + "learning_rate": 7.608665128621111e-06, + "loss": 0.5113, "step": 12526 }, { - "epoch": 2.6267561333612917, - "grad_norm": 2.9418264988990708, - "learning_rate": 8.010021084321173e-07, - "loss": 0.111, + "epoch": 1.768850607173115, + "grad_norm": 3.7498798767074604, + "learning_rate": 7.607184932864764e-06, + "loss": 0.614, "step": 12527 }, { - "epoch": 2.626965820926819, - "grad_norm": 6.149070947200199, - "learning_rate": 8.00114553874295e-07, - "loss": 0.203, + "epoch": 1.7689918102231008, + "grad_norm": 3.457951959657924, + "learning_rate": 7.6057047927164055e-06, + "loss": 0.5069, "step": 12528 }, { - "epoch": 2.6271755084923463, - "grad_norm": 4.342435726620971, - "learning_rate": 7.992274708265557e-07, - "loss": 0.1373, + "epoch": 1.7691330132730867, + "grad_norm": 3.844647584444253, + "learning_rate": 7.60422470821043e-06, + "loss": 0.6102, "step": 12529 }, { - "epoch": 2.627385196057874, - "grad_norm": 4.134785012878603, - "learning_rate": 7.983408593343667e-07, - "loss": 0.1419, + "epoch": 1.7692742163230726, + "grad_norm": 3.6876561053647823, + "learning_rate": 7.602744679381241e-06, + "loss": 0.5933, "step": 12530 }, { - "epoch": 2.6275948836234013, - "grad_norm": 3.8979868974839325, - "learning_rate": 7.974547194431703e-07, - "loss": 0.1328, + "epoch": 1.7694154193730585, + "grad_norm": 3.2184194650490956, + "learning_rate": 7.6012647062632325e-06, + "loss": 0.4679, "step": 12531 }, { - "epoch": 2.6278045711889284, - "grad_norm": 4.020821633691476, - "learning_rate": 7.965690511983804e-07, - "loss": 0.1142, + "epoch": 1.7695566224230443, + "grad_norm": 3.3119275553877157, + "learning_rate": 7.599784788890794e-06, + "loss": 0.4713, "step": 12532 }, { - "epoch": 2.628014258754456, - "grad_norm": 4.261088972036191, - "learning_rate": 7.956838546453882e-07, - "loss": 0.1318, + "epoch": 1.7696978254730302, + "grad_norm": 3.352943485152535, + "learning_rate": 7.598304927298321e-06, + "loss": 0.4538, "step": 12533 }, { - "epoch": 2.6282239463199835, - "grad_norm": 3.8096513757879467, - "learning_rate": 7.947991298295632e-07, - "loss": 0.1003, + "epoch": 1.7698390285230161, + "grad_norm": 3.651492646490969, + "learning_rate": 7.596825121520202e-06, + "loss": 0.6036, "step": 12534 }, { - "epoch": 2.6284336338855105, - "grad_norm": 3.594314400652386, - "learning_rate": 7.939148767962468e-07, - "loss": 0.0999, + "epoch": 1.769980231573002, + "grad_norm": 4.008703276035849, + "learning_rate": 7.5953453715908295e-06, + "loss": 0.5779, "step": 12535 }, { - "epoch": 2.628643321451038, - "grad_norm": 2.895019326971635, - "learning_rate": 7.930310955907627e-07, - "loss": 0.1011, + "epoch": 1.7701214346229879, + "grad_norm": 3.070428313959527, + "learning_rate": 7.593865677544594e-06, + "loss": 0.526, "step": 12536 }, { - "epoch": 2.628853009016565, - "grad_norm": 4.74189094495242, - "learning_rate": 7.921477862584026e-07, - "loss": 0.1578, + "epoch": 1.7702626376729738, + "grad_norm": 3.825177336104953, + "learning_rate": 7.592386039415881e-06, + "loss": 0.5823, "step": 12537 }, { - "epoch": 2.6290626965820927, - "grad_norm": 3.602380171658708, - "learning_rate": 7.912649488444379e-07, - "loss": 0.1544, + "epoch": 1.7704038407229596, + "grad_norm": 4.518224386240222, + "learning_rate": 7.590906457239073e-06, + "loss": 0.7404, "step": 12538 }, { - "epoch": 2.62927238414762, - "grad_norm": 3.099961957096034, - "learning_rate": 7.903825833941159e-07, - "loss": 0.1107, + "epoch": 1.7705450437729455, + "grad_norm": 3.0833367074018025, + "learning_rate": 7.589426931048562e-06, + "loss": 0.518, "step": 12539 }, { - "epoch": 2.6294820717131473, - "grad_norm": 4.46631195366002, - "learning_rate": 7.895006899526625e-07, - "loss": 0.1621, + "epoch": 1.7706862468229314, + "grad_norm": 2.773794350920757, + "learning_rate": 7.587947460878731e-06, + "loss": 0.3966, "step": 12540 }, { - "epoch": 2.629691759278675, - "grad_norm": 3.9501169651971084, - "learning_rate": 7.886192685652705e-07, - "loss": 0.1244, + "epoch": 1.7708274498729173, + "grad_norm": 3.270102781492716, + "learning_rate": 7.586468046763956e-06, + "loss": 0.5815, "step": 12541 }, { - "epoch": 2.629901446844202, - "grad_norm": 4.409815621166538, - "learning_rate": 7.877383192771182e-07, - "loss": 0.1206, + "epoch": 1.7709686529229032, + "grad_norm": 4.244018171905019, + "learning_rate": 7.584988688738622e-06, + "loss": 0.7261, "step": 12542 }, { - "epoch": 2.6301111344097294, - "grad_norm": 4.317304772394556, - "learning_rate": 7.868578421333551e-07, - "loss": 0.1413, + "epoch": 1.771109855972889, + "grad_norm": 3.176724270447471, + "learning_rate": 7.583509386837104e-06, + "loss": 0.4999, "step": 12543 }, { - "epoch": 2.630320821975257, - "grad_norm": 5.035071277937222, - "learning_rate": 7.859778371791082e-07, - "loss": 0.165, + "epoch": 1.771251059022875, + "grad_norm": 3.257812949457098, + "learning_rate": 7.582030141093788e-06, + "loss": 0.6247, "step": 12544 }, { - "epoch": 2.630530509540784, - "grad_norm": 6.519378639038373, - "learning_rate": 7.850983044594762e-07, - "loss": 0.1659, + "epoch": 1.7713922620728608, + "grad_norm": 3.4429436694725903, + "learning_rate": 7.580550951543048e-06, + "loss": 0.5239, "step": 12545 }, { - "epoch": 2.6307401971063116, - "grad_norm": 3.97971816819629, - "learning_rate": 7.842192440195395e-07, - "loss": 0.1222, + "epoch": 1.7715334651228467, + "grad_norm": 3.7518113615551023, + "learning_rate": 7.5790718182192545e-06, + "loss": 0.5843, "step": 12546 }, { - "epoch": 2.630949884671839, - "grad_norm": 5.123931401558346, - "learning_rate": 7.833406559043499e-07, - "loss": 0.1702, + "epoch": 1.7716746681728326, + "grad_norm": 3.1761870502269614, + "learning_rate": 7.577592741156791e-06, + "loss": 0.5849, "step": 12547 }, { - "epoch": 2.631159572237366, - "grad_norm": 3.6534674317095397, - "learning_rate": 7.824625401589381e-07, - "loss": 0.12, + "epoch": 1.7718158712228185, + "grad_norm": 3.2543912862349704, + "learning_rate": 7.576113720390027e-06, + "loss": 0.5699, "step": 12548 }, { - "epoch": 2.6313692598028937, - "grad_norm": 3.481611349926908, - "learning_rate": 7.815848968283079e-07, - "loss": 0.124, + "epoch": 1.7719570742728044, + "grad_norm": 4.378120058932897, + "learning_rate": 7.574634755953331e-06, + "loss": 0.6104, "step": 12549 }, { - "epoch": 2.6315789473684212, - "grad_norm": 3.885767296328627, - "learning_rate": 7.807077259574392e-07, - "loss": 0.1168, + "epoch": 1.7720982773227902, + "grad_norm": 3.3733759258172946, + "learning_rate": 7.573155847881076e-06, + "loss": 0.5081, "step": 12550 }, { - "epoch": 2.6317886349339483, - "grad_norm": 4.879011229400413, - "learning_rate": 7.798310275912891e-07, - "loss": 0.1953, + "epoch": 1.7722394803727761, + "grad_norm": 2.8668752163132267, + "learning_rate": 7.5716769962076305e-06, + "loss": 0.5344, "step": 12551 }, { - "epoch": 2.631998322499476, - "grad_norm": 4.134315971715776, - "learning_rate": 7.789548017747928e-07, - "loss": 0.1244, + "epoch": 1.772380683422762, + "grad_norm": 3.061969354807529, + "learning_rate": 7.570198200967363e-06, + "loss": 0.5341, "step": 12552 }, { - "epoch": 2.6322080100650034, - "grad_norm": 3.8777831491062313, - "learning_rate": 7.780790485528556e-07, - "loss": 0.1286, + "epoch": 1.7725218864727479, + "grad_norm": 3.326654387973597, + "learning_rate": 7.568719462194639e-06, + "loss": 0.5171, "step": 12553 }, { - "epoch": 2.6324176976305305, - "grad_norm": 4.235887096372328, - "learning_rate": 7.772037679703592e-07, - "loss": 0.1521, + "epoch": 1.7726630895227338, + "grad_norm": 4.263205024566076, + "learning_rate": 7.567240779923827e-06, + "loss": 0.7098, "step": 12554 }, { - "epoch": 2.632627385196058, - "grad_norm": 4.817236419914835, - "learning_rate": 7.763289600721691e-07, - "loss": 0.182, + "epoch": 1.7728042925727197, + "grad_norm": 3.629190852869291, + "learning_rate": 7.565762154189284e-06, + "loss": 0.5145, "step": 12555 }, { - "epoch": 2.632837072761585, - "grad_norm": 6.711570814223188, - "learning_rate": 7.754546249031148e-07, - "loss": 0.2651, + "epoch": 1.7729454956227055, + "grad_norm": 3.3060427977322187, + "learning_rate": 7.564283585025383e-06, + "loss": 0.5156, "step": 12556 }, { - "epoch": 2.6330467603271126, - "grad_norm": 3.966812115564374, - "learning_rate": 7.745807625080116e-07, - "loss": 0.1414, + "epoch": 1.7730866986726914, + "grad_norm": 3.3801415206100924, + "learning_rate": 7.562805072466479e-06, + "loss": 0.513, "step": 12557 }, { - "epoch": 2.63325644789264, - "grad_norm": 3.8359922899948224, - "learning_rate": 7.737073729316458e-07, - "loss": 0.1635, + "epoch": 1.7732279017226773, + "grad_norm": 3.62197143566428, + "learning_rate": 7.561326616546932e-06, + "loss": 0.5418, "step": 12558 }, { - "epoch": 2.633466135458167, - "grad_norm": 3.5525722346077595, - "learning_rate": 7.728344562187773e-07, - "loss": 0.1022, + "epoch": 1.7733691047726632, + "grad_norm": 3.441188461105048, + "learning_rate": 7.5598482173011e-06, + "loss": 0.5428, "step": 12559 }, { - "epoch": 2.6336758230236947, - "grad_norm": 4.019947955577282, - "learning_rate": 7.719620124141469e-07, - "loss": 0.1483, + "epoch": 1.773510307822649, + "grad_norm": 3.253321556039228, + "learning_rate": 7.5583698747633394e-06, + "loss": 0.545, "step": 12560 }, { - "epoch": 2.633885510589222, - "grad_norm": 4.687015854350539, - "learning_rate": 7.710900415624712e-07, - "loss": 0.147, + "epoch": 1.773651510872635, + "grad_norm": 4.134119948160922, + "learning_rate": 7.556891588968011e-06, + "loss": 0.666, "step": 12561 }, { - "epoch": 2.6340951981547494, - "grad_norm": 4.141118190628882, - "learning_rate": 7.702185437084342e-07, - "loss": 0.1352, + "epoch": 1.7737927139226208, + "grad_norm": 4.09011967203926, + "learning_rate": 7.555413359949468e-06, + "loss": 0.6204, "step": 12562 }, { - "epoch": 2.634304885720277, - "grad_norm": 4.014687286259511, - "learning_rate": 7.693475188967059e-07, - "loss": 0.152, + "epoch": 1.7739339169726067, + "grad_norm": 4.581483731829366, + "learning_rate": 7.553935187742061e-06, + "loss": 0.7081, "step": 12563 }, { - "epoch": 2.634514573285804, - "grad_norm": 4.808634429731679, - "learning_rate": 7.684769671719283e-07, - "loss": 0.1679, + "epoch": 1.7740751200225926, + "grad_norm": 3.4292329268034796, + "learning_rate": 7.552457072380143e-06, + "loss": 0.599, "step": 12564 }, { - "epoch": 2.6347242608513315, - "grad_norm": 4.707119438128783, - "learning_rate": 7.67606888578718e-07, - "loss": 0.1444, + "epoch": 1.7742163230725785, + "grad_norm": 3.960074085062633, + "learning_rate": 7.55097901389807e-06, + "loss": 0.6195, "step": 12565 }, { - "epoch": 2.634933948416859, - "grad_norm": 3.6564576575132626, - "learning_rate": 7.667372831616649e-07, - "loss": 0.1595, + "epoch": 1.7743575261225644, + "grad_norm": 5.301532332494007, + "learning_rate": 7.549501012330184e-06, + "loss": 0.8063, "step": 12566 }, { - "epoch": 2.635143635982386, - "grad_norm": 4.722708372273352, - "learning_rate": 7.658681509653409e-07, - "loss": 0.144, + "epoch": 1.7744987291725502, + "grad_norm": 3.1016080455082005, + "learning_rate": 7.548023067710837e-06, + "loss": 0.477, "step": 12567 }, { - "epoch": 2.6353533235479136, - "grad_norm": 4.268815605778567, - "learning_rate": 7.649994920342907e-07, - "loss": 0.139, + "epoch": 1.7746399322225361, + "grad_norm": 3.87243441724298, + "learning_rate": 7.546545180074374e-06, + "loss": 0.5837, "step": 12568 }, { - "epoch": 2.635563011113441, - "grad_norm": 3.337035122797393, - "learning_rate": 7.641313064130352e-07, - "loss": 0.1076, + "epoch": 1.774781135272522, + "grad_norm": 3.7543941466471473, + "learning_rate": 7.54506734945514e-06, + "loss": 0.5718, "step": 12569 }, { - "epoch": 2.6357726986789682, - "grad_norm": 4.691405160540121, - "learning_rate": 7.632635941460664e-07, - "loss": 0.1419, + "epoch": 1.774922338322508, + "grad_norm": 3.6294971934761167, + "learning_rate": 7.543589575887482e-06, + "loss": 0.5938, "step": 12570 }, { - "epoch": 2.6359823862444958, - "grad_norm": 4.217483711603581, - "learning_rate": 7.623963552778602e-07, - "loss": 0.1073, + "epoch": 1.7750635413724938, + "grad_norm": 3.1139574918940096, + "learning_rate": 7.542111859405743e-06, + "loss": 0.472, "step": 12571 }, { - "epoch": 2.6361920738100233, - "grad_norm": 4.614364095806026, - "learning_rate": 7.615295898528618e-07, - "loss": 0.1682, + "epoch": 1.7752047444224797, + "grad_norm": 3.8647637320750046, + "learning_rate": 7.54063420004426e-06, + "loss": 0.6484, "step": 12572 }, { - "epoch": 2.6364017613755504, - "grad_norm": 4.913581886994403, - "learning_rate": 7.60663297915496e-07, - "loss": 0.1317, + "epoch": 1.7753459474724655, + "grad_norm": 3.6346673522727677, + "learning_rate": 7.539156597837378e-06, + "loss": 0.5631, "step": 12573 }, { - "epoch": 2.636611448941078, - "grad_norm": 5.3735997710444225, - "learning_rate": 7.597974795101615e-07, - "loss": 0.1002, + "epoch": 1.7754871505224514, + "grad_norm": 3.4561902016910726, + "learning_rate": 7.5376790528194354e-06, + "loss": 0.5434, "step": 12574 }, { - "epoch": 2.6368211365066054, - "grad_norm": 4.628727167423716, - "learning_rate": 7.589321346812306e-07, - "loss": 0.1617, + "epoch": 1.7756283535724373, + "grad_norm": 3.5734905648712134, + "learning_rate": 7.536201565024768e-06, + "loss": 0.4642, "step": 12575 }, { - "epoch": 2.6370308240721325, - "grad_norm": 5.420235085587457, - "learning_rate": 7.580672634730557e-07, - "loss": 0.1453, + "epoch": 1.7757695566224232, + "grad_norm": 3.6518198853939237, + "learning_rate": 7.534724134487709e-06, + "loss": 0.6207, "step": 12576 }, { - "epoch": 2.63724051163766, - "grad_norm": 5.408780639193367, - "learning_rate": 7.572028659299646e-07, - "loss": 0.1667, + "epoch": 1.775910759672409, + "grad_norm": 2.8179548782523125, + "learning_rate": 7.533246761242598e-06, + "loss": 0.4829, "step": 12577 }, { - "epoch": 2.637450199203187, - "grad_norm": 3.68061344580538, - "learning_rate": 7.563389420962586e-07, - "loss": 0.1101, + "epoch": 1.776051962722395, + "grad_norm": 3.162921981434475, + "learning_rate": 7.531769445323767e-06, + "loss": 0.4685, "step": 12578 }, { - "epoch": 2.6376598867687147, - "grad_norm": 5.467383371983453, - "learning_rate": 7.554754920162111e-07, - "loss": 0.1564, + "epoch": 1.7761931657723808, + "grad_norm": 3.5070201542811636, + "learning_rate": 7.530292186765548e-06, + "loss": 0.5568, "step": 12579 }, { - "epoch": 2.6378695743342417, - "grad_norm": 4.673193172097822, - "learning_rate": 7.5461251573408e-07, - "loss": 0.1345, + "epoch": 1.7763343688223667, + "grad_norm": 3.312220876501213, + "learning_rate": 7.528814985602273e-06, + "loss": 0.5343, "step": 12580 }, { - "epoch": 2.6380792618997693, - "grad_norm": 4.000929465795318, - "learning_rate": 7.537500132940911e-07, - "loss": 0.1336, + "epoch": 1.7764755718723524, + "grad_norm": 3.3396743926493824, + "learning_rate": 7.5273378418682675e-06, + "loss": 0.5692, "step": 12581 }, { - "epoch": 2.638288949465297, - "grad_norm": 5.229897156964368, - "learning_rate": 7.528879847404535e-07, - "loss": 0.1812, + "epoch": 1.7766167749223383, + "grad_norm": 3.6904956595668637, + "learning_rate": 7.52586075559787e-06, + "loss": 0.5595, "step": 12582 }, { - "epoch": 2.638498637030824, - "grad_norm": 4.2210304307883595, - "learning_rate": 7.520264301173441e-07, - "loss": 0.1259, + "epoch": 1.7767579779723242, + "grad_norm": 3.2002263143480847, + "learning_rate": 7.524383726825393e-06, + "loss": 0.4677, "step": 12583 }, { - "epoch": 2.6387083245963514, - "grad_norm": 4.428285619233624, - "learning_rate": 7.511653494689186e-07, - "loss": 0.1627, + "epoch": 1.77689918102231, + "grad_norm": 3.1659314314007783, + "learning_rate": 7.522906755585171e-06, + "loss": 0.4969, "step": 12584 }, { - "epoch": 2.638918012161879, - "grad_norm": 2.9515018033350633, - "learning_rate": 7.503047428393106e-07, - "loss": 0.095, + "epoch": 1.777040384072296, + "grad_norm": 3.4771976551360755, + "learning_rate": 7.5214298419115276e-06, + "loss": 0.602, "step": 12585 }, { - "epoch": 2.639127699727406, - "grad_norm": 4.310322306287954, - "learning_rate": 7.494446102726294e-07, - "loss": 0.1432, + "epoch": 1.7771815871222818, + "grad_norm": 2.675592134959582, + "learning_rate": 7.5199529858387795e-06, + "loss": 0.4096, "step": 12586 }, { - "epoch": 2.6393373872929335, - "grad_norm": 3.7895239461777614, - "learning_rate": 7.48584951812954e-07, - "loss": 0.1395, + "epoch": 1.7773227901722677, + "grad_norm": 3.2689852867449125, + "learning_rate": 7.518476187401258e-06, + "loss": 0.5612, "step": 12587 }, { - "epoch": 2.639547074858461, - "grad_norm": 4.978326091435996, - "learning_rate": 7.477257675043447e-07, - "loss": 0.1922, + "epoch": 1.7774639932222536, + "grad_norm": 4.562471569764921, + "learning_rate": 7.516999446633277e-06, + "loss": 0.7384, "step": 12588 }, { - "epoch": 2.639756762423988, - "grad_norm": 4.797021525588315, - "learning_rate": 7.468670573908399e-07, - "loss": 0.1517, + "epoch": 1.7776051962722395, + "grad_norm": 4.496506750814818, + "learning_rate": 7.5155227635691544e-06, + "loss": 0.7187, "step": 12589 }, { - "epoch": 2.6399664499895157, - "grad_norm": 4.035058321323052, - "learning_rate": 7.460088215164463e-07, - "loss": 0.1509, + "epoch": 1.7777463993222253, + "grad_norm": 3.4723875571012095, + "learning_rate": 7.514046138243211e-06, + "loss": 0.5757, "step": 12590 }, { - "epoch": 2.640176137555043, - "grad_norm": 5.504647747994489, - "learning_rate": 7.451510599251499e-07, - "loss": 0.1732, + "epoch": 1.7778876023722112, + "grad_norm": 3.8986731880578267, + "learning_rate": 7.512569570689765e-06, + "loss": 0.5776, "step": 12591 }, { - "epoch": 2.6403858251205703, - "grad_norm": 3.2438899010942945, - "learning_rate": 7.442937726609145e-07, - "loss": 0.1079, + "epoch": 1.778028805422197, + "grad_norm": 3.048740512667674, + "learning_rate": 7.511093060943125e-06, + "loss": 0.4961, "step": 12592 }, { - "epoch": 2.640595512686098, - "grad_norm": 3.209178690851853, - "learning_rate": 7.434369597676749e-07, - "loss": 0.1206, + "epoch": 1.778170008472183, + "grad_norm": 3.650814113577218, + "learning_rate": 7.509616609037608e-06, + "loss": 0.6637, "step": 12593 }, { - "epoch": 2.6408052002516254, - "grad_norm": 4.292244295781463, - "learning_rate": 7.425806212893472e-07, - "loss": 0.1551, + "epoch": 1.7783112115221689, + "grad_norm": 4.078932597071414, + "learning_rate": 7.508140215007526e-06, + "loss": 0.5934, "step": 12594 }, { - "epoch": 2.6410148878171524, - "grad_norm": 4.8402474699134945, - "learning_rate": 7.417247572698194e-07, - "loss": 0.1883, + "epoch": 1.7784524145721547, + "grad_norm": 3.9426654685506213, + "learning_rate": 7.506663878887186e-06, + "loss": 0.5488, "step": 12595 }, { - "epoch": 2.64122457538268, - "grad_norm": 4.698461006950638, - "learning_rate": 7.408693677529532e-07, - "loss": 0.1461, + "epoch": 1.7785936176221406, + "grad_norm": 3.9262854116583576, + "learning_rate": 7.505187600710903e-06, + "loss": 0.6591, "step": 12596 }, { - "epoch": 2.641434262948207, - "grad_norm": 5.04896906133982, - "learning_rate": 7.400144527825914e-07, - "loss": 0.176, + "epoch": 1.7787348206721265, + "grad_norm": 3.712545125532184, + "learning_rate": 7.5037113805129835e-06, + "loss": 0.6308, "step": 12597 }, { - "epoch": 2.6416439505137346, - "grad_norm": 3.650307667148311, - "learning_rate": 7.391600124025511e-07, - "loss": 0.0971, + "epoch": 1.7788760237221124, + "grad_norm": 3.7387462030315044, + "learning_rate": 7.50223521832773e-06, + "loss": 0.5182, "step": 12598 }, { - "epoch": 2.6418536380792617, - "grad_norm": 3.2041939416460403, - "learning_rate": 7.383060466566228e-07, - "loss": 0.1025, + "epoch": 1.7790172267720983, + "grad_norm": 3.5699442642172228, + "learning_rate": 7.50075911418946e-06, + "loss": 0.5374, "step": 12599 }, { - "epoch": 2.642063325644789, - "grad_norm": 5.763439474652581, - "learning_rate": 7.374525555885704e-07, - "loss": 0.1508, + "epoch": 1.7791584298220842, + "grad_norm": 3.9092569592706425, + "learning_rate": 7.49928306813246e-06, + "loss": 0.7467, "step": 12600 }, { - "epoch": 2.6422730132103167, - "grad_norm": 5.21435687581906, - "learning_rate": 7.365995392421421e-07, - "loss": 0.1503, + "epoch": 1.77929963287207, + "grad_norm": 3.118935331467771, + "learning_rate": 7.497807080191046e-06, + "loss": 0.4793, "step": 12601 }, { - "epoch": 2.642482700775844, - "grad_norm": 3.6231930976983664, - "learning_rate": 7.357469976610521e-07, - "loss": 0.1258, + "epoch": 1.779440835922056, + "grad_norm": 3.3574525816442895, + "learning_rate": 7.496331150399512e-06, + "loss": 0.605, "step": 12602 }, { - "epoch": 2.6426923883413713, - "grad_norm": 3.877325125174339, - "learning_rate": 7.348949308889985e-07, - "loss": 0.1147, + "epoch": 1.7795820389720418, + "grad_norm": 3.61421947780563, + "learning_rate": 7.494855278792161e-06, + "loss": 0.6011, "step": 12603 }, { - "epoch": 2.642902075906899, - "grad_norm": 4.122724696455569, - "learning_rate": 7.340433389696489e-07, - "loss": 0.1542, + "epoch": 1.7797232420220277, + "grad_norm": 3.629797217769061, + "learning_rate": 7.493379465403292e-06, + "loss": 0.4923, "step": 12604 }, { - "epoch": 2.643111763472426, - "grad_norm": 3.972453923474521, - "learning_rate": 7.33192221946647e-07, - "loss": 0.1429, + "epoch": 1.7798644450720136, + "grad_norm": 2.889899810849255, + "learning_rate": 7.491903710267203e-06, + "loss": 0.523, "step": 12605 }, { - "epoch": 2.6433214510379535, - "grad_norm": 4.015269816390465, - "learning_rate": 7.32341579863618e-07, - "loss": 0.1034, + "epoch": 1.7800056481219995, + "grad_norm": 3.906373564724152, + "learning_rate": 7.490428013418187e-06, + "loss": 0.774, "step": 12606 }, { - "epoch": 2.643531138603481, - "grad_norm": 5.513555813843799, - "learning_rate": 7.314914127641582e-07, - "loss": 0.1506, + "epoch": 1.7801468511719853, + "grad_norm": 2.805753335559399, + "learning_rate": 7.48895237489054e-06, + "loss": 0.4316, "step": 12607 }, { - "epoch": 2.643740826169008, - "grad_norm": 3.521344413504259, - "learning_rate": 7.306417206918359e-07, - "loss": 0.1096, + "epoch": 1.7802880542219712, + "grad_norm": 3.7765057671541937, + "learning_rate": 7.4874767947185586e-06, + "loss": 0.641, "step": 12608 }, { - "epoch": 2.6439505137345356, - "grad_norm": 5.540244187609464, - "learning_rate": 7.297925036902032e-07, - "loss": 0.1595, + "epoch": 1.780429257271957, + "grad_norm": 4.61490289679377, + "learning_rate": 7.486001272936529e-06, + "loss": 0.7612, "step": 12609 }, { - "epoch": 2.644160201300063, - "grad_norm": 4.040151491461108, - "learning_rate": 7.289437618027839e-07, - "loss": 0.1299, + "epoch": 1.780570460321943, + "grad_norm": 4.148799089621253, + "learning_rate": 7.4845258095787454e-06, + "loss": 0.5987, "step": 12610 }, { - "epoch": 2.64436988886559, - "grad_norm": 4.109575021407578, - "learning_rate": 7.280954950730767e-07, - "loss": 0.1356, + "epoch": 1.7807116633719289, + "grad_norm": 3.7275191942711157, + "learning_rate": 7.4830504046794946e-06, + "loss": 0.602, "step": 12611 }, { - "epoch": 2.6445795764311177, - "grad_norm": 4.171848492814981, - "learning_rate": 7.272477035445557e-07, - "loss": 0.1168, + "epoch": 1.7808528664219148, + "grad_norm": 3.1133517208204755, + "learning_rate": 7.481575058273063e-06, + "loss": 0.4384, "step": 12612 }, { - "epoch": 2.6447892639966453, - "grad_norm": 4.27842625088676, - "learning_rate": 7.264003872606739e-07, - "loss": 0.1406, + "epoch": 1.7809940694719006, + "grad_norm": 8.440440034910013, + "learning_rate": 7.480099770393743e-06, + "loss": 0.4437, "step": 12613 }, { - "epoch": 2.6449989515621724, - "grad_norm": 4.035926692442725, - "learning_rate": 7.255535462648544e-07, - "loss": 0.1465, + "epoch": 1.7811352725218865, + "grad_norm": 3.2018307085635516, + "learning_rate": 7.478624541075814e-06, + "loss": 0.4697, "step": 12614 }, { - "epoch": 2.6452086391277, - "grad_norm": 4.152846517543206, - "learning_rate": 7.247071806005035e-07, - "loss": 0.1259, + "epoch": 1.7812764755718722, + "grad_norm": 3.1604731052340167, + "learning_rate": 7.477149370353561e-06, + "loss": 0.5455, "step": 12615 }, { - "epoch": 2.645418326693227, - "grad_norm": 5.093508811225395, - "learning_rate": 7.238612903109976e-07, - "loss": 0.1167, + "epoch": 1.781417678621858, + "grad_norm": 3.509888587215544, + "learning_rate": 7.475674258261274e-06, + "loss": 0.588, "step": 12616 }, { - "epoch": 2.6456280142587545, - "grad_norm": 4.769817633079835, - "learning_rate": 7.230158754396865e-07, - "loss": 0.1552, + "epoch": 1.781558881671844, + "grad_norm": 3.5057161896353968, + "learning_rate": 7.474199204833219e-06, + "loss": 0.5795, "step": 12617 }, { - "epoch": 2.6458377018242816, - "grad_norm": 4.710167524175901, - "learning_rate": 7.221709360299022e-07, - "loss": 0.1795, + "epoch": 1.7817000847218298, + "grad_norm": 3.5040352268113257, + "learning_rate": 7.472724210103687e-06, + "loss": 0.6301, "step": 12618 }, { - "epoch": 2.646047389389809, - "grad_norm": 3.9346658771277654, - "learning_rate": 7.213264721249525e-07, - "loss": 0.155, + "epoch": 1.7818412877718157, + "grad_norm": 5.19547656890915, + "learning_rate": 7.4712492741069535e-06, + "loss": 0.7418, "step": 12619 }, { - "epoch": 2.6462570769553366, - "grad_norm": 3.7665936598770284, - "learning_rate": 7.204824837681101e-07, - "loss": 0.136, + "epoch": 1.7819824908218016, + "grad_norm": 4.128503519134039, + "learning_rate": 7.4697743968772906e-06, + "loss": 0.5854, "step": 12620 }, { - "epoch": 2.6464667645208637, - "grad_norm": 5.413114293709111, - "learning_rate": 7.196389710026352e-07, - "loss": 0.13, + "epoch": 1.7821236938717875, + "grad_norm": 3.9129094746573507, + "learning_rate": 7.468299578448981e-06, + "loss": 0.5506, "step": 12621 }, { - "epoch": 2.6466764520863912, - "grad_norm": 5.430464437355707, - "learning_rate": 7.18795933871761e-07, - "loss": 0.1967, + "epoch": 1.7822648969217734, + "grad_norm": 3.4439051374703817, + "learning_rate": 7.466824818856296e-06, + "loss": 0.6094, "step": 12622 }, { - "epoch": 2.6468861396519188, - "grad_norm": 4.838773723627348, - "learning_rate": 7.179533724186927e-07, - "loss": 0.1518, + "epoch": 1.7824060999717593, + "grad_norm": 3.6578575341536426, + "learning_rate": 7.465350118133509e-06, + "loss": 0.6297, "step": 12623 }, { - "epoch": 2.647095827217446, - "grad_norm": 3.260822521718474, - "learning_rate": 7.171112866866104e-07, - "loss": 0.105, + "epoch": 1.7825473030217451, + "grad_norm": 3.530279689275235, + "learning_rate": 7.463875476314888e-06, + "loss": 0.4868, "step": 12624 }, { - "epoch": 2.6473055147829734, - "grad_norm": 4.065326315896353, - "learning_rate": 7.162696767186772e-07, - "loss": 0.133, + "epoch": 1.782688506071731, + "grad_norm": 3.6803974768764363, + "learning_rate": 7.462400893434711e-06, + "loss": 0.4803, "step": 12625 }, { - "epoch": 2.647515202348501, - "grad_norm": 3.7634625146516454, - "learning_rate": 7.154285425580243e-07, - "loss": 0.1294, + "epoch": 1.782829709121717, + "grad_norm": 3.505152466274522, + "learning_rate": 7.460926369527236e-06, + "loss": 0.5904, "step": 12626 }, { - "epoch": 2.647724889914028, - "grad_norm": 3.5424257777676527, - "learning_rate": 7.145878842477627e-07, - "loss": 0.0962, + "epoch": 1.7829709121717028, + "grad_norm": 3.9216795158815057, + "learning_rate": 7.459451904626738e-06, + "loss": 0.5733, "step": 12627 }, { - "epoch": 2.6479345774795555, - "grad_norm": 5.2674700860643755, - "learning_rate": 7.137477018309747e-07, - "loss": 0.1415, + "epoch": 1.7831121152216887, + "grad_norm": 3.3964606147122987, + "learning_rate": 7.45797749876748e-06, + "loss": 0.5131, "step": 12628 }, { - "epoch": 2.648144265045083, - "grad_norm": 5.2204793287333136, - "learning_rate": 7.129079953507246e-07, - "loss": 0.1164, + "epoch": 1.7832533182716745, + "grad_norm": 3.4724944940623743, + "learning_rate": 7.456503151983725e-06, + "loss": 0.6069, "step": 12629 }, { - "epoch": 2.64835395261061, - "grad_norm": 4.315467933145188, - "learning_rate": 7.120687648500468e-07, - "loss": 0.1293, + "epoch": 1.7833945213216604, + "grad_norm": 3.1994757420439894, + "learning_rate": 7.455028864309742e-06, + "loss": 0.4793, "step": 12630 }, { - "epoch": 2.6485636401761377, - "grad_norm": 5.265944376939534, - "learning_rate": 7.112300103719549e-07, - "loss": 0.1709, + "epoch": 1.7835357243716463, + "grad_norm": 3.3571940134403038, + "learning_rate": 7.453554635779786e-06, + "loss": 0.4749, "step": 12631 }, { - "epoch": 2.648773327741665, - "grad_norm": 5.447535489681987, - "learning_rate": 7.103917319594356e-07, - "loss": 0.1571, + "epoch": 1.7836769274216322, + "grad_norm": 3.6312057788111876, + "learning_rate": 7.4520804664281195e-06, + "loss": 0.5593, "step": 12632 }, { - "epoch": 2.6489830153071923, - "grad_norm": 3.4988078747793954, - "learning_rate": 7.095539296554499e-07, - "loss": 0.1077, + "epoch": 1.783818130471618, + "grad_norm": 3.768872218264469, + "learning_rate": 7.450606356289009e-06, + "loss": 0.5434, "step": 12633 }, { - "epoch": 2.64919270287272, - "grad_norm": 2.588474898374482, - "learning_rate": 7.087166035029403e-07, - "loss": 0.1007, + "epoch": 1.783959333521604, + "grad_norm": 3.259814140292135, + "learning_rate": 7.449132305396701e-06, + "loss": 0.5693, "step": 12634 }, { - "epoch": 2.649402390438247, - "grad_norm": 4.364946780875802, - "learning_rate": 7.078797535448201e-07, - "loss": 0.1645, + "epoch": 1.7841005365715898, + "grad_norm": 3.597756759226259, + "learning_rate": 7.447658313785458e-06, + "loss": 0.6635, "step": 12635 }, { - "epoch": 2.6496120780037744, - "grad_norm": 4.099121305160108, - "learning_rate": 7.070433798239784e-07, - "loss": 0.1449, + "epoch": 1.7842417396215757, + "grad_norm": 5.127070173959923, + "learning_rate": 7.446184381489533e-06, + "loss": 0.6483, "step": 12636 }, { - "epoch": 2.6498217655693015, - "grad_norm": 3.974024835337795, - "learning_rate": 7.06207482383281e-07, - "loss": 0.1328, + "epoch": 1.7843829426715616, + "grad_norm": 3.687316716326993, + "learning_rate": 7.444710508543181e-06, + "loss": 0.5838, "step": 12637 }, { - "epoch": 2.650031453134829, - "grad_norm": 5.845957287062704, - "learning_rate": 7.053720612655702e-07, - "loss": 0.1725, + "epoch": 1.7845241457215475, + "grad_norm": 3.1021132460273226, + "learning_rate": 7.443236694980649e-06, + "loss": 0.4489, "step": 12638 }, { - "epoch": 2.6502411407003565, - "grad_norm": 3.9062197769903197, - "learning_rate": 7.045371165136605e-07, - "loss": 0.1205, + "epoch": 1.7846653487715334, + "grad_norm": 3.509011034754844, + "learning_rate": 7.441762940836197e-06, + "loss": 0.6171, "step": 12639 }, { - "epoch": 2.6504508282658836, - "grad_norm": 4.53164629909146, - "learning_rate": 7.03702648170348e-07, - "loss": 0.1354, + "epoch": 1.7848065518215193, + "grad_norm": 3.726795909998218, + "learning_rate": 7.440289246144067e-06, + "loss": 0.6288, "step": 12640 }, { - "epoch": 2.650660515831411, - "grad_norm": 3.609244672478305, - "learning_rate": 7.028686562783971e-07, - "loss": 0.1, + "epoch": 1.7849477548715051, + "grad_norm": 3.737239882044377, + "learning_rate": 7.438815610938512e-06, + "loss": 0.5934, "step": 12641 }, { - "epoch": 2.6508702033969387, - "grad_norm": 4.362939927326942, - "learning_rate": 7.020351408805514e-07, - "loss": 0.1348, + "epoch": 1.785088957921491, + "grad_norm": 3.573548072965206, + "learning_rate": 7.437342035253775e-06, + "loss": 0.4912, "step": 12642 }, { - "epoch": 2.6510798909624658, - "grad_norm": 4.21682711838217, - "learning_rate": 7.012021020195325e-07, - "loss": 0.1095, + "epoch": 1.785230160971477, + "grad_norm": 4.218418493286466, + "learning_rate": 7.4358685191241e-06, + "loss": 0.6675, "step": 12643 }, { - "epoch": 2.6512895785279933, - "grad_norm": 4.907432698337553, - "learning_rate": 7.003695397380362e-07, - "loss": 0.1695, + "epoch": 1.7853713640214628, + "grad_norm": 2.93671541893229, + "learning_rate": 7.434395062583735e-06, + "loss": 0.4634, "step": 12644 }, { - "epoch": 2.651499266093521, - "grad_norm": 5.5229083108521, - "learning_rate": 6.99537454078727e-07, - "loss": 0.1726, + "epoch": 1.7855125670714487, + "grad_norm": 4.28654180282839, + "learning_rate": 7.432921665666921e-06, + "loss": 0.7337, "step": 12645 }, { - "epoch": 2.651708953659048, - "grad_norm": 4.352157879708616, - "learning_rate": 6.987058450842532e-07, - "loss": 0.1528, + "epoch": 1.7856537701214346, + "grad_norm": 3.4784233271388985, + "learning_rate": 7.431448328407896e-06, + "loss": 0.5185, "step": 12646 }, { - "epoch": 2.6519186412245754, - "grad_norm": 4.2400275325578844, - "learning_rate": 6.978747127972407e-07, - "loss": 0.1368, + "epoch": 1.7857949731714204, + "grad_norm": 2.977648676040175, + "learning_rate": 7.4299750508409054e-06, + "loss": 0.4106, "step": 12647 }, { - "epoch": 2.652128328790103, - "grad_norm": 4.484034089802241, - "learning_rate": 6.97044057260281e-07, - "loss": 0.1357, + "epoch": 1.7859361762214063, + "grad_norm": 4.515369302150069, + "learning_rate": 7.428501833000186e-06, + "loss": 0.5768, "step": 12648 }, { - "epoch": 2.65233801635563, - "grad_norm": 4.878218402757219, - "learning_rate": 6.962138785159478e-07, - "loss": 0.1388, + "epoch": 1.7860773792713922, + "grad_norm": 3.1530223545070837, + "learning_rate": 7.427028674919969e-06, + "loss": 0.4981, "step": 12649 }, { - "epoch": 2.6525477039211576, - "grad_norm": 4.26261262452982, - "learning_rate": 6.953841766067915e-07, - "loss": 0.1444, + "epoch": 1.786218582321378, + "grad_norm": 4.108880410984938, + "learning_rate": 7.4255555766345025e-06, + "loss": 0.6309, "step": 12650 }, { - "epoch": 2.652757391486685, - "grad_norm": 4.387552773337927, - "learning_rate": 6.945549515753325e-07, - "loss": 0.122, + "epoch": 1.786359785371364, + "grad_norm": 3.409095633110417, + "learning_rate": 7.4240825381780046e-06, + "loss": 0.5093, "step": 12651 }, { - "epoch": 2.652967079052212, - "grad_norm": 4.691733644259645, - "learning_rate": 6.937262034640735e-07, - "loss": 0.1498, + "epoch": 1.7865009884213499, + "grad_norm": 3.3522932557898897, + "learning_rate": 7.422609559584719e-06, + "loss": 0.5409, "step": 12652 }, { - "epoch": 2.6531767666177397, - "grad_norm": 4.026961630370885, - "learning_rate": 6.928979323154883e-07, - "loss": 0.1117, + "epoch": 1.7866421914713357, + "grad_norm": 2.5047917482370767, + "learning_rate": 7.421136640888874e-06, + "loss": 0.3895, "step": 12653 }, { - "epoch": 2.653386454183267, - "grad_norm": 3.535103968634144, - "learning_rate": 6.92070138172023e-07, - "loss": 0.1106, + "epoch": 1.7867833945213216, + "grad_norm": 3.631414884750939, + "learning_rate": 7.4196637821247e-06, + "loss": 0.5799, "step": 12654 }, { - "epoch": 2.6535961417487943, - "grad_norm": 4.340422957226782, - "learning_rate": 6.91242821076108e-07, - "loss": 0.1239, + "epoch": 1.7869245975713075, + "grad_norm": 4.098617431893127, + "learning_rate": 7.418190983326423e-06, + "loss": 0.6633, "step": 12655 }, { - "epoch": 2.6538058293143214, - "grad_norm": 3.8862852530530785, - "learning_rate": 6.90415981070145e-07, - "loss": 0.1342, + "epoch": 1.7870658006212934, + "grad_norm": 3.2313347050858123, + "learning_rate": 7.416718244528275e-06, + "loss": 0.5511, "step": 12656 }, { - "epoch": 2.654015516879849, - "grad_norm": 3.8735343998502287, - "learning_rate": 6.8958961819651e-07, - "loss": 0.1172, + "epoch": 1.7872070036712793, + "grad_norm": 3.748926376229165, + "learning_rate": 7.415245565764479e-06, + "loss": 0.5683, "step": 12657 }, { - "epoch": 2.6542252044453765, - "grad_norm": 4.196653747846812, - "learning_rate": 6.887637324975538e-07, - "loss": 0.1305, + "epoch": 1.7873482067212652, + "grad_norm": 2.989385034941308, + "learning_rate": 7.413772947069262e-06, + "loss": 0.3899, "step": 12658 }, { - "epoch": 2.6544348920109035, - "grad_norm": 4.675004291138012, - "learning_rate": 6.879383240156068e-07, - "loss": 0.1309, + "epoch": 1.787489409771251, + "grad_norm": 4.193753736571592, + "learning_rate": 7.412300388476843e-06, + "loss": 0.6039, "step": 12659 }, { - "epoch": 2.654644579576431, - "grad_norm": 4.3536023105224615, - "learning_rate": 6.871133927929696e-07, - "loss": 0.1332, + "epoch": 1.787630612821237, + "grad_norm": 3.3443997501048375, + "learning_rate": 7.410827890021444e-06, + "loss": 0.5453, "step": 12660 }, { - "epoch": 2.6548542671419586, - "grad_norm": 3.6891008354533508, - "learning_rate": 6.862889388719251e-07, - "loss": 0.1258, + "epoch": 1.7877718158712228, + "grad_norm": 2.6002350329770976, + "learning_rate": 7.409355451737287e-06, + "loss": 0.3999, "step": 12661 }, { - "epoch": 2.6550639547074857, - "grad_norm": 3.9009692119192416, - "learning_rate": 6.854649622947273e-07, - "loss": 0.1534, + "epoch": 1.7879130189212087, + "grad_norm": 3.3996801284863154, + "learning_rate": 7.407883073658591e-06, + "loss": 0.4863, "step": 12662 }, { - "epoch": 2.655273642273013, - "grad_norm": 5.0894974924162595, - "learning_rate": 6.846414631036036e-07, - "loss": 0.1627, + "epoch": 1.7880542219711946, + "grad_norm": 3.378611439705507, + "learning_rate": 7.4064107558195705e-06, + "loss": 0.4487, "step": 12663 }, { - "epoch": 2.6554833298385407, - "grad_norm": 5.441259647127946, - "learning_rate": 6.838184413407611e-07, - "loss": 0.1455, + "epoch": 1.7881954250211805, + "grad_norm": 3.3004741338294328, + "learning_rate": 7.404938498254448e-06, + "loss": 0.5053, "step": 12664 }, { - "epoch": 2.655693017404068, - "grad_norm": 6.744767318163683, - "learning_rate": 6.829958970483852e-07, - "loss": 0.1948, + "epoch": 1.7883366280711663, + "grad_norm": 4.391406098640896, + "learning_rate": 7.403466300997433e-06, + "loss": 0.7061, "step": 12665 }, { - "epoch": 2.6559027049695954, - "grad_norm": 5.481736694650218, - "learning_rate": 6.821738302686254e-07, - "loss": 0.1704, + "epoch": 1.7884778311211522, + "grad_norm": 2.7147667939285056, + "learning_rate": 7.401994164082738e-06, + "loss": 0.3793, "step": 12666 }, { - "epoch": 2.656112392535123, - "grad_norm": 3.883459572991206, - "learning_rate": 6.81352241043618e-07, - "loss": 0.1121, + "epoch": 1.788619034171138, + "grad_norm": 2.7796029442087735, + "learning_rate": 7.400522087544582e-06, + "loss": 0.471, "step": 12667 }, { - "epoch": 2.65632208010065, - "grad_norm": 4.376530323647416, - "learning_rate": 6.805311294154715e-07, - "loss": 0.1206, + "epoch": 1.788760237221124, + "grad_norm": 3.900575154184068, + "learning_rate": 7.399050071417163e-06, + "loss": 0.5418, "step": 12668 }, { - "epoch": 2.6565317676661775, - "grad_norm": 5.019842802019677, - "learning_rate": 6.79710495426269e-07, - "loss": 0.1577, + "epoch": 1.7889014402711099, + "grad_norm": 3.997929457463544, + "learning_rate": 7.397578115734701e-06, + "loss": 0.6009, "step": 12669 }, { - "epoch": 2.656741455231705, - "grad_norm": 3.8781944141075715, - "learning_rate": 6.788903391180679e-07, - "loss": 0.1237, + "epoch": 1.7890426433210957, + "grad_norm": 3.865579488491704, + "learning_rate": 7.396106220531398e-06, + "loss": 0.6832, "step": 12670 }, { - "epoch": 2.656951142797232, - "grad_norm": 3.23722075228387, - "learning_rate": 6.780706605329046e-07, - "loss": 0.1042, + "epoch": 1.7891838463710816, + "grad_norm": 3.698314498265299, + "learning_rate": 7.394634385841464e-06, + "loss": 0.4693, "step": 12671 }, { - "epoch": 2.6571608303627596, - "grad_norm": 3.920396995546099, - "learning_rate": 6.772514597127866e-07, - "loss": 0.1144, + "epoch": 1.7893250494210675, + "grad_norm": 4.112312581379585, + "learning_rate": 7.3931626116990984e-06, + "loss": 0.6782, "step": 12672 }, { - "epoch": 2.6573705179282867, - "grad_norm": 6.093583052591885, - "learning_rate": 6.764327366997015e-07, - "loss": 0.1982, + "epoch": 1.7894662524710534, + "grad_norm": 4.206031691032457, + "learning_rate": 7.391690898138511e-06, + "loss": 0.6342, "step": 12673 }, { - "epoch": 2.6575802054938142, - "grad_norm": 3.384891574803331, - "learning_rate": 6.756144915356111e-07, - "loss": 0.1259, + "epoch": 1.7896074555210393, + "grad_norm": 2.9341974074534907, + "learning_rate": 7.3902192451939e-06, + "loss": 0.4732, "step": 12674 }, { - "epoch": 2.6577898930593413, - "grad_norm": 3.8154105391508852, - "learning_rate": 6.747967242624476e-07, - "loss": 0.1371, + "epoch": 1.7897486585710252, + "grad_norm": 3.5547094873322242, + "learning_rate": 7.388747652899467e-06, + "loss": 0.5304, "step": 12675 }, { - "epoch": 2.657999580624869, - "grad_norm": 3.393146078772084, - "learning_rate": 6.739794349221263e-07, - "loss": 0.1171, + "epoch": 1.789889861621011, + "grad_norm": 3.580350531630066, + "learning_rate": 7.3872761212894105e-06, + "loss": 0.5147, "step": 12676 }, { - "epoch": 2.6582092681903964, - "grad_norm": 4.530025571915646, - "learning_rate": 6.731626235565348e-07, - "loss": 0.1183, + "epoch": 1.790031064670997, + "grad_norm": 3.630549936150676, + "learning_rate": 7.385804650397926e-06, + "loss": 0.5666, "step": 12677 }, { - "epoch": 2.6584189557559235, - "grad_norm": 3.3788694885492245, - "learning_rate": 6.723462902075362e-07, - "loss": 0.13, + "epoch": 1.7901722677209828, + "grad_norm": 3.0463342607227797, + "learning_rate": 7.384333240259216e-06, + "loss": 0.4943, "step": 12678 }, { - "epoch": 2.658628643321451, - "grad_norm": 6.18797116219666, - "learning_rate": 6.71530434916966e-07, - "loss": 0.182, + "epoch": 1.7903134707709687, + "grad_norm": 3.785761150213752, + "learning_rate": 7.38286189090747e-06, + "loss": 0.6318, "step": 12679 }, { - "epoch": 2.6588383308869785, - "grad_norm": 3.8852023143682013, - "learning_rate": 6.707150577266431e-07, - "loss": 0.138, + "epoch": 1.7904546738209546, + "grad_norm": 2.938631974523916, + "learning_rate": 7.381390602376882e-06, + "loss": 0.408, "step": 12680 }, { - "epoch": 2.6590480184525056, - "grad_norm": 3.3361076186950527, - "learning_rate": 6.699001586783516e-07, - "loss": 0.1148, + "epoch": 1.7905958768709405, + "grad_norm": 3.6190864416059267, + "learning_rate": 7.379919374701647e-06, + "loss": 0.5577, "step": 12681 }, { - "epoch": 2.659257706018033, - "grad_norm": 5.3163228970901795, - "learning_rate": 6.690857378138616e-07, - "loss": 0.1367, + "epoch": 1.7907370799209263, + "grad_norm": 4.365291318520244, + "learning_rate": 7.378448207915956e-06, + "loss": 0.6039, "step": 12682 }, { - "epoch": 2.6594673935835607, - "grad_norm": 4.691092042699991, - "learning_rate": 6.682717951749107e-07, - "loss": 0.1526, + "epoch": 1.7908782829709122, + "grad_norm": 4.022758921784208, + "learning_rate": 7.376977102053995e-06, + "loss": 0.6264, "step": 12683 }, { - "epoch": 2.6596770811490877, - "grad_norm": 3.061695848672184, - "learning_rate": 6.674583308032146e-07, - "loss": 0.1128, + "epoch": 1.791019486020898, + "grad_norm": 2.9499139044916847, + "learning_rate": 7.3755060571499555e-06, + "loss": 0.4409, "step": 12684 }, { - "epoch": 2.6598867687146153, - "grad_norm": 4.352388606275134, - "learning_rate": 6.666453447404664e-07, - "loss": 0.1455, + "epoch": 1.791160689070884, + "grad_norm": 3.4629253714726933, + "learning_rate": 7.374035073238019e-06, + "loss": 0.4921, "step": 12685 }, { - "epoch": 2.660096456280143, - "grad_norm": 3.7763577046927597, - "learning_rate": 6.658328370283318e-07, - "loss": 0.1566, + "epoch": 1.7913018921208699, + "grad_norm": 3.64892059653003, + "learning_rate": 7.372564150352373e-06, + "loss": 0.5999, "step": 12686 }, { - "epoch": 2.66030614384567, - "grad_norm": 5.383267788256102, - "learning_rate": 6.65020807708453e-07, - "loss": 0.165, + "epoch": 1.7914430951708558, + "grad_norm": 4.007123129596786, + "learning_rate": 7.3710932885272025e-06, + "loss": 0.6305, "step": 12687 }, { - "epoch": 2.6605158314111974, - "grad_norm": 4.0731842092449835, - "learning_rate": 6.642092568224479e-07, - "loss": 0.1476, + "epoch": 1.7915842982208416, + "grad_norm": 4.006839754062654, + "learning_rate": 7.369622487796689e-06, + "loss": 0.7175, "step": 12688 }, { - "epoch": 2.660725518976725, - "grad_norm": 5.148919127682927, - "learning_rate": 6.63398184411912e-07, - "loss": 0.1216, + "epoch": 1.7917255012708275, + "grad_norm": 3.1225747930875065, + "learning_rate": 7.3681517481950115e-06, + "loss": 0.4113, "step": 12689 }, { - "epoch": 2.660935206542252, - "grad_norm": 5.3853521539558225, - "learning_rate": 6.62587590518412e-07, - "loss": 0.195, + "epoch": 1.7918667043208134, + "grad_norm": 3.6881009908552653, + "learning_rate": 7.366681069756352e-06, + "loss": 0.5417, "step": 12690 }, { - "epoch": 2.6611448941077795, - "grad_norm": 3.7112029207200585, - "learning_rate": 6.617774751834926e-07, - "loss": 0.1273, + "epoch": 1.7920079073707993, + "grad_norm": 3.434834778237671, + "learning_rate": 7.365210452514887e-06, + "loss": 0.5915, "step": 12691 }, { - "epoch": 2.6613545816733066, - "grad_norm": 5.6438188086742365, - "learning_rate": 6.609678384486728e-07, - "loss": 0.1895, + "epoch": 1.7921491104207852, + "grad_norm": 3.2771102630452615, + "learning_rate": 7.363739896504795e-06, + "loss": 0.5171, "step": 12692 }, { - "epoch": 2.661564269238834, - "grad_norm": 3.585877251274759, - "learning_rate": 6.601586803554516e-07, - "loss": 0.079, + "epoch": 1.792290313470771, + "grad_norm": 3.3386181979533656, + "learning_rate": 7.362269401760249e-06, + "loss": 0.5369, "step": 12693 }, { - "epoch": 2.6617739568043612, - "grad_norm": 4.7552206384993845, - "learning_rate": 6.593500009452969e-07, - "loss": 0.1839, + "epoch": 1.792431516520757, + "grad_norm": 4.0791853812449945, + "learning_rate": 7.360798968315419e-06, + "loss": 0.6142, "step": 12694 }, { - "epoch": 2.6619836443698888, - "grad_norm": 3.4343884122717214, - "learning_rate": 6.585418002596533e-07, - "loss": 0.0836, + "epoch": 1.7925727195707428, + "grad_norm": 2.901189090695694, + "learning_rate": 7.359328596204485e-06, + "loss": 0.442, "step": 12695 }, { - "epoch": 2.6621933319354163, - "grad_norm": 4.960552437173351, - "learning_rate": 6.577340783399466e-07, - "loss": 0.2143, + "epoch": 1.7927139226207287, + "grad_norm": 3.8608285474008586, + "learning_rate": 7.357858285461612e-06, + "loss": 0.6361, "step": 12696 }, { - "epoch": 2.6624030195009434, - "grad_norm": 5.3369331504361766, - "learning_rate": 6.569268352275693e-07, - "loss": 0.1636, + "epoch": 1.7928551256707146, + "grad_norm": 3.125412164644823, + "learning_rate": 7.356388036120971e-06, + "loss": 0.5036, "step": 12697 }, { - "epoch": 2.662612707066471, - "grad_norm": 5.773745303408101, - "learning_rate": 6.561200709638982e-07, - "loss": 0.2039, + "epoch": 1.7929963287207005, + "grad_norm": 3.918134380335271, + "learning_rate": 7.35491784821673e-06, + "loss": 0.7047, "step": 12698 }, { - "epoch": 2.6628223946319984, - "grad_norm": 4.01504535482958, - "learning_rate": 6.553137855902802e-07, - "loss": 0.1053, + "epoch": 1.7931375317706864, + "grad_norm": 3.573750068360653, + "learning_rate": 7.353447721783058e-06, + "loss": 0.5853, "step": 12699 }, { - "epoch": 2.6630320821975255, - "grad_norm": 3.3015730143836386, - "learning_rate": 6.545079791480369e-07, - "loss": 0.1182, + "epoch": 1.7932787348206722, + "grad_norm": 3.575881732531242, + "learning_rate": 7.351977656854118e-06, + "loss": 0.5195, "step": 12700 }, { - "epoch": 2.663241769763053, - "grad_norm": 3.8974302878975218, - "learning_rate": 6.537026516784672e-07, - "loss": 0.1272, + "epoch": 1.7934199378706581, + "grad_norm": 3.13267908164547, + "learning_rate": 7.350507653464075e-06, + "loss": 0.4429, "step": 12701 }, { - "epoch": 2.6634514573285806, - "grad_norm": 3.4911772367544787, - "learning_rate": 6.528978032228506e-07, - "loss": 0.0909, + "epoch": 1.793561140920644, + "grad_norm": 3.898376231622971, + "learning_rate": 7.349037711647089e-06, + "loss": 0.6227, "step": 12702 }, { - "epoch": 2.6636611448941077, - "grad_norm": 4.311480612882354, - "learning_rate": 6.520934338224305e-07, - "loss": 0.1431, + "epoch": 1.7937023439706299, + "grad_norm": 3.236928022573513, + "learning_rate": 7.347567831437318e-06, + "loss": 0.4249, "step": 12703 }, { - "epoch": 2.663870832459635, - "grad_norm": 4.51776396315153, - "learning_rate": 6.512895435184352e-07, - "loss": 0.1716, + "epoch": 1.7938435470206158, + "grad_norm": 3.7765891546651127, + "learning_rate": 7.346098012868928e-06, + "loss": 0.6913, "step": 12704 }, { - "epoch": 2.6640805200251627, - "grad_norm": 4.578104621472425, - "learning_rate": 6.504861323520673e-07, - "loss": 0.1566, + "epoch": 1.7939847500706017, + "grad_norm": 3.7215752914211824, + "learning_rate": 7.344628255976074e-06, + "loss": 0.4745, "step": 12705 }, { - "epoch": 2.66429020759069, - "grad_norm": 5.210668449304779, - "learning_rate": 6.496832003644993e-07, - "loss": 0.1581, + "epoch": 1.7941259531205875, + "grad_norm": 3.3203337774186856, + "learning_rate": 7.34315856079291e-06, + "loss": 0.51, "step": 12706 }, { - "epoch": 2.6644998951562173, - "grad_norm": 5.803265303991464, - "learning_rate": 6.48880747596885e-07, - "loss": 0.1366, + "epoch": 1.7942671561705734, + "grad_norm": 2.86106333199991, + "learning_rate": 7.341688927353596e-06, + "loss": 0.446, "step": 12707 }, { - "epoch": 2.664709582721745, - "grad_norm": 4.1144914060496065, - "learning_rate": 6.480787740903516e-07, - "loss": 0.141, + "epoch": 1.7944083592205593, + "grad_norm": 3.4183606113008844, + "learning_rate": 7.340219355692284e-06, + "loss": 0.5195, "step": 12708 }, { - "epoch": 2.664919270287272, - "grad_norm": 4.952466455763738, - "learning_rate": 6.472772798859994e-07, - "loss": 0.1361, + "epoch": 1.7945495622705452, + "grad_norm": 3.7909207638006537, + "learning_rate": 7.338749845843127e-06, + "loss": 0.6058, "step": 12709 }, { - "epoch": 2.6651289578527995, - "grad_norm": 4.669109916458371, - "learning_rate": 6.464762650249101e-07, - "loss": 0.1349, + "epoch": 1.794690765320531, + "grad_norm": 3.2915240197073654, + "learning_rate": 7.337280397840272e-06, + "loss": 0.5921, "step": 12710 }, { - "epoch": 2.6653386454183265, - "grad_norm": 3.683386887504382, - "learning_rate": 6.456757295481341e-07, - "loss": 0.1353, + "epoch": 1.794831968370517, + "grad_norm": 3.413674397771417, + "learning_rate": 7.335811011717868e-06, + "loss": 0.4837, "step": 12711 }, { - "epoch": 2.665548332983854, - "grad_norm": 4.820214735219044, - "learning_rate": 6.44875673496701e-07, - "loss": 0.1694, + "epoch": 1.7949731714205028, + "grad_norm": 2.890164757083276, + "learning_rate": 7.334341687510069e-06, + "loss": 0.485, "step": 12712 }, { - "epoch": 2.665758020549381, - "grad_norm": 3.55495293406985, - "learning_rate": 6.440760969116145e-07, - "loss": 0.1116, + "epoch": 1.7951143744704887, + "grad_norm": 3.733623891650265, + "learning_rate": 7.332872425251017e-06, + "loss": 0.6677, "step": 12713 }, { - "epoch": 2.6659677081149087, - "grad_norm": 3.7451704581002034, - "learning_rate": 6.432769998338573e-07, - "loss": 0.1501, + "epoch": 1.7952555775204746, + "grad_norm": 3.340627749826936, + "learning_rate": 7.331403224974859e-06, + "loss": 0.521, "step": 12714 }, { - "epoch": 2.666177395680436, - "grad_norm": 5.135749258720896, - "learning_rate": 6.424783823043812e-07, - "loss": 0.1345, + "epoch": 1.7953967805704605, + "grad_norm": 3.2876621426675388, + "learning_rate": 7.329934086715735e-06, + "loss": 0.4876, "step": 12715 }, { - "epoch": 2.6663870832459633, - "grad_norm": 4.696369053061642, - "learning_rate": 6.416802443641168e-07, - "loss": 0.1583, + "epoch": 1.7955379836204464, + "grad_norm": 3.9149719699556877, + "learning_rate": 7.328465010507791e-06, + "loss": 0.5232, "step": 12716 }, { - "epoch": 2.666596770811491, - "grad_norm": 3.5069320664738832, - "learning_rate": 6.408825860539736e-07, - "loss": 0.1461, + "epoch": 1.795679186670432, + "grad_norm": 4.648901911737773, + "learning_rate": 7.326995996385167e-06, + "loss": 0.7846, "step": 12717 }, { - "epoch": 2.6668064583770184, - "grad_norm": 6.527851261359122, - "learning_rate": 6.400854074148277e-07, - "loss": 0.1388, + "epoch": 1.795820389720418, + "grad_norm": 3.9886244469537693, + "learning_rate": 7.325527044382004e-06, + "loss": 0.6559, "step": 12718 }, { - "epoch": 2.6670161459425454, - "grad_norm": 3.5905993534070486, - "learning_rate": 6.392887084875399e-07, - "loss": 0.1031, + "epoch": 1.7959615927704038, + "grad_norm": 3.405930755157806, + "learning_rate": 7.324058154532436e-06, + "loss": 0.5244, "step": 12719 }, { - "epoch": 2.667225833508073, - "grad_norm": 4.572313198679673, - "learning_rate": 6.384924893129418e-07, - "loss": 0.1564, + "epoch": 1.7961027958203897, + "grad_norm": 2.918366906182609, + "learning_rate": 7.322589326870597e-06, + "loss": 0.433, "step": 12720 }, { - "epoch": 2.6674355210736005, - "grad_norm": 5.669899754180489, - "learning_rate": 6.376967499318387e-07, - "loss": 0.1828, + "epoch": 1.7962439988703756, + "grad_norm": 4.031454695044306, + "learning_rate": 7.3211205614306295e-06, + "loss": 0.6516, "step": 12721 }, { - "epoch": 2.6676452086391276, - "grad_norm": 3.63017708414544, - "learning_rate": 6.369014903850146e-07, - "loss": 0.1203, + "epoch": 1.7963852019203614, + "grad_norm": 3.6081581404825736, + "learning_rate": 7.319651858246661e-06, + "loss": 0.554, "step": 12722 }, { - "epoch": 2.667854896204655, - "grad_norm": 3.5330825384954263, - "learning_rate": 6.361067107132324e-07, - "loss": 0.101, + "epoch": 1.7965264049703473, + "grad_norm": 3.7419144127774366, + "learning_rate": 7.318183217352826e-06, + "loss": 0.5902, "step": 12723 }, { - "epoch": 2.6680645837701826, - "grad_norm": 3.2888326149734115, - "learning_rate": 6.353124109572195e-07, - "loss": 0.1054, + "epoch": 1.7966676080203332, + "grad_norm": 4.742621704869029, + "learning_rate": 7.316714638783257e-06, + "loss": 0.6064, "step": 12724 }, { - "epoch": 2.6682742713357097, - "grad_norm": 4.189764617241636, - "learning_rate": 6.345185911576867e-07, - "loss": 0.1526, + "epoch": 1.796808811070319, + "grad_norm": 4.82273907888184, + "learning_rate": 7.315246122572079e-06, + "loss": 0.8922, "step": 12725 }, { - "epoch": 2.6684839589012372, - "grad_norm": 4.35691479083874, - "learning_rate": 6.337252513553216e-07, - "loss": 0.1482, + "epoch": 1.796950014120305, + "grad_norm": 3.1414286421046795, + "learning_rate": 7.313777668753426e-06, + "loss": 0.5858, "step": 12726 }, { - "epoch": 2.6686936464667648, - "grad_norm": 6.134140266964438, - "learning_rate": 6.329323915907826e-07, - "loss": 0.1883, + "epoch": 1.7970912171702909, + "grad_norm": 2.9337064126836556, + "learning_rate": 7.312309277361415e-06, + "loss": 0.4388, "step": 12727 }, { - "epoch": 2.668903334032292, - "grad_norm": 9.93398649479604, - "learning_rate": 6.321400119047039e-07, - "loss": 0.1593, + "epoch": 1.7972324202202767, + "grad_norm": 4.370925828394352, + "learning_rate": 7.310840948430178e-06, + "loss": 0.5071, "step": 12728 }, { - "epoch": 2.6691130215978194, - "grad_norm": 3.371213532349761, - "learning_rate": 6.313481123376975e-07, - "loss": 0.116, + "epoch": 1.7973736232702626, + "grad_norm": 3.8099790651171417, + "learning_rate": 7.309372681993834e-06, + "loss": 0.5017, "step": 12729 }, { - "epoch": 2.6693227091633465, - "grad_norm": 4.218904502896088, - "learning_rate": 6.305566929303486e-07, - "loss": 0.1059, + "epoch": 1.7975148263202485, + "grad_norm": 3.4049544222213997, + "learning_rate": 7.307904478086507e-06, + "loss": 0.4996, "step": 12730 }, { - "epoch": 2.669532396728874, - "grad_norm": 4.809198942611835, - "learning_rate": 6.297657537232216e-07, - "loss": 0.144, + "epoch": 1.7976560293702344, + "grad_norm": 3.1102297264107914, + "learning_rate": 7.30643633674232e-06, + "loss": 0.4769, "step": 12731 }, { - "epoch": 2.669742084294401, - "grad_norm": 5.84224414795106, - "learning_rate": 6.289752947568506e-07, - "loss": 0.1245, + "epoch": 1.7977972324202203, + "grad_norm": 4.133678593446216, + "learning_rate": 7.304968257995384e-06, + "loss": 0.6377, "step": 12732 }, { - "epoch": 2.6699517718599286, - "grad_norm": 4.716220099170551, - "learning_rate": 6.281853160717477e-07, - "loss": 0.12, + "epoch": 1.7979384354702062, + "grad_norm": 3.590422903533797, + "learning_rate": 7.303500241879826e-06, + "loss": 0.6099, "step": 12733 }, { - "epoch": 2.670161459425456, - "grad_norm": 4.383744220988682, - "learning_rate": 6.273958177084028e-07, - "loss": 0.1442, + "epoch": 1.798079638520192, + "grad_norm": 3.8299392834917074, + "learning_rate": 7.3020322884297565e-06, + "loss": 0.5889, "step": 12734 }, { - "epoch": 2.670371146990983, - "grad_norm": 4.991446289480173, - "learning_rate": 6.266067997072789e-07, - "loss": 0.1545, + "epoch": 1.798220841570178, + "grad_norm": 4.0224886958985415, + "learning_rate": 7.300564397679295e-06, + "loss": 0.6358, "step": 12735 }, { - "epoch": 2.6705808345565107, - "grad_norm": 3.658300588274846, - "learning_rate": 6.258182621088138e-07, - "loss": 0.1404, + "epoch": 1.7983620446201638, + "grad_norm": 3.3980633739495976, + "learning_rate": 7.299096569662549e-06, + "loss": 0.4936, "step": 12736 }, { - "epoch": 2.6707905221220383, - "grad_norm": 3.8698224827336096, - "learning_rate": 6.250302049534207e-07, - "loss": 0.1376, + "epoch": 1.7985032476701497, + "grad_norm": 3.694730452982102, + "learning_rate": 7.29762880441363e-06, + "loss": 0.6171, "step": 12737 }, { - "epoch": 2.6710002096875654, - "grad_norm": 5.400434770282956, - "learning_rate": 6.242426282814906e-07, - "loss": 0.1515, + "epoch": 1.7986444507201356, + "grad_norm": 4.054157452143602, + "learning_rate": 7.296161101966653e-06, + "loss": 0.6675, "step": 12738 }, { - "epoch": 2.671209897253093, - "grad_norm": 4.198740784257216, - "learning_rate": 6.234555321333858e-07, - "loss": 0.1677, + "epoch": 1.7987856537701215, + "grad_norm": 3.137732513471252, + "learning_rate": 7.294693462355722e-06, + "loss": 0.4635, "step": 12739 }, { - "epoch": 2.6714195848186204, - "grad_norm": 3.306817489466332, - "learning_rate": 6.226689165494493e-07, - "loss": 0.1101, + "epoch": 1.7989268568201073, + "grad_norm": 4.106962465112127, + "learning_rate": 7.293225885614948e-06, + "loss": 0.618, "step": 12740 }, { - "epoch": 2.6716292723841475, - "grad_norm": 4.004903447252203, - "learning_rate": 6.218827815699946e-07, - "loss": 0.1272, + "epoch": 1.7990680598700932, + "grad_norm": 3.351001136657417, + "learning_rate": 7.291758371778434e-06, + "loss": 0.5186, "step": 12741 }, { - "epoch": 2.671838959949675, - "grad_norm": 4.6228323884893765, - "learning_rate": 6.210971272353117e-07, - "loss": 0.1407, + "epoch": 1.799209262920079, + "grad_norm": 3.095535584675286, + "learning_rate": 7.290290920880287e-06, + "loss": 0.4581, "step": 12742 }, { - "epoch": 2.6720486475152025, - "grad_norm": 3.642884107118108, - "learning_rate": 6.203119535856683e-07, - "loss": 0.1039, + "epoch": 1.799350465970065, + "grad_norm": 4.8790316638921745, + "learning_rate": 7.288823532954612e-06, + "loss": 0.5173, "step": 12743 }, { - "epoch": 2.6722583350807296, - "grad_norm": 4.644345393985968, - "learning_rate": 6.195272606613079e-07, - "loss": 0.1555, + "epoch": 1.7994916690200509, + "grad_norm": 3.4235412006221924, + "learning_rate": 7.2873562080355045e-06, + "loss": 0.5957, "step": 12744 }, { - "epoch": 2.672468022646257, - "grad_norm": 4.834913009397528, - "learning_rate": 6.187430485024426e-07, - "loss": 0.1531, + "epoch": 1.7996328720700367, + "grad_norm": 3.61441807486885, + "learning_rate": 7.285888946157067e-06, + "loss": 0.6229, "step": 12745 }, { - "epoch": 2.6726777102117847, - "grad_norm": 3.776297970259029, - "learning_rate": 6.179593171492671e-07, - "loss": 0.1471, + "epoch": 1.7997740751200226, + "grad_norm": 3.874448609758326, + "learning_rate": 7.284421747353396e-06, + "loss": 0.6714, "step": 12746 }, { - "epoch": 2.6728873977773118, - "grad_norm": 4.369053425172367, - "learning_rate": 6.171760666419491e-07, - "loss": 0.1271, + "epoch": 1.7999152781700085, + "grad_norm": 3.1205731565476236, + "learning_rate": 7.282954611658593e-06, + "loss": 0.4913, "step": 12747 }, { - "epoch": 2.6730970853428393, - "grad_norm": 5.215239506788188, - "learning_rate": 6.163932970206333e-07, - "loss": 0.1631, + "epoch": 1.8000564812199944, + "grad_norm": 3.6368251397556226, + "learning_rate": 7.281487539106752e-06, + "loss": 0.3863, "step": 12748 }, { - "epoch": 2.6733067729083664, - "grad_norm": 4.809793509421329, - "learning_rate": 6.15611008325433e-07, - "loss": 0.1637, + "epoch": 1.8001976842699803, + "grad_norm": 4.237562202468708, + "learning_rate": 7.2800205297319635e-06, + "loss": 0.5885, "step": 12749 }, { - "epoch": 2.673516460473894, - "grad_norm": 4.483612475357762, - "learning_rate": 6.148292005964451e-07, - "loss": 0.1445, + "epoch": 1.8003388873199662, + "grad_norm": 3.9165445804160797, + "learning_rate": 7.278553583568326e-06, + "loss": 0.7061, "step": 12750 }, { - "epoch": 2.673726148039421, - "grad_norm": 5.147480928074263, - "learning_rate": 6.140478738737409e-07, - "loss": 0.1354, + "epoch": 1.800480090369952, + "grad_norm": 3.9708163610009493, + "learning_rate": 7.277086700649929e-06, + "loss": 0.5641, "step": 12751 }, { - "epoch": 2.6739358356049485, - "grad_norm": 4.156572352274291, - "learning_rate": 6.132670281973618e-07, - "loss": 0.144, + "epoch": 1.8006212934199377, + "grad_norm": 3.708905010268396, + "learning_rate": 7.275619881010861e-06, + "loss": 0.4969, "step": 12752 }, { - "epoch": 2.674145523170476, - "grad_norm": 4.906318160000465, - "learning_rate": 6.124866636073257e-07, - "loss": 0.1418, + "epoch": 1.8007624964699236, + "grad_norm": 2.8383631044211186, + "learning_rate": 7.27415312468521e-06, + "loss": 0.4671, "step": 12753 }, { - "epoch": 2.674355210736003, - "grad_norm": 4.469233705111311, - "learning_rate": 6.117067801436327e-07, - "loss": 0.1651, + "epoch": 1.8009036995199095, + "grad_norm": 3.268540859762566, + "learning_rate": 7.27268643170706e-06, + "loss": 0.4352, "step": 12754 }, { - "epoch": 2.6745648983015307, - "grad_norm": 3.8880825248755406, - "learning_rate": 6.109273778462477e-07, - "loss": 0.1033, + "epoch": 1.8010449025698954, + "grad_norm": 3.8921672134913607, + "learning_rate": 7.271219802110503e-06, + "loss": 0.6199, "step": 12755 }, { - "epoch": 2.674774585867058, - "grad_norm": 3.4653958656996733, - "learning_rate": 6.101484567551208e-07, - "loss": 0.1273, + "epoch": 1.8011861056198812, + "grad_norm": 3.335159707890712, + "learning_rate": 7.269753235929617e-06, + "loss": 0.5978, "step": 12756 }, { - "epoch": 2.6749842734325853, - "grad_norm": 3.8904701515917064, - "learning_rate": 6.093700169101724e-07, - "loss": 0.1174, + "epoch": 1.8013273086698671, + "grad_norm": 3.6669913236960343, + "learning_rate": 7.268286733198488e-06, + "loss": 0.5357, "step": 12757 }, { - "epoch": 2.675193960998113, - "grad_norm": 4.025990802211541, - "learning_rate": 6.08592058351295e-07, - "loss": 0.1202, + "epoch": 1.801468511719853, + "grad_norm": 3.105498746146766, + "learning_rate": 7.2668202939511946e-06, + "loss": 0.4947, "step": 12758 }, { - "epoch": 2.6754036485636403, - "grad_norm": 3.784841784852634, - "learning_rate": 6.078145811183645e-07, - "loss": 0.1089, + "epoch": 1.8016097147698389, + "grad_norm": 3.744543631961202, + "learning_rate": 7.265353918221818e-06, + "loss": 0.5395, "step": 12759 }, { - "epoch": 2.6756133361291674, - "grad_norm": 6.200025015982894, - "learning_rate": 6.070375852512267e-07, - "loss": 0.1646, + "epoch": 1.8017509178198248, + "grad_norm": 3.4286925834638184, + "learning_rate": 7.263887606044437e-06, + "loss": 0.494, "step": 12760 }, { - "epoch": 2.675823023694695, - "grad_norm": 4.557343211336646, - "learning_rate": 6.062610707897054e-07, - "loss": 0.1013, + "epoch": 1.8018921208698107, + "grad_norm": 4.277088564299397, + "learning_rate": 7.262421357453126e-06, + "loss": 0.6537, "step": 12761 }, { - "epoch": 2.6760327112602225, - "grad_norm": 4.492603514523045, - "learning_rate": 6.054850377735943e-07, - "loss": 0.1446, + "epoch": 1.8020333239197965, + "grad_norm": 3.773746061883331, + "learning_rate": 7.260955172481959e-06, + "loss": 0.5608, "step": 12762 }, { - "epoch": 2.6762423988257495, - "grad_norm": 5.19013901288648, - "learning_rate": 6.047094862426706e-07, - "loss": 0.137, + "epoch": 1.8021745269697824, + "grad_norm": 4.028856070380123, + "learning_rate": 7.2594890511650095e-06, + "loss": 0.5824, "step": 12763 }, { - "epoch": 2.676452086391277, - "grad_norm": 3.906319130881846, - "learning_rate": 6.039344162366812e-07, - "loss": 0.13, + "epoch": 1.8023157300197683, + "grad_norm": 3.9433985389919854, + "learning_rate": 7.258022993536352e-06, + "loss": 0.5229, "step": 12764 }, { - "epoch": 2.6766617739568046, - "grad_norm": 5.2150404717990755, - "learning_rate": 6.031598277953488e-07, - "loss": 0.1175, + "epoch": 1.8024569330697542, + "grad_norm": 4.015588004241889, + "learning_rate": 7.2565569996300575e-06, + "loss": 0.7205, "step": 12765 }, { - "epoch": 2.6768714615223317, - "grad_norm": 4.797787609990199, - "learning_rate": 6.023857209583739e-07, - "loss": 0.1495, + "epoch": 1.80259813611974, + "grad_norm": 3.987801265229566, + "learning_rate": 7.2550910694801905e-06, + "loss": 0.5594, "step": 12766 }, { - "epoch": 2.677081149087859, - "grad_norm": 4.897544976879718, - "learning_rate": 6.016120957654281e-07, - "loss": 0.1622, + "epoch": 1.802739339169726, + "grad_norm": 4.186581520816828, + "learning_rate": 7.253625203120823e-06, + "loss": 0.572, "step": 12767 }, { - "epoch": 2.6772908366533863, - "grad_norm": 3.597809091248149, - "learning_rate": 6.008389522561653e-07, - "loss": 0.1368, + "epoch": 1.8028805422197118, + "grad_norm": 3.4068339951964326, + "learning_rate": 7.2521594005860205e-06, + "loss": 0.5226, "step": 12768 }, { - "epoch": 2.677500524218914, - "grad_norm": 5.400485795253416, - "learning_rate": 6.000662904702081e-07, - "loss": 0.1931, + "epoch": 1.8030217452696977, + "grad_norm": 3.4934247803673513, + "learning_rate": 7.2506936619098486e-06, + "loss": 0.6156, "step": 12769 }, { - "epoch": 2.677710211784441, - "grad_norm": 4.042867120557339, - "learning_rate": 5.992941104471562e-07, - "loss": 0.1386, + "epoch": 1.8031629483196836, + "grad_norm": 3.6421779793365947, + "learning_rate": 7.249227987126368e-06, + "loss": 0.4641, "step": 12770 }, { - "epoch": 2.6779198993499684, - "grad_norm": 3.124741202754535, - "learning_rate": 5.985224122265854e-07, - "loss": 0.1037, + "epoch": 1.8033041513696695, + "grad_norm": 4.659846481647254, + "learning_rate": 7.247762376269638e-06, + "loss": 0.6729, "step": 12771 }, { - "epoch": 2.678129586915496, - "grad_norm": 4.703141696993759, - "learning_rate": 5.977511958480475e-07, - "loss": 0.1326, + "epoch": 1.8034453544196554, + "grad_norm": 3.6451761548937327, + "learning_rate": 7.246296829373723e-06, + "loss": 0.5617, "step": 12772 }, { - "epoch": 2.678339274481023, - "grad_norm": 3.5240236315409783, - "learning_rate": 5.969804613510688e-07, - "loss": 0.1021, + "epoch": 1.8035865574696412, + "grad_norm": 4.270266678295027, + "learning_rate": 7.2448313464726805e-06, + "loss": 0.6425, "step": 12773 }, { - "epoch": 2.6785489620465506, - "grad_norm": 6.268208339195901, - "learning_rate": 5.962102087751487e-07, - "loss": 0.1636, + "epoch": 1.8037277605196271, + "grad_norm": 3.745646676164853, + "learning_rate": 7.24336592760057e-06, + "loss": 0.5797, "step": 12774 }, { - "epoch": 2.678758649612078, - "grad_norm": 4.3717317607729935, - "learning_rate": 5.954404381597678e-07, - "loss": 0.1277, + "epoch": 1.803868963569613, + "grad_norm": 3.3680875064801157, + "learning_rate": 7.2419005727914405e-06, + "loss": 0.4409, "step": 12775 }, { - "epoch": 2.678968337177605, - "grad_norm": 5.739817259074334, - "learning_rate": 5.946711495443735e-07, - "loss": 0.1535, + "epoch": 1.804010166619599, + "grad_norm": 4.343770900463015, + "learning_rate": 7.240435282079352e-06, + "loss": 0.5915, "step": 12776 }, { - "epoch": 2.6791780247431327, - "grad_norm": 4.049907933444462, - "learning_rate": 5.939023429683965e-07, - "loss": 0.1112, + "epoch": 1.8041513696695848, + "grad_norm": 3.82625734261806, + "learning_rate": 7.23897005549836e-06, + "loss": 0.6282, "step": 12777 }, { - "epoch": 2.6793877123086602, - "grad_norm": 3.5271443630655255, - "learning_rate": 5.931340184712387e-07, - "loss": 0.1149, + "epoch": 1.8042925727195707, + "grad_norm": 3.311083247848096, + "learning_rate": 7.237504893082507e-06, + "loss": 0.5909, "step": 12778 }, { - "epoch": 2.6795973998741873, - "grad_norm": 3.9077488641426994, - "learning_rate": 5.923661760922772e-07, - "loss": 0.1387, + "epoch": 1.8044337757695565, + "grad_norm": 3.6813289229231843, + "learning_rate": 7.23603979486585e-06, + "loss": 0.6215, "step": 12779 }, { - "epoch": 2.679807087439715, - "grad_norm": 3.5995636807479054, - "learning_rate": 5.915988158708652e-07, - "loss": 0.1163, + "epoch": 1.8045749788195424, + "grad_norm": 4.074242640755421, + "learning_rate": 7.234574760882431e-06, + "loss": 0.6597, "step": 12780 }, { - "epoch": 2.6800167750052424, - "grad_norm": 4.515959490940363, - "learning_rate": 5.908319378463345e-07, - "loss": 0.1526, + "epoch": 1.8047161818695283, + "grad_norm": 3.153750295800099, + "learning_rate": 7.233109791166302e-06, + "loss": 0.4488, "step": 12781 }, { - "epoch": 2.6802264625707695, - "grad_norm": 3.6460911096116213, - "learning_rate": 5.900655420579848e-07, - "loss": 0.1096, + "epoch": 1.8048573849195142, + "grad_norm": 3.67938824784539, + "learning_rate": 7.2316448857515076e-06, + "loss": 0.4815, "step": 12782 }, { - "epoch": 2.680436150136297, - "grad_norm": 4.64507960612798, - "learning_rate": 5.892996285450958e-07, - "loss": 0.1556, + "epoch": 1.8049985879695, + "grad_norm": 3.3171524152038545, + "learning_rate": 7.230180044672088e-06, + "loss": 0.4601, "step": 12783 }, { - "epoch": 2.6806458377018245, - "grad_norm": 5.354660174517429, - "learning_rate": 5.885341973469239e-07, - "loss": 0.1343, + "epoch": 1.805139791019486, + "grad_norm": 3.4718927648950526, + "learning_rate": 7.22871526796209e-06, + "loss": 0.4943, "step": 12784 }, { - "epoch": 2.6808555252673516, - "grad_norm": 4.0555097606634165, - "learning_rate": 5.877692485026986e-07, - "loss": 0.1192, + "epoch": 1.8052809940694718, + "grad_norm": 3.283474010684326, + "learning_rate": 7.2272505556555515e-06, + "loss": 0.4923, "step": 12785 }, { - "epoch": 2.681065212832879, - "grad_norm": 4.209022904040481, - "learning_rate": 5.870047820516222e-07, - "loss": 0.0976, + "epoch": 1.8054221971194577, + "grad_norm": 3.2633641780529987, + "learning_rate": 7.225785907786516e-06, + "loss": 0.4553, "step": 12786 }, { - "epoch": 2.681274900398406, - "grad_norm": 4.025159569595992, - "learning_rate": 5.862407980328788e-07, - "loss": 0.1392, + "epoch": 1.8055634001694436, + "grad_norm": 3.537788569823869, + "learning_rate": 7.2243213243890146e-06, + "loss": 0.4854, "step": 12787 }, { - "epoch": 2.6814845879639337, - "grad_norm": 4.091986784021727, - "learning_rate": 5.854772964856192e-07, - "loss": 0.1421, + "epoch": 1.8057046032194295, + "grad_norm": 4.100864459972184, + "learning_rate": 7.222856805497087e-06, + "loss": 0.6192, "step": 12788 }, { - "epoch": 2.6816942755294613, - "grad_norm": 3.5470673576811294, - "learning_rate": 5.847142774489789e-07, - "loss": 0.1271, + "epoch": 1.8058458062694154, + "grad_norm": 2.3592471401073594, + "learning_rate": 7.221392351144767e-06, + "loss": 0.3515, "step": 12789 }, { - "epoch": 2.6819039630949884, - "grad_norm": 3.55769407996581, - "learning_rate": 5.839517409620621e-07, - "loss": 0.1004, + "epoch": 1.8059870093194013, + "grad_norm": 3.0962669278812935, + "learning_rate": 7.219927961366091e-06, + "loss": 0.5731, "step": 12790 }, { - "epoch": 2.682113650660516, - "grad_norm": 3.7104387138851522, - "learning_rate": 5.831896870639475e-07, - "loss": 0.1103, + "epoch": 1.8061282123693871, + "grad_norm": 3.506394869642217, + "learning_rate": 7.218463636195088e-06, + "loss": 0.5345, "step": 12791 }, { - "epoch": 2.682323338226043, - "grad_norm": 3.469020609432843, - "learning_rate": 5.82428115793695e-07, - "loss": 0.1193, + "epoch": 1.806269415419373, + "grad_norm": 3.3156456595344657, + "learning_rate": 7.216999375665785e-06, + "loss": 0.509, "step": 12792 }, { - "epoch": 2.6825330257915705, - "grad_norm": 4.22258198449591, - "learning_rate": 5.816670271903369e-07, - "loss": 0.1549, + "epoch": 1.806410618469359, + "grad_norm": 3.6154694903700886, + "learning_rate": 7.215535179812219e-06, + "loss": 0.5022, "step": 12793 }, { - "epoch": 2.682742713357098, - "grad_norm": 4.228763201405874, - "learning_rate": 5.809064212928783e-07, - "loss": 0.1227, + "epoch": 1.8065518215193448, + "grad_norm": 4.283454322570586, + "learning_rate": 7.214071048668414e-06, + "loss": 0.7146, "step": 12794 }, { - "epoch": 2.682952400922625, - "grad_norm": 3.5002008437960215, - "learning_rate": 5.801462981403017e-07, - "loss": 0.1132, + "epoch": 1.8066930245693307, + "grad_norm": 3.219941638795533, + "learning_rate": 7.212606982268393e-06, + "loss": 0.4696, "step": 12795 }, { - "epoch": 2.6831620884881526, - "grad_norm": 6.756367338151461, - "learning_rate": 5.793866577715657e-07, - "loss": 0.172, + "epoch": 1.8068342276193166, + "grad_norm": 3.496864381648772, + "learning_rate": 7.211142980646182e-06, + "loss": 0.5463, "step": 12796 }, { - "epoch": 2.68337177605368, - "grad_norm": 4.344034258328609, - "learning_rate": 5.786275002256025e-07, - "loss": 0.1567, + "epoch": 1.8069754306693024, + "grad_norm": 3.449938999060595, + "learning_rate": 7.209679043835801e-06, + "loss": 0.5483, "step": 12797 }, { - "epoch": 2.6835814636192072, - "grad_norm": 4.408965548937051, - "learning_rate": 5.778688255413212e-07, - "loss": 0.1343, + "epoch": 1.8071166337192883, + "grad_norm": 3.5003768889564197, + "learning_rate": 7.208215171871277e-06, + "loss": 0.5336, "step": 12798 }, { - "epoch": 2.6837911511847348, - "grad_norm": 4.07839422563627, - "learning_rate": 5.771106337576038e-07, - "loss": 0.1454, + "epoch": 1.8072578367692742, + "grad_norm": 3.2777859876847995, + "learning_rate": 7.206751364786626e-06, + "loss": 0.4789, "step": 12799 }, { - "epoch": 2.6840008387502623, - "grad_norm": 5.8816202036413205, - "learning_rate": 5.763529249133093e-07, - "loss": 0.1507, + "epoch": 1.80739903981926, + "grad_norm": 3.285162195889472, + "learning_rate": 7.205287622615866e-06, + "loss": 0.6362, "step": 12800 }, { - "epoch": 2.6842105263157894, - "grad_norm": 4.86602304472139, - "learning_rate": 5.755956990472711e-07, - "loss": 0.1468, + "epoch": 1.807540242869246, + "grad_norm": 3.044489379421284, + "learning_rate": 7.203823945393015e-06, + "loss": 0.4519, "step": 12801 }, { - "epoch": 2.684420213881317, - "grad_norm": 3.500719007491239, - "learning_rate": 5.748389561983015e-07, - "loss": 0.1136, + "epoch": 1.8076814459192319, + "grad_norm": 3.625506880589648, + "learning_rate": 7.20236033315209e-06, + "loss": 0.5507, "step": 12802 }, { - "epoch": 2.6846299014468444, - "grad_norm": 4.989813071866303, - "learning_rate": 5.740826964051805e-07, - "loss": 0.1539, + "epoch": 1.8078226489692177, + "grad_norm": 3.620976522708403, + "learning_rate": 7.200896785927102e-06, + "loss": 0.4848, "step": 12803 }, { - "epoch": 2.6848395890123715, - "grad_norm": 4.654407712606749, - "learning_rate": 5.733269197066693e-07, - "loss": 0.1429, + "epoch": 1.8079638520192036, + "grad_norm": 4.149646574664961, + "learning_rate": 7.199433303752064e-06, + "loss": 0.6335, "step": 12804 }, { - "epoch": 2.685049276577899, - "grad_norm": 4.5511256857098035, - "learning_rate": 5.72571626141506e-07, - "loss": 0.1456, + "epoch": 1.8081050550691895, + "grad_norm": 3.8456707744668672, + "learning_rate": 7.1979698866609845e-06, + "loss": 0.6142, "step": 12805 }, { - "epoch": 2.685258964143426, - "grad_norm": 3.4385160112379682, - "learning_rate": 5.71816815748396e-07, - "loss": 0.1246, + "epoch": 1.8082462581191754, + "grad_norm": 3.4916951947600867, + "learning_rate": 7.196506534687875e-06, + "loss": 0.5288, "step": 12806 }, { - "epoch": 2.6854686517089537, - "grad_norm": 3.699521419193321, - "learning_rate": 5.710624885660265e-07, - "loss": 0.098, + "epoch": 1.8083874611691613, + "grad_norm": 3.550521145222937, + "learning_rate": 7.195043247866744e-06, + "loss": 0.4856, "step": 12807 }, { - "epoch": 2.685678339274481, - "grad_norm": 5.1769483042255375, - "learning_rate": 5.703086446330585e-07, - "loss": 0.1509, + "epoch": 1.8085286642191472, + "grad_norm": 3.9811455053795552, + "learning_rate": 7.193580026231596e-06, + "loss": 0.5725, "step": 12808 }, { - "epoch": 2.6858880268400083, - "grad_norm": 3.9930895513634233, - "learning_rate": 5.695552839881268e-07, - "loss": 0.1271, + "epoch": 1.808669867269133, + "grad_norm": 3.780150826519457, + "learning_rate": 7.192116869816434e-06, + "loss": 0.6369, "step": 12809 }, { - "epoch": 2.686097714405536, - "grad_norm": 4.952225650666098, - "learning_rate": 5.688024066698439e-07, - "loss": 0.1403, + "epoch": 1.808811070319119, + "grad_norm": 3.529892229354472, + "learning_rate": 7.190653778655267e-06, + "loss": 0.5214, "step": 12810 }, { - "epoch": 2.686307401971063, - "grad_norm": 5.121689585926485, - "learning_rate": 5.680500127167943e-07, - "loss": 0.1411, + "epoch": 1.8089522733691048, + "grad_norm": 3.053511670228956, + "learning_rate": 7.1891907527820935e-06, + "loss": 0.4901, "step": 12811 }, { - "epoch": 2.6865170895365904, - "grad_norm": 5.195136479841332, - "learning_rate": 5.672981021675417e-07, - "loss": 0.1624, + "epoch": 1.8090934764190907, + "grad_norm": 3.5511600211171794, + "learning_rate": 7.187727792230912e-06, + "loss": 0.5284, "step": 12812 }, { - "epoch": 2.686726777102118, - "grad_norm": 5.398556005844397, - "learning_rate": 5.665466750606196e-07, - "loss": 0.1826, + "epoch": 1.8092346794690766, + "grad_norm": 2.869117548534986, + "learning_rate": 7.186264897035722e-06, + "loss": 0.373, "step": 12813 }, { - "epoch": 2.686936464667645, - "grad_norm": 4.575079031151572, - "learning_rate": 5.657957314345441e-07, - "loss": 0.136, + "epoch": 1.8093758825190625, + "grad_norm": 3.6839313224243826, + "learning_rate": 7.184802067230518e-06, + "loss": 0.5474, "step": 12814 }, { - "epoch": 2.6871461522331725, - "grad_norm": 4.432476278777013, - "learning_rate": 5.650452713277998e-07, - "loss": 0.133, + "epoch": 1.8095170855690483, + "grad_norm": 3.8759859764280913, + "learning_rate": 7.1833393028492996e-06, + "loss": 0.5487, "step": 12815 }, { - "epoch": 2.6873558397987, - "grad_norm": 3.3534408261339337, - "learning_rate": 5.642952947788483e-07, - "loss": 0.0958, + "epoch": 1.8096582886190342, + "grad_norm": 3.010458671370947, + "learning_rate": 7.18187660392606e-06, + "loss": 0.481, "step": 12816 }, { - "epoch": 2.687565527364227, - "grad_norm": 6.880326841928252, - "learning_rate": 5.635458018261286e-07, - "loss": 0.0986, + "epoch": 1.80979949166902, + "grad_norm": 3.432457030780017, + "learning_rate": 7.1804139704947904e-06, + "loss": 0.4258, "step": 12817 }, { - "epoch": 2.6877752149297547, - "grad_norm": 4.499830380610673, - "learning_rate": 5.627967925080535e-07, - "loss": 0.1437, + "epoch": 1.809940694719006, + "grad_norm": 3.4021210063784864, + "learning_rate": 7.178951402589482e-06, + "loss": 0.5436, "step": 12818 }, { - "epoch": 2.687984902495282, - "grad_norm": 4.553806998262058, - "learning_rate": 5.620482668630123e-07, - "loss": 0.1292, + "epoch": 1.8100818977689919, + "grad_norm": 3.4804388177646066, + "learning_rate": 7.17748890024413e-06, + "loss": 0.5784, "step": 12819 }, { - "epoch": 2.6881945900608093, - "grad_norm": 4.025919344498264, - "learning_rate": 5.613002249293642e-07, - "loss": 0.1542, + "epoch": 1.8102231008189777, + "grad_norm": 3.5972704113643683, + "learning_rate": 7.176026463492711e-06, + "loss": 0.5205, "step": 12820 }, { - "epoch": 2.688404277626337, - "grad_norm": 5.257580708811834, - "learning_rate": 5.605526667454519e-07, - "loss": 0.1421, + "epoch": 1.8103643038689636, + "grad_norm": 3.5505622583408862, + "learning_rate": 7.17456409236922e-06, + "loss": 0.5644, "step": 12821 }, { - "epoch": 2.6886139651918644, - "grad_norm": 3.9473085604016718, - "learning_rate": 5.598055923495859e-07, - "loss": 0.1185, + "epoch": 1.8105055069189495, + "grad_norm": 3.2008258259016173, + "learning_rate": 7.173101786907638e-06, + "loss": 0.4622, "step": 12822 }, { - "epoch": 2.6888236527573914, - "grad_norm": 4.085600488961794, - "learning_rate": 5.590590017800578e-07, - "loss": 0.1379, + "epoch": 1.8106467099689354, + "grad_norm": 3.5318862895721104, + "learning_rate": 7.171639547141949e-06, + "loss": 0.5232, "step": 12823 }, { - "epoch": 2.689033340322919, - "grad_norm": 5.6836744464965925, - "learning_rate": 5.583128950751304e-07, - "loss": 0.1305, + "epoch": 1.8107879130189213, + "grad_norm": 3.4537266409057246, + "learning_rate": 7.170177373106135e-06, + "loss": 0.4863, "step": 12824 }, { - "epoch": 2.6892430278884465, - "grad_norm": 3.81804350719673, - "learning_rate": 5.575672722730419e-07, - "loss": 0.1348, + "epoch": 1.8109291160689072, + "grad_norm": 4.4507994291648245, + "learning_rate": 7.168715264834178e-06, + "loss": 0.679, "step": 12825 }, { - "epoch": 2.6894527154539736, - "grad_norm": 3.356325668359058, - "learning_rate": 5.568221334120083e-07, - "loss": 0.1257, + "epoch": 1.811070319118893, + "grad_norm": 4.387622575291305, + "learning_rate": 7.167253222360056e-06, + "loss": 0.6606, "step": 12826 }, { - "epoch": 2.689662403019501, - "grad_norm": 4.394239455814096, - "learning_rate": 5.560774785302192e-07, - "loss": 0.1522, + "epoch": 1.811211522168879, + "grad_norm": 3.6084162764227705, + "learning_rate": 7.165791245717745e-06, + "loss": 0.5252, "step": 12827 }, { - "epoch": 2.689872090585028, - "grad_norm": 4.575388451390649, - "learning_rate": 5.553333076658374e-07, - "loss": 0.1185, + "epoch": 1.8113527252188648, + "grad_norm": 3.909371690224482, + "learning_rate": 7.164329334941225e-06, + "loss": 0.5539, "step": 12828 }, { - "epoch": 2.6900817781505557, - "grad_norm": 3.303928367948205, - "learning_rate": 5.545896208570045e-07, - "loss": 0.1082, + "epoch": 1.8114939282688507, + "grad_norm": 3.761701722849796, + "learning_rate": 7.162867490064463e-06, + "loss": 0.5801, "step": 12829 }, { - "epoch": 2.690291465716083, - "grad_norm": 3.1300046728189206, - "learning_rate": 5.538464181418379e-07, - "loss": 0.0983, + "epoch": 1.8116351313188366, + "grad_norm": 4.289497338165475, + "learning_rate": 7.161405711121436e-06, + "loss": 0.6557, "step": 12830 }, { - "epoch": 2.6905011532816103, - "grad_norm": 4.059315491534546, - "learning_rate": 5.531036995584249e-07, - "loss": 0.138, + "epoch": 1.8117763343688225, + "grad_norm": 3.8987627290864264, + "learning_rate": 7.159943998146114e-06, + "loss": 0.6136, "step": 12831 }, { - "epoch": 2.690710840847138, - "grad_norm": 4.377515590100833, - "learning_rate": 5.523614651448317e-07, - "loss": 0.1374, + "epoch": 1.8119175374188083, + "grad_norm": 3.884263279689895, + "learning_rate": 7.158482351172465e-06, + "loss": 0.5894, "step": 12832 }, { - "epoch": 2.690920528412665, - "grad_norm": 4.4728844546941, - "learning_rate": 5.516197149391e-07, - "loss": 0.1153, + "epoch": 1.8120587404687942, + "grad_norm": 4.098844885359488, + "learning_rate": 7.157020770234462e-06, + "loss": 0.6347, "step": 12833 }, { - "epoch": 2.6911302159781925, - "grad_norm": 4.05538839842716, - "learning_rate": 5.508784489792451e-07, - "loss": 0.1164, + "epoch": 1.81219994351878, + "grad_norm": 3.408974804574832, + "learning_rate": 7.155559255366067e-06, + "loss": 0.5712, "step": 12834 }, { - "epoch": 2.69133990354372, - "grad_norm": 4.944304712714176, - "learning_rate": 5.501376673032587e-07, - "loss": 0.1545, + "epoch": 1.812341146568766, + "grad_norm": 3.794150336662539, + "learning_rate": 7.154097806601246e-06, + "loss": 0.5948, "step": 12835 }, { - "epoch": 2.691549591109247, - "grad_norm": 3.770859336744159, - "learning_rate": 5.493973699491073e-07, - "loss": 0.1195, + "epoch": 1.8124823496187519, + "grad_norm": 3.3815157301751766, + "learning_rate": 7.152636423973969e-06, + "loss": 0.5855, "step": 12836 }, { - "epoch": 2.6917592786747746, - "grad_norm": 3.223311776695362, - "learning_rate": 5.486575569547303e-07, - "loss": 0.0854, + "epoch": 1.8126235526687378, + "grad_norm": 3.493423415677898, + "learning_rate": 7.151175107518185e-06, + "loss": 0.5609, "step": 12837 }, { - "epoch": 2.691968966240302, - "grad_norm": 4.158575029817456, - "learning_rate": 5.479182283580453e-07, - "loss": 0.1228, + "epoch": 1.8127647557187236, + "grad_norm": 3.4237426949607115, + "learning_rate": 7.149713857267862e-06, + "loss": 0.5316, "step": 12838 }, { - "epoch": 2.692178653805829, - "grad_norm": 4.733046280896581, - "learning_rate": 5.471793841969475e-07, - "loss": 0.1618, + "epoch": 1.8129059587687095, + "grad_norm": 3.3766753050264846, + "learning_rate": 7.148252673256959e-06, + "loss": 0.4548, "step": 12839 }, { - "epoch": 2.6923883413713567, - "grad_norm": 3.862882112461269, - "learning_rate": 5.46441024509301e-07, - "loss": 0.1417, + "epoch": 1.8130471618186954, + "grad_norm": 3.02548650481774, + "learning_rate": 7.146791555519431e-06, + "loss": 0.4517, "step": 12840 }, { - "epoch": 2.6925980289368843, - "grad_norm": 5.5670852325662326, - "learning_rate": 5.457031493329467e-07, - "loss": 0.169, + "epoch": 1.8131883648686813, + "grad_norm": 3.170876950611142, + "learning_rate": 7.145330504089236e-06, + "loss": 0.4711, "step": 12841 }, { - "epoch": 2.6928077165024114, - "grad_norm": 4.034730287070835, - "learning_rate": 5.449657587057045e-07, - "loss": 0.1325, + "epoch": 1.8133295679186672, + "grad_norm": 4.212236301973642, + "learning_rate": 7.143869519000328e-06, + "loss": 0.7011, "step": 12842 }, { - "epoch": 2.693017404067939, - "grad_norm": 5.410612560818423, - "learning_rate": 5.442288526653672e-07, - "loss": 0.1788, + "epoch": 1.813470770968653, + "grad_norm": 3.261385322319014, + "learning_rate": 7.14240860028666e-06, + "loss": 0.4848, "step": 12843 }, { - "epoch": 2.6932270916334664, - "grad_norm": 3.6000222066663747, - "learning_rate": 5.434924312496992e-07, - "loss": 0.1164, + "epoch": 1.813611974018639, + "grad_norm": 3.2961521373568874, + "learning_rate": 7.140947747982179e-06, + "loss": 0.4457, "step": 12844 }, { - "epoch": 2.6934367791989935, - "grad_norm": 4.131473401384019, - "learning_rate": 5.427564944964481e-07, - "loss": 0.144, + "epoch": 1.8137531770686248, + "grad_norm": 3.1731788247630495, + "learning_rate": 7.139486962120841e-06, + "loss": 0.4463, "step": 12845 }, { - "epoch": 2.693646466764521, - "grad_norm": 3.5502653029057347, - "learning_rate": 5.420210424433269e-07, - "loss": 0.1273, + "epoch": 1.8138943801186107, + "grad_norm": 4.592416823032784, + "learning_rate": 7.1380262427365885e-06, + "loss": 0.6016, "step": 12846 }, { - "epoch": 2.693856154330048, - "grad_norm": 4.313079841865814, - "learning_rate": 5.412860751280324e-07, - "loss": 0.1164, + "epoch": 1.8140355831685966, + "grad_norm": 3.1959642312972614, + "learning_rate": 7.136565589863371e-06, + "loss": 0.4942, "step": 12847 }, { - "epoch": 2.6940658418955756, - "grad_norm": 4.19684441087359, - "learning_rate": 5.405515925882332e-07, - "loss": 0.126, + "epoch": 1.8141767862185825, + "grad_norm": 3.8194520166813, + "learning_rate": 7.135105003535132e-06, + "loss": 0.6647, "step": 12848 }, { - "epoch": 2.6942755294611027, - "grad_norm": 4.2494456077744704, - "learning_rate": 5.398175948615692e-07, - "loss": 0.1489, + "epoch": 1.8143179892685684, + "grad_norm": 4.117756177907205, + "learning_rate": 7.133644483785814e-06, + "loss": 0.6815, "step": 12849 }, { - "epoch": 2.6944852170266302, - "grad_norm": 4.390633000272372, - "learning_rate": 5.390840819856613e-07, - "loss": 0.1167, + "epoch": 1.8144591923185542, + "grad_norm": 3.4438750250954193, + "learning_rate": 7.1321840306493625e-06, + "loss": 0.5341, "step": 12850 }, { - "epoch": 2.6946949045921578, - "grad_norm": 3.465133406252074, - "learning_rate": 5.383510539981052e-07, - "loss": 0.137, + "epoch": 1.8146003953685401, + "grad_norm": 3.9340419976386753, + "learning_rate": 7.130723644159715e-06, + "loss": 0.6784, "step": 12851 }, { - "epoch": 2.694904592157685, - "grad_norm": 4.228916769183762, - "learning_rate": 5.376185109364673e-07, - "loss": 0.1255, + "epoch": 1.814741598418526, + "grad_norm": 5.181105243087843, + "learning_rate": 7.129263324350808e-06, + "loss": 0.812, "step": 12852 }, { - "epoch": 2.6951142797232124, - "grad_norm": 4.289390887741756, - "learning_rate": 5.368864528382922e-07, - "loss": 0.1704, + "epoch": 1.8148828014685119, + "grad_norm": 3.24221118849084, + "learning_rate": 7.1278030712565896e-06, + "loss": 0.5055, "step": 12853 }, { - "epoch": 2.69532396728874, - "grad_norm": 4.242634837374145, - "learning_rate": 5.361548797410998e-07, - "loss": 0.1114, + "epoch": 1.8150240045184975, + "grad_norm": 4.095749801217945, + "learning_rate": 7.1263428849109805e-06, + "loss": 0.6455, "step": 12854 }, { - "epoch": 2.695533654854267, - "grad_norm": 4.071015545282936, - "learning_rate": 5.354237916823823e-07, - "loss": 0.1512, + "epoch": 1.8151652075684834, + "grad_norm": 3.9353727956222198, + "learning_rate": 7.124882765347923e-06, + "loss": 0.512, "step": 12855 }, { - "epoch": 2.6957433424197945, - "grad_norm": 3.7171100702745927, - "learning_rate": 5.346931886996132e-07, - "loss": 0.1163, + "epoch": 1.8153064106184693, + "grad_norm": 4.048878759200474, + "learning_rate": 7.123422712601349e-06, + "loss": 0.5852, "step": 12856 }, { - "epoch": 2.695953029985322, - "grad_norm": 4.252449940361792, - "learning_rate": 5.339630708302346e-07, - "loss": 0.1277, + "epoch": 1.8154476136684552, + "grad_norm": 3.725134924002504, + "learning_rate": 7.121962726705187e-06, + "loss": 0.5781, "step": 12857 }, { - "epoch": 2.696162717550849, - "grad_norm": 4.250896799262427, - "learning_rate": 5.332334381116643e-07, - "loss": 0.1438, + "epoch": 1.815588816718441, + "grad_norm": 3.1205570886717573, + "learning_rate": 7.1205028076933705e-06, + "loss": 0.4256, "step": 12858 }, { - "epoch": 2.6963724051163767, - "grad_norm": 5.318366338604121, - "learning_rate": 5.325042905813005e-07, - "loss": 0.1616, + "epoch": 1.815730019768427, + "grad_norm": 3.4264098254931694, + "learning_rate": 7.119042955599824e-06, + "loss": 0.6082, "step": 12859 }, { - "epoch": 2.696582092681904, - "grad_norm": 4.427708656007492, - "learning_rate": 5.317756282765152e-07, - "loss": 0.1507, + "epoch": 1.8158712228184128, + "grad_norm": 3.3652304244043885, + "learning_rate": 7.117583170458478e-06, + "loss": 0.4821, "step": 12860 }, { - "epoch": 2.6967917802474313, - "grad_norm": 3.9280451614176455, - "learning_rate": 5.310474512346476e-07, - "loss": 0.1354, + "epoch": 1.8160124258683987, + "grad_norm": 4.033551001052538, + "learning_rate": 7.116123452303248e-06, + "loss": 0.5044, "step": 12861 }, { - "epoch": 2.697001467812959, - "grad_norm": 4.451344699274738, - "learning_rate": 5.30319759493022e-07, - "loss": 0.1324, + "epoch": 1.8161536289183846, + "grad_norm": 4.0040084829129015, + "learning_rate": 7.114663801168073e-06, + "loss": 0.6021, "step": 12862 }, { - "epoch": 2.6972111553784863, - "grad_norm": 3.806971984511964, - "learning_rate": 5.295925530889334e-07, - "loss": 0.1178, + "epoch": 1.8162948319683705, + "grad_norm": 3.9418016054370755, + "learning_rate": 7.113204217086858e-06, + "loss": 0.5867, "step": 12863 }, { - "epoch": 2.6974208429440134, - "grad_norm": 3.6706076985361378, - "learning_rate": 5.288658320596529e-07, - "loss": 0.1107, + "epoch": 1.8164360350183564, + "grad_norm": 3.298424339274356, + "learning_rate": 7.111744700093531e-06, + "loss": 0.4045, "step": 12864 }, { - "epoch": 2.697630530509541, - "grad_norm": 3.4757671497873095, - "learning_rate": 5.281395964424229e-07, - "loss": 0.1085, + "epoch": 1.8165772380683423, + "grad_norm": 3.420851941401796, + "learning_rate": 7.110285250222011e-06, + "loss": 0.4584, "step": 12865 }, { - "epoch": 2.697840218075068, - "grad_norm": 4.528489561745475, - "learning_rate": 5.274138462744693e-07, - "loss": 0.1654, + "epoch": 1.8167184411183281, + "grad_norm": 3.287380952736182, + "learning_rate": 7.108825867506213e-06, + "loss": 0.5353, "step": 12866 }, { - "epoch": 2.6980499056405955, - "grad_norm": 4.336963742398315, - "learning_rate": 5.266885815929835e-07, - "loss": 0.121, + "epoch": 1.816859644168314, + "grad_norm": 3.57345297573272, + "learning_rate": 7.107366551980053e-06, + "loss": 0.5899, "step": 12867 }, { - "epoch": 2.6982595932061226, - "grad_norm": 3.4332250539813054, - "learning_rate": 5.259638024351399e-07, - "loss": 0.0873, + "epoch": 1.8170008472183, + "grad_norm": 3.0369197946473485, + "learning_rate": 7.105907303677446e-06, + "loss": 0.4798, "step": 12868 }, { - "epoch": 2.69846928077165, - "grad_norm": 5.916331043663303, - "learning_rate": 5.252395088380835e-07, - "loss": 0.1738, + "epoch": 1.8171420502682858, + "grad_norm": 3.6108539437221, + "learning_rate": 7.104448122632302e-06, + "loss": 0.5404, "step": 12869 }, { - "epoch": 2.6986789683371777, - "grad_norm": 4.143655914420538, - "learning_rate": 5.245157008389334e-07, - "loss": 0.1601, + "epoch": 1.8172832533182717, + "grad_norm": 3.015423832970976, + "learning_rate": 7.102989008878538e-06, + "loss": 0.4636, "step": 12870 }, { - "epoch": 2.6988886559027048, - "grad_norm": 4.426412851082678, - "learning_rate": 5.237923784747889e-07, - "loss": 0.1307, + "epoch": 1.8174244563682576, + "grad_norm": 4.2188865769592825, + "learning_rate": 7.101529962450053e-06, + "loss": 0.5774, "step": 12871 }, { - "epoch": 2.6990983434682323, - "grad_norm": 4.576500674665621, - "learning_rate": 5.230695417827214e-07, - "loss": 0.1608, + "epoch": 1.8175656594182434, + "grad_norm": 3.3373234918907184, + "learning_rate": 7.100070983380763e-06, + "loss": 0.5251, "step": 12872 }, { - "epoch": 2.69930803103376, - "grad_norm": 3.3190453237441653, - "learning_rate": 5.223471907997768e-07, - "loss": 0.1002, + "epoch": 1.8177068624682293, + "grad_norm": 2.9330609149093254, + "learning_rate": 7.098612071704569e-06, + "loss": 0.4617, "step": 12873 }, { - "epoch": 2.699517718599287, - "grad_norm": 3.603233066777662, - "learning_rate": 5.216253255629756e-07, - "loss": 0.1408, + "epoch": 1.8178480655182152, + "grad_norm": 3.7339006018759204, + "learning_rate": 7.097153227455379e-06, + "loss": 0.6237, "step": 12874 }, { - "epoch": 2.6997274061648144, - "grad_norm": 4.110136446441054, - "learning_rate": 5.209039461093157e-07, - "loss": 0.1849, + "epoch": 1.817989268568201, + "grad_norm": 3.3856046501674424, + "learning_rate": 7.0956944506670915e-06, + "loss": 0.542, "step": 12875 }, { - "epoch": 2.699937093730342, - "grad_norm": 4.168173204893433, - "learning_rate": 5.201830524757712e-07, - "loss": 0.1303, + "epoch": 1.818130471618187, + "grad_norm": 4.293288920298669, + "learning_rate": 7.0942357413736116e-06, + "loss": 0.6428, "step": 12876 }, { - "epoch": 2.700146781295869, - "grad_norm": 3.9801289094666448, - "learning_rate": 5.194626446992868e-07, - "loss": 0.1358, + "epoch": 1.8182716746681729, + "grad_norm": 2.661555044523843, + "learning_rate": 7.092777099608841e-06, + "loss": 0.442, "step": 12877 }, { - "epoch": 2.7003564688613966, - "grad_norm": 4.7962538078344625, - "learning_rate": 5.187427228167841e-07, - "loss": 0.1613, + "epoch": 1.8184128777181587, + "grad_norm": 3.6859024310855393, + "learning_rate": 7.091318525406671e-06, + "loss": 0.5181, "step": 12878 }, { - "epoch": 2.700566156426924, - "grad_norm": 4.021730804720595, - "learning_rate": 5.180232868651614e-07, - "loss": 0.1301, + "epoch": 1.8185540807681446, + "grad_norm": 3.1742771846401197, + "learning_rate": 7.0898600188010095e-06, + "loss": 0.473, "step": 12879 }, { - "epoch": 2.700775843992451, - "grad_norm": 4.2870120910781315, - "learning_rate": 5.173043368812902e-07, - "loss": 0.1079, + "epoch": 1.8186952838181305, + "grad_norm": 4.195754624080737, + "learning_rate": 7.0884015798257365e-06, + "loss": 0.617, "step": 12880 }, { - "epoch": 2.7009855315579787, - "grad_norm": 4.022749962440249, - "learning_rate": 5.165858729020202e-07, - "loss": 0.118, + "epoch": 1.8188364868681164, + "grad_norm": 4.69524693322813, + "learning_rate": 7.086943208514756e-06, + "loss": 0.7408, "step": 12881 }, { - "epoch": 2.7011952191235062, - "grad_norm": 3.845429217342205, - "learning_rate": 5.158678949641726e-07, - "loss": 0.1293, + "epoch": 1.8189776899181023, + "grad_norm": 2.972310631206243, + "learning_rate": 7.0854849049019584e-06, + "loss": 0.4099, "step": 12882 }, { - "epoch": 2.7014049066890333, - "grad_norm": 4.0569265027943, - "learning_rate": 5.151504031045429e-07, - "loss": 0.1618, + "epoch": 1.8191188929680882, + "grad_norm": 3.3687230133511874, + "learning_rate": 7.084026669021231e-06, + "loss": 0.4717, "step": 12883 }, { - "epoch": 2.701614594254561, - "grad_norm": 4.721182374593468, - "learning_rate": 5.144333973599058e-07, - "loss": 0.1602, + "epoch": 1.819260096018074, + "grad_norm": 3.7843386927456915, + "learning_rate": 7.082568500906466e-06, + "loss": 0.6699, "step": 12884 }, { - "epoch": 2.701824281820088, - "grad_norm": 4.386778487990207, - "learning_rate": 5.13716877767012e-07, - "loss": 0.1631, + "epoch": 1.81940129906806, + "grad_norm": 3.682661043652068, + "learning_rate": 7.081110400591549e-06, + "loss": 0.5295, "step": 12885 }, { - "epoch": 2.7020339693856155, - "grad_norm": 5.736770148841213, - "learning_rate": 5.130008443625778e-07, - "loss": 0.1844, + "epoch": 1.8195425021180458, + "grad_norm": 2.8416972266776113, + "learning_rate": 7.0796523681103635e-06, + "loss": 0.4449, "step": 12886 }, { - "epoch": 2.7022436569511425, - "grad_norm": 3.796202617425379, - "learning_rate": 5.122852971833058e-07, - "loss": 0.121, + "epoch": 1.8196837051680317, + "grad_norm": 3.470541658907999, + "learning_rate": 7.0781944034968005e-06, + "loss": 0.5316, "step": 12887 }, { - "epoch": 2.70245334451667, - "grad_norm": 3.4456789062199387, - "learning_rate": 5.115702362658681e-07, - "loss": 0.1126, + "epoch": 1.8198249082180173, + "grad_norm": 4.172147579996848, + "learning_rate": 7.076736506784734e-06, + "loss": 0.6595, "step": 12888 }, { - "epoch": 2.7026630320821976, - "grad_norm": 4.908146967473637, - "learning_rate": 5.10855661646914e-07, - "loss": 0.1407, + "epoch": 1.8199661112680032, + "grad_norm": 3.273681122943454, + "learning_rate": 7.07527867800805e-06, + "loss": 0.5196, "step": 12889 }, { - "epoch": 2.7028727196477247, - "grad_norm": 4.280959328408613, - "learning_rate": 5.101415733630632e-07, - "loss": 0.1458, + "epoch": 1.8201073143179891, + "grad_norm": 3.8661528676453663, + "learning_rate": 7.073820917200627e-06, + "loss": 0.6009, "step": 12890 }, { - "epoch": 2.703082407213252, - "grad_norm": 3.5326011427196864, - "learning_rate": 5.094279714509176e-07, - "loss": 0.139, + "epoch": 1.820248517367975, + "grad_norm": 3.5537568982398278, + "learning_rate": 7.07236322439634e-06, + "loss": 0.5928, "step": 12891 }, { - "epoch": 2.7032920947787797, - "grad_norm": 4.588449235259221, - "learning_rate": 5.087148559470468e-07, - "loss": 0.1419, + "epoch": 1.8203897204179609, + "grad_norm": 3.7316836035607146, + "learning_rate": 7.070905599629068e-06, + "loss": 0.6217, "step": 12892 }, { - "epoch": 2.703501782344307, - "grad_norm": 5.423659056032981, - "learning_rate": 5.080022268880036e-07, - "loss": 0.1306, + "epoch": 1.8205309234679468, + "grad_norm": 4.180834001327106, + "learning_rate": 7.069448042932685e-06, + "loss": 0.6357, "step": 12893 }, { - "epoch": 2.7037114699098344, - "grad_norm": 3.712798224548057, - "learning_rate": 5.072900843103091e-07, - "loss": 0.1293, + "epoch": 1.8206721265179326, + "grad_norm": 4.571762484979292, + "learning_rate": 7.067990554341064e-06, + "loss": 0.6285, "step": 12894 }, { - "epoch": 2.703921157475362, - "grad_norm": 4.73864505983658, - "learning_rate": 5.065784282504615e-07, - "loss": 0.1371, + "epoch": 1.8208133295679185, + "grad_norm": 3.332436260374653, + "learning_rate": 7.066533133888074e-06, + "loss": 0.5565, "step": 12895 }, { - "epoch": 2.704130845040889, - "grad_norm": 4.123363263628095, - "learning_rate": 5.058672587449343e-07, - "loss": 0.0808, + "epoch": 1.8209545326179044, + "grad_norm": 4.1325157189700175, + "learning_rate": 7.065075781607594e-06, + "loss": 0.5795, "step": 12896 }, { - "epoch": 2.7043405326064165, - "grad_norm": 4.354155265861245, - "learning_rate": 5.051565758301779e-07, - "loss": 0.1286, + "epoch": 1.8210957356678903, + "grad_norm": 3.962848640363837, + "learning_rate": 7.06361849753348e-06, + "loss": 0.5845, "step": 12897 }, { - "epoch": 2.704550220171944, - "grad_norm": 3.7863848836974277, - "learning_rate": 5.044463795426147e-07, - "loss": 0.1254, + "epoch": 1.8212369387178762, + "grad_norm": 3.4771717415279393, + "learning_rate": 7.062161281699605e-06, + "loss": 0.5677, "step": 12898 }, { - "epoch": 2.704759907737471, - "grad_norm": 3.787978147289437, - "learning_rate": 5.03736669918643e-07, - "loss": 0.1086, + "epoch": 1.821378141767862, + "grad_norm": 3.271499791608426, + "learning_rate": 7.060704134139833e-06, + "loss": 0.545, "step": 12899 }, { - "epoch": 2.7049695953029986, - "grad_norm": 4.110695690613969, - "learning_rate": 5.030274469946395e-07, - "loss": 0.1532, + "epoch": 1.821519344817848, + "grad_norm": 3.155150276888311, + "learning_rate": 7.059247054888025e-06, + "loss": 0.4396, "step": 12900 }, { - "epoch": 2.705179282868526, - "grad_norm": 4.465080941296165, - "learning_rate": 5.023187108069483e-07, - "loss": 0.1264, + "epoch": 1.8216605478678338, + "grad_norm": 3.3192578487232067, + "learning_rate": 7.057790043978049e-06, + "loss": 0.4588, "step": 12901 }, { - "epoch": 2.7053889704340532, - "grad_norm": 4.80143661057014, - "learning_rate": 5.016104613918981e-07, - "loss": 0.1335, + "epoch": 1.8218017509178197, + "grad_norm": 3.8616721500280535, + "learning_rate": 7.056333101443761e-06, + "loss": 0.5879, "step": 12902 }, { - "epoch": 2.7055986579995808, - "grad_norm": 5.0381109938023405, - "learning_rate": 5.009026987857857e-07, - "loss": 0.1586, + "epoch": 1.8219429539678056, + "grad_norm": 3.175853212801913, + "learning_rate": 7.054876227319021e-06, + "loss": 0.4731, "step": 12903 }, { - "epoch": 2.705808345565108, - "grad_norm": 4.802065550838619, - "learning_rate": 5.00195423024884e-07, - "loss": 0.1476, + "epoch": 1.8220841570177915, + "grad_norm": 2.8954068624408817, + "learning_rate": 7.053419421637686e-06, + "loss": 0.4487, "step": 12904 }, { - "epoch": 2.7060180331306354, - "grad_norm": 4.853637498825989, - "learning_rate": 4.994886341454452e-07, - "loss": 0.1077, + "epoch": 1.8222253600677774, + "grad_norm": 3.2322616949485576, + "learning_rate": 7.051962684433609e-06, + "loss": 0.5377, "step": 12905 }, { - "epoch": 2.7062277206961625, - "grad_norm": 3.9431367171723077, - "learning_rate": 4.987823321836904e-07, - "loss": 0.1307, + "epoch": 1.8223665631177632, + "grad_norm": 3.745735070647311, + "learning_rate": 7.050506015740646e-06, + "loss": 0.5995, "step": 12906 }, { - "epoch": 2.70643740826169, - "grad_norm": 3.1542944862368487, - "learning_rate": 4.980765171758206e-07, - "loss": 0.1273, + "epoch": 1.8225077661677491, + "grad_norm": 3.771105120529124, + "learning_rate": 7.04904941559265e-06, + "loss": 0.4554, "step": 12907 }, { - "epoch": 2.7066470958272175, - "grad_norm": 3.7359439573337854, - "learning_rate": 4.973711891580079e-07, - "loss": 0.1238, + "epoch": 1.822648969217735, + "grad_norm": 3.3877731431226428, + "learning_rate": 7.047592884023473e-06, + "loss": 0.4849, "step": 12908 }, { - "epoch": 2.7068567833927446, - "grad_norm": 4.324340520305529, - "learning_rate": 4.966663481664058e-07, - "loss": 0.1166, + "epoch": 1.8227901722677209, + "grad_norm": 3.382403822786309, + "learning_rate": 7.046136421066958e-06, + "loss": 0.5002, "step": 12909 }, { - "epoch": 2.707066470958272, - "grad_norm": 3.812328832626003, - "learning_rate": 4.959619942371352e-07, - "loss": 0.1153, + "epoch": 1.8229313753177068, + "grad_norm": 3.438775793625825, + "learning_rate": 7.04468002675696e-06, + "loss": 0.3849, "step": 12910 }, { - "epoch": 2.7072761585237997, - "grad_norm": 5.194414708895172, - "learning_rate": 4.952581274062951e-07, - "loss": 0.1695, + "epoch": 1.8230725783676927, + "grad_norm": 3.798581986970123, + "learning_rate": 7.043223701127322e-06, + "loss": 0.5639, "step": 12911 }, { - "epoch": 2.7074858460893267, - "grad_norm": 4.40246419500683, - "learning_rate": 4.945547477099633e-07, - "loss": 0.1276, + "epoch": 1.8232137814176785, + "grad_norm": 3.855478516877096, + "learning_rate": 7.041767444211886e-06, + "loss": 0.5847, "step": 12912 }, { - "epoch": 2.7076955336548543, - "grad_norm": 3.311785189662271, - "learning_rate": 4.938518551841842e-07, - "loss": 0.1299, + "epoch": 1.8233549844676644, + "grad_norm": 3.4640605423079034, + "learning_rate": 7.0403112560445035e-06, + "loss": 0.5314, "step": 12913 }, { - "epoch": 2.707905221220382, - "grad_norm": 3.3137349807363097, - "learning_rate": 4.93149449864988e-07, - "loss": 0.0855, + "epoch": 1.8234961875176503, + "grad_norm": 2.837153723373047, + "learning_rate": 7.038855136659002e-06, + "loss": 0.4539, "step": 12914 }, { - "epoch": 2.708114908785909, - "grad_norm": 5.11668873569344, - "learning_rate": 4.924475317883704e-07, - "loss": 0.1189, + "epoch": 1.8236373905676362, + "grad_norm": 3.8393920388939504, + "learning_rate": 7.0373990860892316e-06, + "loss": 0.621, "step": 12915 }, { - "epoch": 2.7083245963514364, - "grad_norm": 3.1832860599040154, - "learning_rate": 4.917461009903057e-07, - "loss": 0.1022, + "epoch": 1.823778593617622, + "grad_norm": 3.907877872451048, + "learning_rate": 7.035943104369026e-06, + "loss": 0.6964, "step": 12916 }, { - "epoch": 2.708534283916964, - "grad_norm": 5.489876373463343, - "learning_rate": 4.910451575067443e-07, - "loss": 0.1563, + "epoch": 1.823919796667608, + "grad_norm": 3.46895419756258, + "learning_rate": 7.034487191532221e-06, + "loss": 0.5484, "step": 12917 }, { - "epoch": 2.708743971482491, - "grad_norm": 4.857369726719416, - "learning_rate": 4.903447013736129e-07, - "loss": 0.1705, + "epoch": 1.8240609997175938, + "grad_norm": 3.042037436344643, + "learning_rate": 7.033031347612655e-06, + "loss": 0.5172, "step": 12918 }, { - "epoch": 2.7089536590480185, - "grad_norm": 4.027220215947041, - "learning_rate": 4.896447326268083e-07, - "loss": 0.1423, + "epoch": 1.8242022027675797, + "grad_norm": 3.4928262045145053, + "learning_rate": 7.031575572644159e-06, + "loss": 0.4728, "step": 12919 }, { - "epoch": 2.709163346613546, - "grad_norm": 4.472407174254757, - "learning_rate": 4.889452513022053e-07, - "loss": 0.1266, + "epoch": 1.8243434058175656, + "grad_norm": 3.310749770472833, + "learning_rate": 7.030119866660565e-06, + "loss": 0.479, "step": 12920 }, { - "epoch": 2.709373034179073, - "grad_norm": 4.981540546093964, - "learning_rate": 4.882462574356562e-07, - "loss": 0.1316, + "epoch": 1.8244846088675515, + "grad_norm": 4.330850869752965, + "learning_rate": 7.028664229695705e-06, + "loss": 0.5467, "step": 12921 }, { - "epoch": 2.7095827217446007, - "grad_norm": 3.371009902419683, - "learning_rate": 4.875477510629823e-07, - "loss": 0.1176, + "epoch": 1.8246258119175374, + "grad_norm": 4.326678458502234, + "learning_rate": 7.027208661783402e-06, + "loss": 0.5656, "step": 12922 }, { - "epoch": 2.7097924093101278, - "grad_norm": 5.501066025328657, - "learning_rate": 4.86849732219985e-07, - "loss": 0.178, + "epoch": 1.8247670149675232, + "grad_norm": 3.034908152997462, + "learning_rate": 7.025753162957485e-06, + "loss": 0.5014, "step": 12923 }, { - "epoch": 2.7100020968756553, - "grad_norm": 5.155077674741697, - "learning_rate": 4.861522009424391e-07, - "loss": 0.1266, + "epoch": 1.8249082180175091, + "grad_norm": 3.2159150696126533, + "learning_rate": 7.024297733251781e-06, + "loss": 0.4997, "step": 12924 }, { - "epoch": 2.7102117844411824, - "grad_norm": 4.636654473753456, - "learning_rate": 4.854551572660915e-07, - "loss": 0.1465, + "epoch": 1.825049421067495, + "grad_norm": 4.084357067061361, + "learning_rate": 7.022842372700114e-06, + "loss": 0.5331, "step": 12925 }, { - "epoch": 2.71042147200671, - "grad_norm": 5.304478354853692, - "learning_rate": 4.847586012266725e-07, - "loss": 0.2087, + "epoch": 1.825190624117481, + "grad_norm": 4.430834825734025, + "learning_rate": 7.021387081336302e-06, + "loss": 0.7386, "step": 12926 }, { - "epoch": 2.7106311595722374, - "grad_norm": 3.467300868807927, - "learning_rate": 4.840625328598769e-07, - "loss": 0.1102, + "epoch": 1.8253318271674668, + "grad_norm": 3.7519976370525225, + "learning_rate": 7.019931859194169e-06, + "loss": 0.4744, "step": 12927 }, { - "epoch": 2.7108408471377645, - "grad_norm": 4.4499461528255635, - "learning_rate": 4.833669522013807e-07, - "loss": 0.1294, + "epoch": 1.8254730302174527, + "grad_norm": 3.110141799865905, + "learning_rate": 7.018476706307533e-06, + "loss": 0.5555, "step": 12928 }, { - "epoch": 2.711050534703292, - "grad_norm": 4.017344033093376, - "learning_rate": 4.82671859286834e-07, - "loss": 0.1165, + "epoch": 1.8256142332674385, + "grad_norm": 3.049452026967664, + "learning_rate": 7.017021622710208e-06, + "loss": 0.4372, "step": 12929 }, { - "epoch": 2.7112602222688196, - "grad_norm": 4.395317674417498, - "learning_rate": 4.81977254151862e-07, - "loss": 0.1614, + "epoch": 1.8257554363174244, + "grad_norm": 3.1151367330581317, + "learning_rate": 7.01556660843602e-06, + "loss": 0.4619, "step": 12930 }, { - "epoch": 2.7114699098343467, - "grad_norm": 5.442910382218076, - "learning_rate": 4.812831368320647e-07, - "loss": 0.1315, + "epoch": 1.8258966393674103, + "grad_norm": 4.201986617164945, + "learning_rate": 7.014111663518768e-06, + "loss": 0.6328, "step": 12931 }, { - "epoch": 2.711679597399874, - "grad_norm": 4.53188196216915, - "learning_rate": 4.80589507363014e-07, - "loss": 0.1201, + "epoch": 1.8260378424173962, + "grad_norm": 4.457439444604092, + "learning_rate": 7.0126567879922735e-06, + "loss": 0.7733, "step": 12932 }, { - "epoch": 2.7118892849654017, - "grad_norm": 4.769564464365183, - "learning_rate": 4.798963657802624e-07, - "loss": 0.1668, + "epoch": 1.826179045467382, + "grad_norm": 4.36747949866512, + "learning_rate": 7.011201981890345e-06, + "loss": 0.5876, "step": 12933 }, { - "epoch": 2.712098972530929, - "grad_norm": 4.541470336707967, - "learning_rate": 4.792037121193349e-07, - "loss": 0.1466, + "epoch": 1.826320248517368, + "grad_norm": 3.6606938349099116, + "learning_rate": 7.009747245246792e-06, + "loss": 0.5329, "step": 12934 }, { - "epoch": 2.7123086600964563, - "grad_norm": 4.174641683307479, - "learning_rate": 4.785115464157297e-07, - "loss": 0.1485, + "epoch": 1.8264614515673538, + "grad_norm": 2.9492616199750206, + "learning_rate": 7.008292578095419e-06, + "loss": 0.454, "step": 12935 }, { - "epoch": 2.712518347661984, - "grad_norm": 4.6907800493546645, - "learning_rate": 4.778198687049207e-07, - "loss": 0.142, + "epoch": 1.8266026546173397, + "grad_norm": 3.4802303868372073, + "learning_rate": 7.006837980470038e-06, + "loss": 0.5824, "step": 12936 }, { - "epoch": 2.712728035227511, - "grad_norm": 3.9731316734444397, - "learning_rate": 4.771286790223606e-07, - "loss": 0.1324, + "epoch": 1.8267438576673256, + "grad_norm": 3.5004929693023725, + "learning_rate": 7.0053834524044485e-06, + "loss": 0.5644, "step": 12937 }, { - "epoch": 2.7129377227930385, - "grad_norm": 3.5745997010911847, - "learning_rate": 4.7643797740346997e-07, - "loss": 0.1039, + "epoch": 1.8268850607173115, + "grad_norm": 3.6414346385005785, + "learning_rate": 7.003928993932456e-06, + "loss": 0.5573, "step": 12938 }, { - "epoch": 2.713147410358566, - "grad_norm": 3.6178250386276884, - "learning_rate": 4.7574776388365383e-07, - "loss": 0.1122, + "epoch": 1.8270262637672974, + "grad_norm": 4.415182299903268, + "learning_rate": 7.002474605087859e-06, + "loss": 0.6296, "step": 12939 }, { - "epoch": 2.713357097924093, - "grad_norm": 3.7975845545905185, - "learning_rate": 4.750580384982828e-07, - "loss": 0.1273, + "epoch": 1.8271674668172833, + "grad_norm": 4.419292363189847, + "learning_rate": 7.001020285904454e-06, + "loss": 0.6941, "step": 12940 }, { - "epoch": 2.7135667854896206, - "grad_norm": 5.987865909498283, - "learning_rate": 4.7436880128270526e-07, - "loss": 0.1548, + "epoch": 1.8273086698672691, + "grad_norm": 3.532028696670464, + "learning_rate": 6.9995660364160455e-06, + "loss": 0.5155, "step": 12941 }, { - "epoch": 2.7137764730551477, - "grad_norm": 3.6207442005610466, - "learning_rate": 4.7368005227224954e-07, - "loss": 0.1184, + "epoch": 1.827449872917255, + "grad_norm": 3.8983695251116384, + "learning_rate": 6.998111856656427e-06, + "loss": 0.6924, "step": 12942 }, { - "epoch": 2.713986160620675, - "grad_norm": 4.321823345711403, - "learning_rate": 4.729917915022153e-07, - "loss": 0.1436, + "epoch": 1.827591075967241, + "grad_norm": 4.119268997257333, + "learning_rate": 6.99665774665939e-06, + "loss": 0.573, "step": 12943 }, { - "epoch": 2.7141958481862023, - "grad_norm": 4.483822184350326, - "learning_rate": 4.7230401900787315e-07, - "loss": 0.1119, + "epoch": 1.8277322790172268, + "grad_norm": 3.478562512655586, + "learning_rate": 6.995203706458731e-06, + "loss": 0.6019, "step": 12944 }, { - "epoch": 2.71440553575173, - "grad_norm": 5.099378431563094, - "learning_rate": 4.716167348244749e-07, - "loss": 0.1782, + "epoch": 1.8278734820672127, + "grad_norm": 3.5515429282841007, + "learning_rate": 6.993749736088241e-06, + "loss": 0.5283, "step": 12945 }, { - "epoch": 2.7146152233172574, - "grad_norm": 6.093865063693587, - "learning_rate": 4.709299389872468e-07, - "loss": 0.1995, + "epoch": 1.8280146851171986, + "grad_norm": 3.4037633162633596, + "learning_rate": 6.9922958355817085e-06, + "loss": 0.5047, "step": 12946 }, { - "epoch": 2.7148249108827844, - "grad_norm": 4.033200298151314, - "learning_rate": 4.7024363153138633e-07, - "loss": 0.1486, + "epoch": 1.8281558881671844, + "grad_norm": 3.3683701889140614, + "learning_rate": 6.9908420049729244e-06, + "loss": 0.6367, "step": 12947 }, { - "epoch": 2.715034598448312, - "grad_norm": 3.732926921607242, - "learning_rate": 4.6955781249206747e-07, - "loss": 0.1044, + "epoch": 1.8282970912171703, + "grad_norm": 3.3998008631200336, + "learning_rate": 6.989388244295669e-06, + "loss": 0.5997, "step": 12948 }, { - "epoch": 2.7152442860138395, - "grad_norm": 3.7122810034242995, - "learning_rate": 4.68872481904441e-07, - "loss": 0.1088, + "epoch": 1.8284382942671562, + "grad_norm": 5.050720707850807, + "learning_rate": 6.987934553583732e-06, + "loss": 0.6887, "step": 12949 }, { - "epoch": 2.7154539735793666, - "grad_norm": 5.315774523405423, - "learning_rate": 4.6818763980363e-07, - "loss": 0.1681, + "epoch": 1.828579497317142, + "grad_norm": 4.017842694274579, + "learning_rate": 6.9864809328708945e-06, + "loss": 0.5307, "step": 12950 }, { - "epoch": 2.715663661144894, - "grad_norm": 3.595935883284847, - "learning_rate": 4.675032862247364e-07, - "loss": 0.1232, + "epoch": 1.828720700367128, + "grad_norm": 4.015390314737476, + "learning_rate": 6.9850273821909395e-06, + "loss": 0.7019, "step": 12951 }, { - "epoch": 2.7158733487104216, - "grad_norm": 5.176459058248521, - "learning_rate": 4.6681942120283094e-07, - "loss": 0.143, + "epoch": 1.8288619034171139, + "grad_norm": 3.478739896118804, + "learning_rate": 6.983573901577644e-06, + "loss": 0.575, "step": 12952 }, { - "epoch": 2.7160830362759487, - "grad_norm": 4.663626960206136, - "learning_rate": 4.661360447729646e-07, - "loss": 0.1663, + "epoch": 1.8290031064670997, + "grad_norm": 3.3442106589343323, + "learning_rate": 6.98212049106479e-06, + "loss": 0.4844, "step": 12953 }, { - "epoch": 2.7162927238414762, - "grad_norm": 4.295038124005719, - "learning_rate": 4.6545315697016146e-07, - "loss": 0.1573, + "epoch": 1.8291443095170856, + "grad_norm": 3.853777997147361, + "learning_rate": 6.980667150686153e-06, + "loss": 0.5272, "step": 12954 }, { - "epoch": 2.7165024114070038, - "grad_norm": 3.9227550300988363, - "learning_rate": 4.647707578294225e-07, - "loss": 0.1322, + "epoch": 1.8292855125670715, + "grad_norm": 3.220241348100374, + "learning_rate": 6.979213880475509e-06, + "loss": 0.4219, "step": 12955 }, { - "epoch": 2.716712098972531, - "grad_norm": 3.6938144716384564, - "learning_rate": 4.6408884738571966e-07, - "loss": 0.145, + "epoch": 1.8294267156170574, + "grad_norm": 3.82295888260213, + "learning_rate": 6.977760680466628e-06, + "loss": 0.5854, "step": 12956 }, { - "epoch": 2.7169217865380584, - "grad_norm": 2.9999797074214003, - "learning_rate": 4.6340742567400065e-07, - "loss": 0.1018, + "epoch": 1.8295679186670433, + "grad_norm": 3.205860338350853, + "learning_rate": 6.976307550693282e-06, + "loss": 0.5569, "step": 12957 }, { - "epoch": 2.717131474103586, - "grad_norm": 3.7452665704529684, - "learning_rate": 4.6272649272919414e-07, - "loss": 0.1174, + "epoch": 1.8297091217170292, + "grad_norm": 3.445479321651828, + "learning_rate": 6.974854491189243e-06, + "loss": 0.5719, "step": 12958 }, { - "epoch": 2.717341161669113, - "grad_norm": 3.5840532657436315, - "learning_rate": 4.620460485861944e-07, - "loss": 0.1175, + "epoch": 1.829850324767015, + "grad_norm": 3.0506326938637187, + "learning_rate": 6.973401501988282e-06, + "loss": 0.4373, "step": 12959 }, { - "epoch": 2.7175508492346405, - "grad_norm": 3.502673722676137, - "learning_rate": 4.61366093279878e-07, - "loss": 0.0967, + "epoch": 1.829991527817001, + "grad_norm": 3.0765396251341195, + "learning_rate": 6.971948583124159e-06, + "loss": 0.494, "step": 12960 }, { - "epoch": 2.7177605368001676, - "grad_norm": 5.068836699777926, - "learning_rate": 4.606866268450949e-07, - "loss": 0.122, + "epoch": 1.8301327308669868, + "grad_norm": 3.710493014480271, + "learning_rate": 6.970495734630644e-06, + "loss": 0.5816, "step": 12961 }, { - "epoch": 2.717970224365695, - "grad_norm": 4.161801295578957, - "learning_rate": 4.6000764931666496e-07, - "loss": 0.1232, + "epoch": 1.8302739339169727, + "grad_norm": 3.298814896555165, + "learning_rate": 6.969042956541503e-06, + "loss": 0.4884, "step": 12962 }, { - "epoch": 2.718179911931222, - "grad_norm": 4.573145471927253, - "learning_rate": 4.593291607293893e-07, - "loss": 0.1699, + "epoch": 1.8304151369669586, + "grad_norm": 3.1585775646162046, + "learning_rate": 6.967590248890494e-06, + "loss": 0.5538, "step": 12963 }, { - "epoch": 2.7183895994967497, - "grad_norm": 3.896022721718441, - "learning_rate": 4.5865116111804444e-07, - "loss": 0.1226, + "epoch": 1.8305563400169444, + "grad_norm": 3.634156383268282, + "learning_rate": 6.9661376117113775e-06, + "loss": 0.5873, "step": 12964 }, { - "epoch": 2.7185992870622773, - "grad_norm": 4.46992078456667, - "learning_rate": 4.579736505173726e-07, - "loss": 0.1021, + "epoch": 1.8306975430669303, + "grad_norm": 3.1868988096888176, + "learning_rate": 6.9646850450379135e-06, + "loss": 0.466, "step": 12965 }, { - "epoch": 2.7188089746278044, - "grad_norm": 5.699091132044439, - "learning_rate": 4.572966289621028e-07, - "loss": 0.1371, + "epoch": 1.8308387461169162, + "grad_norm": 2.8879852298620534, + "learning_rate": 6.963232548903853e-06, + "loss": 0.4434, "step": 12966 }, { - "epoch": 2.719018662193332, - "grad_norm": 4.583468468504497, - "learning_rate": 4.566200964869316e-07, - "loss": 0.1434, + "epoch": 1.830979949166902, + "grad_norm": 4.0245401590934495, + "learning_rate": 6.961780123342962e-06, + "loss": 0.6467, "step": 12967 }, { - "epoch": 2.7192283497588594, - "grad_norm": 3.8987073071538085, - "learning_rate": 4.5594405312653357e-07, - "loss": 0.1134, + "epoch": 1.831121152216888, + "grad_norm": 4.078065828583929, + "learning_rate": 6.960327768388987e-06, + "loss": 0.6642, "step": 12968 }, { - "epoch": 2.7194380373243865, - "grad_norm": 4.154138249860281, - "learning_rate": 4.5526849891555424e-07, - "loss": 0.1391, + "epoch": 1.8312623552668739, + "grad_norm": 3.4408978974910207, + "learning_rate": 6.95887548407568e-06, + "loss": 0.4727, "step": 12969 }, { - "epoch": 2.719647724889914, - "grad_norm": 4.777505832992595, - "learning_rate": 4.545934338886204e-07, - "loss": 0.1832, + "epoch": 1.8314035583168597, + "grad_norm": 3.377404608305792, + "learning_rate": 6.957423270436796e-06, + "loss": 0.5405, "step": 12970 }, { - "epoch": 2.7198574124554415, - "grad_norm": 4.293642384747073, - "learning_rate": 4.5391885808032664e-07, - "loss": 0.1306, + "epoch": 1.8315447613668456, + "grad_norm": 2.850221783227584, + "learning_rate": 6.95597112750608e-06, + "loss": 0.4097, "step": 12971 }, { - "epoch": 2.7200671000209686, - "grad_norm": 3.7549427768051395, - "learning_rate": 4.532447715252508e-07, - "loss": 0.1271, + "epoch": 1.8316859644168315, + "grad_norm": 4.236598820886305, + "learning_rate": 6.9545190553172836e-06, + "loss": 0.7268, "step": 12972 }, { - "epoch": 2.720276787586496, - "grad_norm": 5.894509316397902, - "learning_rate": 4.525711742579375e-07, - "loss": 0.1577, + "epoch": 1.8318271674668174, + "grad_norm": 4.004826606188797, + "learning_rate": 6.953067053904148e-06, + "loss": 0.5986, "step": 12973 }, { - "epoch": 2.7204864751520237, - "grad_norm": 4.241173070553654, - "learning_rate": 4.518980663129102e-07, - "loss": 0.1186, + "epoch": 1.8319683705168033, + "grad_norm": 4.394742659423986, + "learning_rate": 6.951615123300415e-06, + "loss": 0.5886, "step": 12974 }, { - "epoch": 2.7206961627175508, - "grad_norm": 3.967952873027723, - "learning_rate": 4.512254477246669e-07, - "loss": 0.1154, + "epoch": 1.8321095735667892, + "grad_norm": 4.245458123013903, + "learning_rate": 6.950163263539832e-06, + "loss": 0.6157, "step": 12975 }, { - "epoch": 2.7209058502830783, - "grad_norm": 3.4704061749036668, - "learning_rate": 4.5055331852768113e-07, - "loss": 0.117, + "epoch": 1.832250776616775, + "grad_norm": 3.538521373921943, + "learning_rate": 6.948711474656138e-06, + "loss": 0.4566, "step": 12976 }, { - "epoch": 2.721115537848606, - "grad_norm": 4.66058442829878, - "learning_rate": 4.49881678756402e-07, - "loss": 0.1511, + "epoch": 1.832391979666761, + "grad_norm": 3.01240469021295, + "learning_rate": 6.947259756683072e-06, + "loss": 0.4359, "step": 12977 }, { - "epoch": 2.721325225414133, - "grad_norm": 4.549925513913783, - "learning_rate": 4.492105284452486e-07, - "loss": 0.1344, + "epoch": 1.8325331827167468, + "grad_norm": 3.390901435180193, + "learning_rate": 6.945808109654366e-06, + "loss": 0.52, "step": 12978 }, { - "epoch": 2.7215349129796604, - "grad_norm": 4.603219116507876, - "learning_rate": 4.4853986762862234e-07, - "loss": 0.1226, + "epoch": 1.8326743857667327, + "grad_norm": 3.8974968636659555, + "learning_rate": 6.944356533603766e-06, + "loss": 0.5053, "step": 12979 }, { - "epoch": 2.7217446005451875, - "grad_norm": 5.4852833301333845, - "learning_rate": 4.478696963408924e-07, - "loss": 0.1511, + "epoch": 1.8328155888167186, + "grad_norm": 3.95819792918635, + "learning_rate": 6.9429050285650015e-06, + "loss": 0.5875, "step": 12980 }, { - "epoch": 2.721954288110715, - "grad_norm": 4.620012538146422, - "learning_rate": 4.4720001461641015e-07, - "loss": 0.1485, + "epoch": 1.8329567918667045, + "grad_norm": 3.948153055721032, + "learning_rate": 6.941453594571801e-06, + "loss": 0.5853, "step": 12981 }, { - "epoch": 2.722163975676242, - "grad_norm": 4.113204601674995, - "learning_rate": 4.4653082248949487e-07, - "loss": 0.1229, + "epoch": 1.8330979949166903, + "grad_norm": 3.3413157512326612, + "learning_rate": 6.940002231657899e-06, + "loss": 0.5088, "step": 12982 }, { - "epoch": 2.7223736632417697, - "grad_norm": 4.59889031244395, - "learning_rate": 4.458621199944424e-07, - "loss": 0.1148, + "epoch": 1.8332391979666762, + "grad_norm": 4.100716382762518, + "learning_rate": 6.93855093985702e-06, + "loss": 0.6964, "step": 12983 }, { - "epoch": 2.722583350807297, - "grad_norm": 5.381763701939643, - "learning_rate": 4.451939071655298e-07, - "loss": 0.1614, + "epoch": 1.833380401016662, + "grad_norm": 3.6980727272691905, + "learning_rate": 6.9370997192028995e-06, + "loss": 0.5444, "step": 12984 }, { - "epoch": 2.7227930383728243, - "grad_norm": 3.9638831508810317, - "learning_rate": 4.445261840370008e-07, - "loss": 0.1215, + "epoch": 1.833521604066648, + "grad_norm": 3.350969296039136, + "learning_rate": 6.935648569729258e-06, + "loss": 0.5281, "step": 12985 }, { - "epoch": 2.723002725938352, - "grad_norm": 4.190737809321449, - "learning_rate": 4.4385895064307684e-07, - "loss": 0.1286, + "epoch": 1.8336628071166339, + "grad_norm": 3.2090912131890237, + "learning_rate": 6.934197491469818e-06, + "loss": 0.4458, "step": 12986 }, { - "epoch": 2.7232124135038793, - "grad_norm": 5.706131868331729, - "learning_rate": 4.431922070179562e-07, - "loss": 0.1293, + "epoch": 1.8338040101666198, + "grad_norm": 3.096354032595325, + "learning_rate": 6.932746484458306e-06, + "loss": 0.4669, "step": 12987 }, { - "epoch": 2.7234221010694064, - "grad_norm": 4.1399119658392145, - "learning_rate": 4.425259531958115e-07, - "loss": 0.1186, + "epoch": 1.8339452132166056, + "grad_norm": 3.3447443800897885, + "learning_rate": 6.93129554872844e-06, + "loss": 0.4481, "step": 12988 }, { - "epoch": 2.723631788634934, - "grad_norm": 3.722007400210006, - "learning_rate": 4.4186018921078765e-07, - "loss": 0.122, + "epoch": 1.8340864162665915, + "grad_norm": 3.7473248184129537, + "learning_rate": 6.929844684313944e-06, + "loss": 0.5671, "step": 12989 }, { - "epoch": 2.7238414762004615, - "grad_norm": 4.45556209010085, - "learning_rate": 4.4119491509700627e-07, - "loss": 0.1111, + "epoch": 1.8342276193165772, + "grad_norm": 3.9988993363777845, + "learning_rate": 6.928393891248529e-06, + "loss": 0.7134, "step": 12990 }, { - "epoch": 2.7240511637659885, - "grad_norm": 4.614646855155972, - "learning_rate": 4.4053013088856566e-07, - "loss": 0.1699, + "epoch": 1.834368822366563, + "grad_norm": 3.58485593446313, + "learning_rate": 6.926943169565912e-06, + "loss": 0.5114, "step": 12991 }, { - "epoch": 2.724260851331516, - "grad_norm": 3.730685125062525, - "learning_rate": 4.398658366195341e-07, - "loss": 0.1319, + "epoch": 1.834510025416549, + "grad_norm": 3.2803229232289475, + "learning_rate": 6.92549251929981e-06, + "loss": 0.5011, "step": 12992 }, { - "epoch": 2.7244705388970436, - "grad_norm": 4.176612248536511, - "learning_rate": 4.3920203232396096e-07, - "loss": 0.1617, + "epoch": 1.8346512284665348, + "grad_norm": 3.313406497025851, + "learning_rate": 6.924041940483935e-06, + "loss": 0.5176, "step": 12993 }, { - "epoch": 2.7246802264625707, - "grad_norm": 4.4207480177285685, - "learning_rate": 4.385387180358658e-07, - "loss": 0.1419, + "epoch": 1.8347924315165207, + "grad_norm": 3.1129437419894157, + "learning_rate": 6.922591433151995e-06, + "loss": 0.467, "step": 12994 }, { - "epoch": 2.724889914028098, - "grad_norm": 4.1412969041830685, - "learning_rate": 4.378758937892447e-07, - "loss": 0.1101, + "epoch": 1.8349336345665066, + "grad_norm": 3.5439691082162352, + "learning_rate": 6.921140997337701e-06, + "loss": 0.5441, "step": 12995 }, { - "epoch": 2.7250996015936257, - "grad_norm": 5.035721551259342, - "learning_rate": 4.372135596180682e-07, - "loss": 0.1088, + "epoch": 1.8350748376164925, + "grad_norm": 2.969154537991024, + "learning_rate": 6.91969063307476e-06, + "loss": 0.4099, "step": 12996 }, { - "epoch": 2.725309289159153, - "grad_norm": 6.026930671252997, - "learning_rate": 4.365517155562837e-07, - "loss": 0.1452, + "epoch": 1.8352160406664784, + "grad_norm": 3.3955128724458463, + "learning_rate": 6.9182403403968835e-06, + "loss": 0.4975, "step": 12997 }, { - "epoch": 2.7255189767246804, - "grad_norm": 4.530969704085446, - "learning_rate": 4.3589036163781185e-07, - "loss": 0.1268, + "epoch": 1.8353572437164642, + "grad_norm": 3.2582015403075104, + "learning_rate": 6.916790119337766e-06, + "loss": 0.5208, "step": 12998 }, { - "epoch": 2.7257286642902074, - "grad_norm": 4.161639657181135, - "learning_rate": 4.3522949789654434e-07, - "loss": 0.1482, + "epoch": 1.8354984467664501, + "grad_norm": 4.622895253943369, + "learning_rate": 6.915339969931116e-06, + "loss": 0.6737, "step": 12999 }, { - "epoch": 2.725938351855735, - "grad_norm": 3.515417560841747, - "learning_rate": 4.3456912436635414e-07, - "loss": 0.1391, + "epoch": 1.835639649816436, + "grad_norm": 3.1246992229287733, + "learning_rate": 6.913889892210631e-06, + "loss": 0.5008, "step": 13000 }, { - "epoch": 2.726148039421262, - "grad_norm": 2.776970370880082, - "learning_rate": 4.339092410810908e-07, - "loss": 0.0796, + "epoch": 1.835780852866422, + "grad_norm": 3.135299717334731, + "learning_rate": 6.912439886210014e-06, + "loss": 0.4637, "step": 13001 }, { - "epoch": 2.7263577269867896, - "grad_norm": 4.28631678012816, - "learning_rate": 4.3324984807456613e-07, - "loss": 0.1286, + "epoch": 1.8359220559164078, + "grad_norm": 3.7443755135163035, + "learning_rate": 6.9109899519629605e-06, + "loss": 0.6192, "step": 13002 }, { - "epoch": 2.726567414552317, - "grad_norm": 3.336565772482864, - "learning_rate": 4.32590945380581e-07, - "loss": 0.1048, + "epoch": 1.8360632589663937, + "grad_norm": 3.905794861537653, + "learning_rate": 6.9095400895031635e-06, + "loss": 0.5396, "step": 13003 }, { - "epoch": 2.726777102117844, - "grad_norm": 4.81039271802697, - "learning_rate": 4.3193253303290385e-07, - "loss": 0.1464, + "epoch": 1.8362044620163795, + "grad_norm": 3.3812916403383864, + "learning_rate": 6.908090298864325e-06, + "loss": 0.4497, "step": 13004 }, { - "epoch": 2.7269867896833717, - "grad_norm": 3.947673232245669, - "learning_rate": 4.312746110652799e-07, - "loss": 0.1222, + "epoch": 1.8363456650663654, + "grad_norm": 3.8188608102608663, + "learning_rate": 6.9066405800801305e-06, + "loss": 0.5911, "step": 13005 }, { - "epoch": 2.7271964772488992, - "grad_norm": 4.288011507579844, - "learning_rate": 4.306171795114267e-07, - "loss": 0.0879, + "epoch": 1.8364868681163513, + "grad_norm": 4.057828120458315, + "learning_rate": 6.905190933184275e-06, + "loss": 0.5235, "step": 13006 }, { - "epoch": 2.7274061648144263, - "grad_norm": 5.598255226914071, - "learning_rate": 4.2996023840504166e-07, - "loss": 0.1808, + "epoch": 1.8366280711663372, + "grad_norm": 5.3349435502988145, + "learning_rate": 6.903741358210444e-06, + "loss": 0.9251, "step": 13007 }, { - "epoch": 2.727615852379954, - "grad_norm": 3.9635573762059355, - "learning_rate": 4.293037877797912e-07, - "loss": 0.1116, + "epoch": 1.836769274216323, + "grad_norm": 4.383967316408678, + "learning_rate": 6.9022918551923245e-06, + "loss": 0.5969, "step": 13008 }, { - "epoch": 2.7278255399454814, - "grad_norm": 4.1031806293641955, - "learning_rate": 4.286478276693229e-07, - "loss": 0.134, + "epoch": 1.836910477266309, + "grad_norm": 3.8546171920433983, + "learning_rate": 6.900842424163606e-06, + "loss": 0.614, "step": 13009 }, { - "epoch": 2.7280352275110085, - "grad_norm": 5.06334606984689, - "learning_rate": 4.2799235810725426e-07, - "loss": 0.1658, + "epoch": 1.8370516803162948, + "grad_norm": 3.4380878945794313, + "learning_rate": 6.899393065157972e-06, + "loss": 0.5289, "step": 13010 }, { - "epoch": 2.728244915076536, - "grad_norm": 3.719275434923693, - "learning_rate": 4.273373791271773e-07, - "loss": 0.1127, + "epoch": 1.8371928833662807, + "grad_norm": 3.1593764083511684, + "learning_rate": 6.897943778209104e-06, + "loss": 0.3584, "step": 13011 }, { - "epoch": 2.7284546026420635, - "grad_norm": 3.8670569350256696, - "learning_rate": 4.26682890762663e-07, - "loss": 0.1186, + "epoch": 1.8373340864162666, + "grad_norm": 3.0904589831449445, + "learning_rate": 6.896494563350681e-06, + "loss": 0.4471, "step": 13012 }, { - "epoch": 2.7286642902075906, - "grad_norm": 4.161937875338079, - "learning_rate": 4.2602889304725556e-07, - "loss": 0.1246, + "epoch": 1.8374752894662525, + "grad_norm": 3.9474280287847474, + "learning_rate": 6.895045420616385e-06, + "loss": 0.6063, "step": 13013 }, { - "epoch": 2.728873977773118, - "grad_norm": 3.3669231756630325, - "learning_rate": 4.253753860144727e-07, - "loss": 0.1011, + "epoch": 1.8376164925162384, + "grad_norm": 4.12602026206808, + "learning_rate": 6.893596350039896e-06, + "loss": 0.6523, "step": 13014 }, { - "epoch": 2.7290836653386457, - "grad_norm": 3.6344981940066408, - "learning_rate": 4.2472236969780647e-07, - "loss": 0.1411, + "epoch": 1.8377576955662243, + "grad_norm": 3.822764251328042, + "learning_rate": 6.8921473516548835e-06, + "loss": 0.5942, "step": 13015 }, { - "epoch": 2.7292933529041727, - "grad_norm": 3.759262770552555, - "learning_rate": 4.240698441307278e-07, - "loss": 0.1518, + "epoch": 1.8378988986162101, + "grad_norm": 3.112236983973115, + "learning_rate": 6.8906984254950235e-06, + "loss": 0.4882, "step": 13016 }, { - "epoch": 2.7295030404697003, - "grad_norm": 4.196837596671605, - "learning_rate": 4.234178093466768e-07, - "loss": 0.1181, + "epoch": 1.838040101666196, + "grad_norm": 3.0450469945870204, + "learning_rate": 6.889249571593989e-06, + "loss": 0.4571, "step": 13017 }, { - "epoch": 2.7297127280352274, - "grad_norm": 4.830146643294918, - "learning_rate": 4.2276626537907315e-07, - "loss": 0.1321, + "epoch": 1.838181304716182, + "grad_norm": 3.5839369223158872, + "learning_rate": 6.887800789985452e-06, + "loss": 0.5574, "step": 13018 }, { - "epoch": 2.729922415600755, - "grad_norm": 3.9996514286075486, - "learning_rate": 4.2211521226131037e-07, - "loss": 0.1265, + "epoch": 1.8383225077661678, + "grad_norm": 3.161950259352857, + "learning_rate": 6.88635208070308e-06, + "loss": 0.4716, "step": 13019 }, { - "epoch": 2.730132103166282, - "grad_norm": 4.86915354529976, - "learning_rate": 4.2146465002675274e-07, - "loss": 0.1527, + "epoch": 1.8384637108161537, + "grad_norm": 3.464969242668353, + "learning_rate": 6.884903443780541e-06, + "loss": 0.5346, "step": 13020 }, { - "epoch": 2.7303417907318095, - "grad_norm": 4.009173848251095, - "learning_rate": 4.208145787087459e-07, - "loss": 0.1478, + "epoch": 1.8386049138661396, + "grad_norm": 2.9927967266371818, + "learning_rate": 6.883454879251501e-06, + "loss": 0.4622, "step": 13021 }, { - "epoch": 2.730551478297337, - "grad_norm": 2.928690850804952, - "learning_rate": 4.201649983406086e-07, - "loss": 0.1029, + "epoch": 1.8387461169161254, + "grad_norm": 3.6015519063449606, + "learning_rate": 6.882006387149625e-06, + "loss": 0.6709, "step": 13022 }, { - "epoch": 2.730761165862864, - "grad_norm": 3.122123351868127, - "learning_rate": 4.195159089556278e-07, - "loss": 0.0888, + "epoch": 1.8388873199661113, + "grad_norm": 3.6341638971962964, + "learning_rate": 6.880557967508574e-06, + "loss": 0.614, "step": 13023 }, { - "epoch": 2.7309708534283916, - "grad_norm": 3.728001608132078, - "learning_rate": 4.188673105870733e-07, - "loss": 0.108, + "epoch": 1.839028523016097, + "grad_norm": 3.5506296644123303, + "learning_rate": 6.879109620362008e-06, + "loss": 0.5115, "step": 13024 }, { - "epoch": 2.731180540993919, - "grad_norm": 4.0771304710357805, - "learning_rate": 4.1821920326818753e-07, - "loss": 0.1324, + "epoch": 1.8391697260660829, + "grad_norm": 3.928316636069356, + "learning_rate": 6.877661345743587e-06, + "loss": 0.6011, "step": 13025 }, { - "epoch": 2.7313902285594462, - "grad_norm": 4.485887408354709, - "learning_rate": 4.175715870321861e-07, - "loss": 0.1598, + "epoch": 1.8393109291160687, + "grad_norm": 4.011640022806216, + "learning_rate": 6.876213143686965e-06, + "loss": 0.5269, "step": 13026 }, { - "epoch": 2.7315999161249738, - "grad_norm": 3.3454733109642416, - "learning_rate": 4.1692446191226035e-07, - "loss": 0.1367, + "epoch": 1.8394521321660546, + "grad_norm": 3.6274148876941315, + "learning_rate": 6.874765014225804e-06, + "loss": 0.5628, "step": 13027 }, { - "epoch": 2.7318096036905013, - "grad_norm": 3.804249223834096, - "learning_rate": 4.16277827941578e-07, - "loss": 0.1286, + "epoch": 1.8395933352160405, + "grad_norm": 4.645423163851433, + "learning_rate": 6.873316957393752e-06, + "loss": 0.6017, "step": 13028 }, { - "epoch": 2.7320192912560284, - "grad_norm": 3.686310386817152, - "learning_rate": 4.156316851532771e-07, - "loss": 0.1397, + "epoch": 1.8397345382660264, + "grad_norm": 2.7150057290410925, + "learning_rate": 6.871868973224462e-06, + "loss": 0.4436, "step": 13029 }, { - "epoch": 2.732228978821556, - "grad_norm": 4.350701934111513, - "learning_rate": 4.1498603358047787e-07, - "loss": 0.1476, + "epoch": 1.8398757413160123, + "grad_norm": 3.8046919934852093, + "learning_rate": 6.87042106175159e-06, + "loss": 0.5471, "step": 13030 }, { - "epoch": 2.7324386663870834, - "grad_norm": 3.9165162074522812, - "learning_rate": 4.1434087325626816e-07, - "loss": 0.1453, + "epoch": 1.8400169443659982, + "grad_norm": 4.234192039360731, + "learning_rate": 6.868973223008781e-06, + "loss": 0.644, "step": 13031 }, { - "epoch": 2.7326483539526105, - "grad_norm": 3.8880507342164403, - "learning_rate": 4.136962042137127e-07, - "loss": 0.1369, + "epoch": 1.840158147415984, + "grad_norm": 3.1287981791762842, + "learning_rate": 6.867525457029682e-06, + "loss": 0.4094, "step": 13032 }, { - "epoch": 2.732858041518138, - "grad_norm": 4.557953933665127, - "learning_rate": 4.1305202648585397e-07, - "loss": 0.1279, + "epoch": 1.84029935046597, + "grad_norm": 3.6834451614589065, + "learning_rate": 6.866077763847937e-06, + "loss": 0.4957, "step": 13033 }, { - "epoch": 2.7330677290836656, - "grad_norm": 3.930596345472188, - "learning_rate": 4.1240834010570774e-07, - "loss": 0.1439, + "epoch": 1.8404405535159558, + "grad_norm": 3.6931413868123597, + "learning_rate": 6.864630143497191e-06, + "loss": 0.483, "step": 13034 }, { - "epoch": 2.7332774166491927, - "grad_norm": 4.608424277575705, - "learning_rate": 4.117651451062632e-07, - "loss": 0.1428, + "epoch": 1.8405817565659417, + "grad_norm": 3.378501480811915, + "learning_rate": 6.8631825960110866e-06, + "loss": 0.4926, "step": 13035 }, { - "epoch": 2.73348710421472, - "grad_norm": 4.064467420122084, - "learning_rate": 4.1112244152048285e-07, - "loss": 0.1503, + "epoch": 1.8407229596159276, + "grad_norm": 4.029508702159001, + "learning_rate": 6.861735121423264e-06, + "loss": 0.5376, "step": 13036 }, { - "epoch": 2.7336967917802473, - "grad_norm": 5.437481104267108, - "learning_rate": 4.104802293813104e-07, - "loss": 0.146, + "epoch": 1.8408641626659135, + "grad_norm": 3.656874209167187, + "learning_rate": 6.860287719767362e-06, + "loss": 0.5308, "step": 13037 }, { - "epoch": 2.733906479345775, - "grad_norm": 4.617233642135366, - "learning_rate": 4.098385087216561e-07, - "loss": 0.1382, + "epoch": 1.8410053657158993, + "grad_norm": 2.927930795090184, + "learning_rate": 6.858840391077017e-06, + "loss": 0.491, "step": 13038 }, { - "epoch": 2.7341161669113023, - "grad_norm": 4.295030715442799, - "learning_rate": 4.0919727957441366e-07, - "loss": 0.126, + "epoch": 1.8411465687658852, + "grad_norm": 4.2201038579931, + "learning_rate": 6.857393135385866e-06, + "loss": 0.6178, "step": 13039 }, { - "epoch": 2.7343258544768294, - "grad_norm": 5.4199496086373316, - "learning_rate": 4.0855654197244464e-07, - "loss": 0.1482, + "epoch": 1.841287771815871, + "grad_norm": 3.5585677517463314, + "learning_rate": 6.8559459527275426e-06, + "loss": 0.6186, "step": 13040 }, { - "epoch": 2.734535542042357, - "grad_norm": 4.454260250891233, - "learning_rate": 4.079162959485883e-07, - "loss": 0.1479, + "epoch": 1.841428974865857, + "grad_norm": 4.408098395093376, + "learning_rate": 6.8544988431356755e-06, + "loss": 0.6829, "step": 13041 }, { - "epoch": 2.734745229607884, - "grad_norm": 4.033597316713583, - "learning_rate": 4.072765415356572e-07, - "loss": 0.1303, + "epoch": 1.8415701779158429, + "grad_norm": 3.62741665457451, + "learning_rate": 6.853051806643898e-06, + "loss": 0.5815, "step": 13042 }, { - "epoch": 2.7349549171734115, - "grad_norm": 4.6331033193228155, - "learning_rate": 4.066372787664441e-07, - "loss": 0.1487, + "epoch": 1.8417113809658288, + "grad_norm": 3.312570087878604, + "learning_rate": 6.851604843285835e-06, + "loss": 0.5559, "step": 13043 }, { - "epoch": 2.735164604738939, - "grad_norm": 5.362642571081246, - "learning_rate": 4.0599850767370607e-07, - "loss": 0.1663, + "epoch": 1.8418525840158146, + "grad_norm": 3.122379175760547, + "learning_rate": 6.850157953095117e-06, + "loss": 0.4385, "step": 13044 }, { - "epoch": 2.735374292304466, - "grad_norm": 2.8049229516249965, - "learning_rate": 4.0536022829018476e-07, - "loss": 0.0842, + "epoch": 1.8419937870658005, + "grad_norm": 4.140891910963143, + "learning_rate": 6.848711136105368e-06, + "loss": 0.6058, "step": 13045 }, { - "epoch": 2.7355839798699937, - "grad_norm": 3.9739064855732606, - "learning_rate": 4.04722440648595e-07, - "loss": 0.1513, + "epoch": 1.8421349901157864, + "grad_norm": 4.001685898667218, + "learning_rate": 6.84726439235021e-06, + "loss": 0.5267, "step": 13046 }, { - "epoch": 2.735793667435521, - "grad_norm": 4.188148948106658, - "learning_rate": 4.0408514478162075e-07, - "loss": 0.1345, + "epoch": 1.8422761931657723, + "grad_norm": 3.5650653906880674, + "learning_rate": 6.845817721863267e-06, + "loss": 0.4557, "step": 13047 }, { - "epoch": 2.7360033550010483, - "grad_norm": 4.483543983162668, - "learning_rate": 4.0344834072192585e-07, - "loss": 0.1226, + "epoch": 1.8424173962157582, + "grad_norm": 3.2096054155631775, + "learning_rate": 6.844371124678161e-06, + "loss": 0.4321, "step": 13048 }, { - "epoch": 2.736213042566576, - "grad_norm": 4.380791260737828, - "learning_rate": 4.028120285021486e-07, - "loss": 0.1314, + "epoch": 1.842558599265744, + "grad_norm": 3.452462047519633, + "learning_rate": 6.8429246008285046e-06, + "loss": 0.5292, "step": 13049 }, { - "epoch": 2.7364227301321034, - "grad_norm": 5.135582382637659, - "learning_rate": 4.0217620815489964e-07, - "loss": 0.205, + "epoch": 1.84269980231573, + "grad_norm": 3.7793251118856563, + "learning_rate": 6.841478150347918e-06, + "loss": 0.5563, "step": 13050 }, { - "epoch": 2.7366324176976304, - "grad_norm": 4.049195953770861, - "learning_rate": 4.015408797127684e-07, - "loss": 0.128, + "epoch": 1.8428410053657158, + "grad_norm": 3.204073320838366, + "learning_rate": 6.840031773270011e-06, + "loss": 0.465, "step": 13051 }, { - "epoch": 2.736842105263158, - "grad_norm": 3.517403727679536, - "learning_rate": 4.009060432083134e-07, - "loss": 0.1277, + "epoch": 1.8429822084157017, + "grad_norm": 4.481342358710885, + "learning_rate": 6.838585469628405e-06, + "loss": 0.5994, "step": 13052 }, { - "epoch": 2.7370517928286855, - "grad_norm": 4.828535459679986, - "learning_rate": 4.0027169867407177e-07, - "loss": 0.1706, + "epoch": 1.8431234114656876, + "grad_norm": 3.8309132907575254, + "learning_rate": 6.837139239456706e-06, + "loss": 0.4823, "step": 13053 }, { - "epoch": 2.7372614803942126, - "grad_norm": 4.011334635681633, - "learning_rate": 3.996378461425554e-07, - "loss": 0.1162, + "epoch": 1.8432646145156735, + "grad_norm": 4.185046160443699, + "learning_rate": 6.8356930827885256e-06, + "loss": 0.5965, "step": 13054 }, { - "epoch": 2.73747116795974, - "grad_norm": 3.4721676360167564, - "learning_rate": 3.990044856462527e-07, - "loss": 0.1127, + "epoch": 1.8434058175656594, + "grad_norm": 3.6433729020238492, + "learning_rate": 6.83424699965747e-06, + "loss": 0.6031, "step": 13055 }, { - "epoch": 2.737680855525267, - "grad_norm": 4.407681619585082, - "learning_rate": 3.9837161721762106e-07, - "loss": 0.1211, + "epoch": 1.8435470206156452, + "grad_norm": 3.419474893240668, + "learning_rate": 6.832800990097148e-06, + "loss": 0.5126, "step": 13056 }, { - "epoch": 2.7378905430907947, - "grad_norm": 4.229210439689979, - "learning_rate": 3.9773924088909566e-07, - "loss": 0.1221, + "epoch": 1.8436882236656311, + "grad_norm": 2.8802853102732637, + "learning_rate": 6.831355054141167e-06, + "loss": 0.4329, "step": 13057 }, { - "epoch": 2.7381002306563222, - "grad_norm": 3.8301973295774, - "learning_rate": 3.971073566930894e-07, - "loss": 0.1256, + "epoch": 1.843829426715617, + "grad_norm": 3.39032453280376, + "learning_rate": 6.829909191823121e-06, + "loss": 0.3999, "step": 13058 }, { - "epoch": 2.7383099182218493, - "grad_norm": 4.465339519646707, - "learning_rate": 3.9647596466198757e-07, - "loss": 0.1249, + "epoch": 1.8439706297656029, + "grad_norm": 4.138369641931917, + "learning_rate": 6.828463403176619e-06, + "loss": 0.6083, "step": 13059 }, { - "epoch": 2.738519605787377, - "grad_norm": 3.1818519083640497, - "learning_rate": 3.9584506482814864e-07, - "loss": 0.1154, + "epoch": 1.8441118328155888, + "grad_norm": 4.382449423747757, + "learning_rate": 6.827017688235255e-06, + "loss": 0.6343, "step": 13060 }, { - "epoch": 2.738729293352904, - "grad_norm": 4.987861233438751, - "learning_rate": 3.952146572239068e-07, - "loss": 0.1227, + "epoch": 1.8442530358655747, + "grad_norm": 3.8566278062713835, + "learning_rate": 6.825572047032631e-06, + "loss": 0.5486, "step": 13061 }, { - "epoch": 2.7389389809184315, - "grad_norm": 4.510606008019649, - "learning_rate": 3.945847418815729e-07, - "loss": 0.1, + "epoch": 1.8443942389155605, + "grad_norm": 4.64398193378965, + "learning_rate": 6.824126479602342e-06, + "loss": 0.7301, "step": 13062 }, { - "epoch": 2.739148668483959, - "grad_norm": 3.7856506544172133, - "learning_rate": 3.9395531883343105e-07, - "loss": 0.1297, + "epoch": 1.8445354419655464, + "grad_norm": 4.088316942192737, + "learning_rate": 6.822680985977981e-06, + "loss": 0.5754, "step": 13063 }, { - "epoch": 2.739358356049486, - "grad_norm": 3.5064900545571827, - "learning_rate": 3.9332638811173885e-07, - "loss": 0.1263, + "epoch": 1.8446766450155323, + "grad_norm": 3.331222689538483, + "learning_rate": 6.821235566193143e-06, + "loss": 0.5324, "step": 13064 }, { - "epoch": 2.7395680436150136, - "grad_norm": 4.160328132812707, - "learning_rate": 3.9269794974873264e-07, - "loss": 0.125, + "epoch": 1.8448178480655182, + "grad_norm": 3.496662540262184, + "learning_rate": 6.819790220281419e-06, + "loss": 0.5635, "step": 13065 }, { - "epoch": 2.739777731180541, - "grad_norm": 4.585377453459852, - "learning_rate": 3.920700037766179e-07, - "loss": 0.1321, + "epoch": 1.844959051115504, + "grad_norm": 4.368564527264109, + "learning_rate": 6.818344948276397e-06, + "loss": 0.5982, "step": 13066 }, { - "epoch": 2.739987418746068, - "grad_norm": 3.5077687544314538, - "learning_rate": 3.9144255022758203e-07, - "loss": 0.1344, + "epoch": 1.84510025416549, + "grad_norm": 4.018651763025937, + "learning_rate": 6.816899750211662e-06, + "loss": 0.6362, "step": 13067 }, { - "epoch": 2.7401971063115957, - "grad_norm": 6.688263692432872, - "learning_rate": 3.9081558913377947e-07, - "loss": 0.1662, + "epoch": 1.8452414572154758, + "grad_norm": 3.13911551150017, + "learning_rate": 6.815454626120804e-06, + "loss": 0.4399, "step": 13068 }, { - "epoch": 2.7404067938771233, - "grad_norm": 3.761823956359265, - "learning_rate": 3.9018912052734335e-07, - "loss": 0.1213, + "epoch": 1.8453826602654617, + "grad_norm": 4.709398180193074, + "learning_rate": 6.814009576037401e-06, + "loss": 0.6822, "step": 13069 }, { - "epoch": 2.7406164814426504, - "grad_norm": 3.784821829057151, - "learning_rate": 3.8956314444038135e-07, - "loss": 0.1189, + "epoch": 1.8455238633154476, + "grad_norm": 3.9961081870164747, + "learning_rate": 6.812564599995042e-06, + "loss": 0.5371, "step": 13070 }, { - "epoch": 2.740826169008178, - "grad_norm": 4.316685418555946, - "learning_rate": 3.8893766090497886e-07, - "loss": 0.1209, + "epoch": 1.8456650663654335, + "grad_norm": 4.572040309855932, + "learning_rate": 6.811119698027307e-06, + "loss": 0.774, "step": 13071 }, { - "epoch": 2.7410358565737054, - "grad_norm": 4.130071273999352, - "learning_rate": 3.8831266995319027e-07, - "loss": 0.1089, + "epoch": 1.8458062694154194, + "grad_norm": 3.495209834569317, + "learning_rate": 6.809674870167768e-06, + "loss": 0.598, "step": 13072 }, { - "epoch": 2.7412455441392325, - "grad_norm": 5.564957271679498, - "learning_rate": 3.8768817161704776e-07, - "loss": 0.1665, + "epoch": 1.8459474724654052, + "grad_norm": 3.8499092741833434, + "learning_rate": 6.808230116450012e-06, + "loss": 0.5956, "step": 13073 }, { - "epoch": 2.74145523170476, - "grad_norm": 6.426699029027333, - "learning_rate": 3.870641659285579e-07, - "loss": 0.1848, + "epoch": 1.8460886755153911, + "grad_norm": 3.3129191524042794, + "learning_rate": 6.80678543690761e-06, + "loss": 0.5083, "step": 13074 }, { - "epoch": 2.7416649192702875, - "grad_norm": 4.390466017640438, - "learning_rate": 3.864406529197018e-07, - "loss": 0.151, + "epoch": 1.846229878565377, + "grad_norm": 3.3148960715697307, + "learning_rate": 6.805340831574135e-06, + "loss": 0.4707, "step": 13075 }, { - "epoch": 2.7418746068358146, - "grad_norm": 3.3542251218238226, - "learning_rate": 3.858176326224372e-07, - "loss": 0.0865, + "epoch": 1.846371081615363, + "grad_norm": 3.2693997877370493, + "learning_rate": 6.803896300483159e-06, + "loss": 0.4845, "step": 13076 }, { - "epoch": 2.742084294401342, - "grad_norm": 3.791685912559164, - "learning_rate": 3.8519510506869417e-07, - "loss": 0.1176, + "epoch": 1.8465122846653488, + "grad_norm": 3.1775377575426695, + "learning_rate": 6.802451843668249e-06, + "loss": 0.4902, "step": 13077 }, { - "epoch": 2.7422939819668692, - "grad_norm": 5.21462192045731, - "learning_rate": 3.8457307029037716e-07, - "loss": 0.1649, + "epoch": 1.8466534877153347, + "grad_norm": 3.7416205054708738, + "learning_rate": 6.8010074611629815e-06, + "loss": 0.5184, "step": 13078 }, { - "epoch": 2.7425036695323968, - "grad_norm": 4.529877945127559, - "learning_rate": 3.8395152831936734e-07, - "loss": 0.1316, + "epoch": 1.8467946907653205, + "grad_norm": 3.183010921895056, + "learning_rate": 6.799563153000919e-06, + "loss": 0.4173, "step": 13079 }, { - "epoch": 2.742713357097924, - "grad_norm": 3.7712872183011563, - "learning_rate": 3.833304791875214e-07, - "loss": 0.1273, + "epoch": 1.8469358938153064, + "grad_norm": 3.0111762591850164, + "learning_rate": 6.798118919215625e-06, + "loss": 0.442, "step": 13080 }, { - "epoch": 2.7429230446634514, - "grad_norm": 3.548834989663908, - "learning_rate": 3.827099229266662e-07, - "loss": 0.103, + "epoch": 1.8470770968652923, + "grad_norm": 3.5036289780116214, + "learning_rate": 6.7966747598406625e-06, + "loss": 0.5281, "step": 13081 }, { - "epoch": 2.743132732228979, - "grad_norm": 5.361699992626051, - "learning_rate": 3.8208985956860736e-07, - "loss": 0.1374, + "epoch": 1.8472182999152782, + "grad_norm": 2.8188643944298413, + "learning_rate": 6.795230674909601e-06, + "loss": 0.4138, "step": 13082 }, { - "epoch": 2.743342419794506, - "grad_norm": 3.719129501816349, - "learning_rate": 3.8147028914512494e-07, - "loss": 0.1011, + "epoch": 1.847359502965264, + "grad_norm": 3.933975929806643, + "learning_rate": 6.793786664455992e-06, + "loss": 0.5154, "step": 13083 }, { - "epoch": 2.7435521073600335, - "grad_norm": 5.664017426870495, - "learning_rate": 3.808512116879737e-07, - "loss": 0.1705, + "epoch": 1.84750070601525, + "grad_norm": 3.689280996258638, + "learning_rate": 6.7923427285133945e-06, + "loss": 0.5283, "step": 13084 }, { - "epoch": 2.743761794925561, - "grad_norm": 4.727731389146657, - "learning_rate": 3.8023262722888033e-07, - "loss": 0.1709, + "epoch": 1.8476419090652358, + "grad_norm": 4.230289474288531, + "learning_rate": 6.790898867115368e-06, + "loss": 0.7521, "step": 13085 }, { - "epoch": 2.743971482491088, - "grad_norm": 5.1136299412198145, - "learning_rate": 3.7961453579955064e-07, - "loss": 0.1307, + "epoch": 1.8477831121152217, + "grad_norm": 3.350836655520724, + "learning_rate": 6.789455080295464e-06, + "loss": 0.5472, "step": 13086 }, { - "epoch": 2.7441811700566157, - "grad_norm": 4.169705775287418, - "learning_rate": 3.7899693743166043e-07, - "loss": 0.1354, + "epoch": 1.8479243151652076, + "grad_norm": 3.026929105937988, + "learning_rate": 6.788011368087239e-06, + "loss": 0.4157, "step": 13087 }, { - "epoch": 2.744390857622143, - "grad_norm": 4.510827347190437, - "learning_rate": 3.783798321568655e-07, - "loss": 0.1526, + "epoch": 1.8480655182151935, + "grad_norm": 3.4634934694148867, + "learning_rate": 6.786567730524243e-06, + "loss": 0.5804, "step": 13088 }, { - "epoch": 2.7446005451876703, - "grad_norm": 4.172586836498216, - "learning_rate": 3.7776322000679156e-07, - "loss": 0.1028, + "epoch": 1.8482067212651794, + "grad_norm": 3.8032417006001373, + "learning_rate": 6.785124167640024e-06, + "loss": 0.5181, "step": 13089 }, { - "epoch": 2.744810232753198, - "grad_norm": 3.943851788797182, - "learning_rate": 3.771471010130401e-07, - "loss": 0.1451, + "epoch": 1.8483479243151653, + "grad_norm": 3.0420925090824507, + "learning_rate": 6.783680679468132e-06, + "loss": 0.4914, "step": 13090 }, { - "epoch": 2.7450199203187253, - "grad_norm": 3.8749849758751007, - "learning_rate": 3.7653147520719024e-07, - "loss": 0.1375, + "epoch": 1.8484891273651511, + "grad_norm": 3.543399742681195, + "learning_rate": 6.782237266042113e-06, + "loss": 0.5275, "step": 13091 }, { - "epoch": 2.7452296078842524, - "grad_norm": 4.9005040556261, - "learning_rate": 3.759163426207935e-07, - "loss": 0.1407, + "epoch": 1.848630330415137, + "grad_norm": 3.965937173537493, + "learning_rate": 6.78079392739551e-06, + "loss": 0.5368, "step": 13092 }, { - "epoch": 2.74543929544978, - "grad_norm": 3.898923187637183, - "learning_rate": 3.7530170328537674e-07, - "loss": 0.1416, + "epoch": 1.848771533465123, + "grad_norm": 3.5237579735437596, + "learning_rate": 6.779350663561866e-06, + "loss": 0.5556, "step": 13093 }, { - "epoch": 2.7456489830153075, - "grad_norm": 3.9399313927751427, - "learning_rate": 3.7468755723244046e-07, - "loss": 0.12, + "epoch": 1.8489127365151088, + "grad_norm": 3.148869908137464, + "learning_rate": 6.777907474574718e-06, + "loss": 0.5379, "step": 13094 }, { - "epoch": 2.7458586705808345, - "grad_norm": 6.3920146592992655, - "learning_rate": 3.740739044934616e-07, - "loss": 0.1696, + "epoch": 1.8490539395650947, + "grad_norm": 4.022133592110588, + "learning_rate": 6.776464360467612e-06, + "loss": 0.5872, "step": 13095 }, { - "epoch": 2.746068358146362, - "grad_norm": 3.397969669856621, - "learning_rate": 3.734607450998895e-07, - "loss": 0.1066, + "epoch": 1.8491951426150806, + "grad_norm": 3.604395909089462, + "learning_rate": 6.775021321274082e-06, + "loss": 0.5331, "step": 13096 }, { - "epoch": 2.746278045711889, - "grad_norm": 3.9531536301510926, - "learning_rate": 3.7284807908315235e-07, - "loss": 0.113, + "epoch": 1.8493363456650664, + "grad_norm": 3.170710675596997, + "learning_rate": 6.773578357027663e-06, + "loss": 0.4818, "step": 13097 }, { - "epoch": 2.7464877332774167, - "grad_norm": 4.779758917615418, - "learning_rate": 3.7223590647464833e-07, - "loss": 0.1656, + "epoch": 1.8494775487150523, + "grad_norm": 3.6832256942273767, + "learning_rate": 6.772135467761889e-06, + "loss": 0.5306, "step": 13098 }, { - "epoch": 2.7466974208429438, - "grad_norm": 3.7435038922630723, - "learning_rate": 3.7162422730575133e-07, - "loss": 0.1249, + "epoch": 1.8496187517650382, + "grad_norm": 3.68538872524263, + "learning_rate": 6.770692653510298e-06, + "loss": 0.5214, "step": 13099 }, { - "epoch": 2.7469071084084713, - "grad_norm": 3.7160965670180497, - "learning_rate": 3.710130416078117e-07, - "loss": 0.115, + "epoch": 1.849759954815024, + "grad_norm": 2.884190096524447, + "learning_rate": 6.769249914306408e-06, + "loss": 0.4183, "step": 13100 }, { - "epoch": 2.747116795973999, - "grad_norm": 4.142113055418561, - "learning_rate": 3.704023494121567e-07, - "loss": 0.1099, + "epoch": 1.84990115786501, + "grad_norm": 3.9703138565317837, + "learning_rate": 6.7678072501837575e-06, + "loss": 0.6118, "step": 13101 }, { - "epoch": 2.747326483539526, - "grad_norm": 3.197186020749749, - "learning_rate": 3.6979215075008123e-07, - "loss": 0.112, + "epoch": 1.8500423609149959, + "grad_norm": 3.157783159407482, + "learning_rate": 6.766364661175872e-06, + "loss": 0.5218, "step": 13102 }, { - "epoch": 2.7475361711050534, - "grad_norm": 3.588662667123561, - "learning_rate": 3.6918244565286033e-07, - "loss": 0.1217, + "epoch": 1.8501835639649817, + "grad_norm": 3.8373369842682714, + "learning_rate": 6.7649221473162705e-06, + "loss": 0.5184, "step": 13103 }, { - "epoch": 2.747745858670581, - "grad_norm": 3.5467705783460692, - "learning_rate": 3.685732341517445e-07, - "loss": 0.1136, + "epoch": 1.8503247670149676, + "grad_norm": 4.3834388670315745, + "learning_rate": 6.763479708638485e-06, + "loss": 0.7996, "step": 13104 }, { - "epoch": 2.747955546236108, - "grad_norm": 5.1426684179322315, - "learning_rate": 3.679645162779555e-07, - "loss": 0.1472, + "epoch": 1.8504659700649535, + "grad_norm": 4.324165680170939, + "learning_rate": 6.762037345176034e-06, + "loss": 0.5653, "step": 13105 }, { - "epoch": 2.7481652338016356, - "grad_norm": 6.6765549065229575, - "learning_rate": 3.673562920626894e-07, - "loss": 0.1398, + "epoch": 1.8506071731149394, + "grad_norm": 3.2994406258364637, + "learning_rate": 6.7605950569624335e-06, + "loss": 0.4666, "step": 13106 }, { - "epoch": 2.748374921367163, - "grad_norm": 3.082389406644875, - "learning_rate": 3.6674856153712137e-07, - "loss": 0.0969, + "epoch": 1.8507483761649253, + "grad_norm": 3.7796980624048153, + "learning_rate": 6.759152844031207e-06, + "loss": 0.624, "step": 13107 }, { - "epoch": 2.74858460893269, - "grad_norm": 4.900001066246391, - "learning_rate": 3.6614132473239636e-07, - "loss": 0.1569, + "epoch": 1.8508895792149112, + "grad_norm": 3.2514149408468023, + "learning_rate": 6.757710706415872e-06, + "loss": 0.4232, "step": 13108 }, { - "epoch": 2.7487942964982177, - "grad_norm": 3.4354965307657737, - "learning_rate": 3.655345816796385e-07, - "loss": 0.1186, + "epoch": 1.851030782264897, + "grad_norm": 3.487641472513069, + "learning_rate": 6.756268644149937e-06, + "loss": 0.5797, "step": 13109 }, { - "epoch": 2.7490039840637452, - "grad_norm": 3.9565278578734144, - "learning_rate": 3.6492833240994395e-07, - "loss": 0.1305, + "epoch": 1.851171985314883, + "grad_norm": 3.307199768673497, + "learning_rate": 6.754826657266918e-06, + "loss": 0.5566, "step": 13110 }, { - "epoch": 2.7492136716292723, - "grad_norm": 5.237203355733942, - "learning_rate": 3.6432257695438235e-07, - "loss": 0.1631, + "epoch": 1.8513131883648688, + "grad_norm": 3.85226359538764, + "learning_rate": 6.753384745800323e-06, + "loss": 0.559, "step": 13111 }, { - "epoch": 2.7494233591948, - "grad_norm": 3.5204876778776804, - "learning_rate": 3.63717315344001e-07, - "loss": 0.1264, + "epoch": 1.8514543914148547, + "grad_norm": 2.8679677599527413, + "learning_rate": 6.7519429097836675e-06, + "loss": 0.3939, "step": 13112 }, { - "epoch": 2.7496330467603274, - "grad_norm": 4.89761190286342, - "learning_rate": 3.6311254760982074e-07, - "loss": 0.1841, + "epoch": 1.8515955944648406, + "grad_norm": 3.7060414432084157, + "learning_rate": 6.750501149250456e-06, + "loss": 0.5533, "step": 13113 }, { - "epoch": 2.7498427343258545, - "grad_norm": 3.1440827744403914, - "learning_rate": 3.625082737828378e-07, - "loss": 0.1168, + "epoch": 1.8517367975148264, + "grad_norm": 4.09989522133206, + "learning_rate": 6.749059464234193e-06, + "loss": 0.5577, "step": 13114 }, { - "epoch": 2.750052421891382, - "grad_norm": 4.355234658361031, - "learning_rate": 3.619044938940197e-07, - "loss": 0.1101, + "epoch": 1.8518780005648123, + "grad_norm": 3.4591118725350807, + "learning_rate": 6.747617854768384e-06, + "loss": 0.5244, "step": 13115 }, { - "epoch": 2.750262109456909, - "grad_norm": 5.427987352236284, - "learning_rate": 3.613012079743128e-07, - "loss": 0.1426, + "epoch": 1.8520192036147982, + "grad_norm": 2.9961117988610804, + "learning_rate": 6.746176320886537e-06, + "loss": 0.4481, "step": 13116 }, { - "epoch": 2.7504717970224366, - "grad_norm": 4.228338932327801, - "learning_rate": 3.606984160546378e-07, - "loss": 0.1167, + "epoch": 1.852160406664784, + "grad_norm": 3.3508685951080093, + "learning_rate": 6.74473486262214e-06, + "loss": 0.5461, "step": 13117 }, { - "epoch": 2.7506814845879637, - "grad_norm": 3.7465332245080663, - "learning_rate": 3.6009611816588685e-07, - "loss": 0.125, + "epoch": 1.85230160971477, + "grad_norm": 3.783042260360074, + "learning_rate": 6.743293480008703e-06, + "loss": 0.5308, "step": 13118 }, { - "epoch": 2.750891172153491, - "grad_norm": 4.250225679873633, - "learning_rate": 3.5949431433892846e-07, - "loss": 0.1267, + "epoch": 1.8524428127647559, + "grad_norm": 4.117493158412321, + "learning_rate": 6.7418521730797175e-06, + "loss": 0.5666, "step": 13119 }, { - "epoch": 2.7511008597190187, - "grad_norm": 3.92420688975659, - "learning_rate": 3.588930046046091e-07, - "loss": 0.133, + "epoch": 1.8525840158147417, + "grad_norm": 3.4205811816104585, + "learning_rate": 6.740410941868678e-06, + "loss": 0.4995, "step": 13120 }, { - "epoch": 2.751310547284546, - "grad_norm": 4.063127554739173, - "learning_rate": 3.58292188993743e-07, - "loss": 0.1219, + "epoch": 1.8527252188647276, + "grad_norm": 2.878786239601334, + "learning_rate": 6.738969786409084e-06, + "loss": 0.4281, "step": 13121 }, { - "epoch": 2.7515202348500734, - "grad_norm": 4.375136017281564, - "learning_rate": 3.576918675371255e-07, - "loss": 0.1196, + "epoch": 1.8528664219147135, + "grad_norm": 2.821309256526727, + "learning_rate": 6.737528706734423e-06, + "loss": 0.4784, "step": 13122 }, { - "epoch": 2.751729922415601, - "grad_norm": 4.5861848087416615, - "learning_rate": 3.5709204026552425e-07, - "loss": 0.1233, + "epoch": 1.8530076249646994, + "grad_norm": 4.11967329139101, + "learning_rate": 6.736087702878184e-06, + "loss": 0.5713, "step": 13123 }, { - "epoch": 2.751939609981128, - "grad_norm": 3.6517934292796053, - "learning_rate": 3.564927072096802e-07, - "loss": 0.122, + "epoch": 1.8531488280146853, + "grad_norm": 4.086038772971897, + "learning_rate": 6.734646774873863e-06, + "loss": 0.6332, "step": 13124 }, { - "epoch": 2.7521492975466555, - "grad_norm": 2.9779729000042923, - "learning_rate": 3.5589386840031105e-07, - "loss": 0.112, + "epoch": 1.8532900310646712, + "grad_norm": 3.2797519348146764, + "learning_rate": 6.733205922754935e-06, + "loss": 0.4412, "step": 13125 }, { - "epoch": 2.752358985112183, - "grad_norm": 5.492115655330109, - "learning_rate": 3.5529552386810773e-07, - "loss": 0.1505, + "epoch": 1.8534312341146568, + "grad_norm": 3.2819511546815336, + "learning_rate": 6.731765146554891e-06, + "loss": 0.5208, "step": 13126 }, { - "epoch": 2.75256867267771, - "grad_norm": 3.271788870604396, - "learning_rate": 3.5469767364373577e-07, - "loss": 0.097, + "epoch": 1.8535724371646427, + "grad_norm": 4.237105433611935, + "learning_rate": 6.730324446307217e-06, + "loss": 0.771, "step": 13127 }, { - "epoch": 2.7527783602432376, - "grad_norm": 7.163102690944309, - "learning_rate": 3.5410031775783727e-07, - "loss": 0.1506, + "epoch": 1.8537136402146286, + "grad_norm": 3.647457081672026, + "learning_rate": 6.728883822045389e-06, + "loss": 0.5972, "step": 13128 }, { - "epoch": 2.752988047808765, - "grad_norm": 4.274985455413665, - "learning_rate": 3.5350345624102886e-07, - "loss": 0.1164, + "epoch": 1.8538548432646145, + "grad_norm": 3.940859323027391, + "learning_rate": 6.727443273802885e-06, + "loss": 0.6026, "step": 13129 }, { - "epoch": 2.7531977353742922, - "grad_norm": 3.102493697348989, - "learning_rate": 3.529070891238984e-07, - "loss": 0.0944, + "epoch": 1.8539960463146004, + "grad_norm": 2.9861249254050892, + "learning_rate": 6.726002801613189e-06, + "loss": 0.427, "step": 13130 }, { - "epoch": 2.7534074229398198, - "grad_norm": 5.902044342160964, - "learning_rate": 3.5231121643701127e-07, - "loss": 0.1502, + "epoch": 1.8541372493645862, + "grad_norm": 3.3952705695280323, + "learning_rate": 6.724562405509775e-06, + "loss": 0.5192, "step": 13131 }, { - "epoch": 2.7536171105053473, - "grad_norm": 4.372388953363877, - "learning_rate": 3.5171583821090873e-07, - "loss": 0.1171, + "epoch": 1.8542784524145721, + "grad_norm": 3.609157372697066, + "learning_rate": 6.723122085526113e-06, + "loss": 0.5409, "step": 13132 }, { - "epoch": 2.7538267980708744, - "grad_norm": 5.440613931656129, - "learning_rate": 3.511209544761018e-07, - "loss": 0.1643, + "epoch": 1.854419655464558, + "grad_norm": 4.113757548739018, + "learning_rate": 6.721681841695684e-06, + "loss": 0.6379, "step": 13133 }, { - "epoch": 2.754036485636402, - "grad_norm": 4.084115200852558, - "learning_rate": 3.505265652630818e-07, - "loss": 0.1381, + "epoch": 1.8545608585145439, + "grad_norm": 3.1597205351500808, + "learning_rate": 6.720241674051948e-06, + "loss": 0.4846, "step": 13134 }, { - "epoch": 2.754246173201929, - "grad_norm": 4.1777709926562725, - "learning_rate": 3.4993267060231206e-07, - "loss": 0.1338, + "epoch": 1.8547020615645298, + "grad_norm": 3.2350819632305052, + "learning_rate": 6.718801582628382e-06, + "loss": 0.4667, "step": 13135 }, { - "epoch": 2.7544558607674565, - "grad_norm": 4.6997194254327805, - "learning_rate": 3.4933927052422936e-07, - "loss": 0.1102, + "epoch": 1.8548432646145157, + "grad_norm": 3.5708744269335746, + "learning_rate": 6.717361567458449e-06, + "loss": 0.5791, "step": 13136 }, { - "epoch": 2.7546655483329836, - "grad_norm": 3.562132076821083, - "learning_rate": 3.487463650592471e-07, - "loss": 0.1091, + "epoch": 1.8549844676645015, + "grad_norm": 2.9508234371242708, + "learning_rate": 6.7159216285756136e-06, + "loss": 0.4636, "step": 13137 }, { - "epoch": 2.754875235898511, - "grad_norm": 3.860834567962666, - "learning_rate": 3.481539542377532e-07, - "loss": 0.1294, + "epoch": 1.8551256707144874, + "grad_norm": 3.4258585562427566, + "learning_rate": 6.714481766013343e-06, + "loss": 0.4654, "step": 13138 }, { - "epoch": 2.7550849234640387, - "grad_norm": 5.435384765939473, - "learning_rate": 3.475620380901101e-07, - "loss": 0.1501, + "epoch": 1.8552668737644733, + "grad_norm": 3.7640246918880753, + "learning_rate": 6.713041979805098e-06, + "loss": 0.4919, "step": 13139 }, { - "epoch": 2.7552946110295657, - "grad_norm": 3.9516121408780593, - "learning_rate": 3.4697061664665354e-07, - "loss": 0.1336, + "epoch": 1.8554080768144592, + "grad_norm": 3.5634126378827338, + "learning_rate": 6.711602269984339e-06, + "loss": 0.5251, "step": 13140 }, { - "epoch": 2.7555042985950933, - "grad_norm": 4.011476632573279, - "learning_rate": 3.463796899376948e-07, - "loss": 0.1416, + "epoch": 1.855549279864445, + "grad_norm": 3.1346662329080135, + "learning_rate": 6.710162636584523e-06, + "loss": 0.4941, "step": 13141 }, { - "epoch": 2.755713986160621, - "grad_norm": 4.517091721202496, - "learning_rate": 3.457892579935218e-07, - "loss": 0.1137, + "epoch": 1.855690482914431, + "grad_norm": 4.740619663834005, + "learning_rate": 6.7087230796391035e-06, + "loss": 0.5694, "step": 13142 }, { - "epoch": 2.755923673726148, - "grad_norm": 4.630549011987817, - "learning_rate": 3.4519932084439154e-07, - "loss": 0.1625, + "epoch": 1.8558316859644168, + "grad_norm": 3.4944571963537348, + "learning_rate": 6.707283599181539e-06, + "loss": 0.4434, "step": 13143 }, { - "epoch": 2.7561333612916754, - "grad_norm": 3.5814864642386772, - "learning_rate": 3.446098785205432e-07, - "loss": 0.1199, + "epoch": 1.8559728890144027, + "grad_norm": 3.0204521163123625, + "learning_rate": 6.705844195245283e-06, + "loss": 0.4872, "step": 13144 }, { - "epoch": 2.756343048857203, - "grad_norm": 5.190948645736391, - "learning_rate": 3.440209310521825e-07, - "loss": 0.1736, + "epoch": 1.8561140920643886, + "grad_norm": 4.191052787416471, + "learning_rate": 6.704404867863785e-06, + "loss": 0.5735, "step": 13145 }, { - "epoch": 2.75655273642273, - "grad_norm": 3.3138893189654897, - "learning_rate": 3.4343247846949976e-07, - "loss": 0.1143, + "epoch": 1.8562552951143745, + "grad_norm": 2.9452209389480477, + "learning_rate": 6.702965617070492e-06, + "loss": 0.4326, "step": 13146 }, { - "epoch": 2.7567624239882575, - "grad_norm": 3.583057535002732, - "learning_rate": 3.4284452080264985e-07, - "loss": 0.1269, + "epoch": 1.8563964981643604, + "grad_norm": 3.6101116522698873, + "learning_rate": 6.701526442898855e-06, + "loss": 0.6204, "step": 13147 }, { - "epoch": 2.756972111553785, - "grad_norm": 4.282476344919575, - "learning_rate": 3.4225705808176633e-07, - "loss": 0.1437, + "epoch": 1.8565377012143462, + "grad_norm": 3.100857695475702, + "learning_rate": 6.7000873453823225e-06, + "loss": 0.4886, "step": 13148 }, { - "epoch": 2.757181799119312, - "grad_norm": 3.794478606518034, - "learning_rate": 3.4167009033695963e-07, - "loss": 0.094, + "epoch": 1.8566789042643321, + "grad_norm": 3.2494731154632506, + "learning_rate": 6.698648324554331e-06, + "loss": 0.488, "step": 13149 }, { - "epoch": 2.7573914866848397, - "grad_norm": 6.620890247824128, - "learning_rate": 3.4108361759831345e-07, - "loss": 0.1573, + "epoch": 1.856820107314318, + "grad_norm": 3.5855612556035656, + "learning_rate": 6.697209380448333e-06, + "loss": 0.5552, "step": 13150 }, { - "epoch": 2.757601174250367, - "grad_norm": 3.8967377074189216, - "learning_rate": 3.404976398958837e-07, - "loss": 0.1208, + "epoch": 1.856961310364304, + "grad_norm": 3.7446187345342334, + "learning_rate": 6.695770513097756e-06, + "loss": 0.5325, "step": 13151 }, { - "epoch": 2.7578108618158943, - "grad_norm": 4.651315657966747, - "learning_rate": 3.399121572597031e-07, - "loss": 0.128, + "epoch": 1.8571025134142898, + "grad_norm": 3.6266193772049435, + "learning_rate": 6.6943317225360474e-06, + "loss": 0.5474, "step": 13152 }, { - "epoch": 2.758020549381422, - "grad_norm": 4.711174221559803, - "learning_rate": 3.3932716971977976e-07, - "loss": 0.1253, + "epoch": 1.8572437164642757, + "grad_norm": 3.3781978850750596, + "learning_rate": 6.692893008796643e-06, + "loss": 0.5177, "step": 13153 }, { - "epoch": 2.758230236946949, - "grad_norm": 4.4749655339351255, - "learning_rate": 3.3874267730609314e-07, - "loss": 0.163, + "epoch": 1.8573849195142615, + "grad_norm": 3.711356342954198, + "learning_rate": 6.691454371912974e-06, + "loss": 0.5232, "step": 13154 }, { - "epoch": 2.7584399245124764, - "grad_norm": 5.135842414627989, - "learning_rate": 3.381586800486025e-07, - "loss": 0.1334, + "epoch": 1.8575261225642474, + "grad_norm": 3.831444173766949, + "learning_rate": 6.690015811918478e-06, + "loss": 0.5045, "step": 13155 }, { - "epoch": 2.7586496120780035, - "grad_norm": 3.933703983546546, - "learning_rate": 3.375751779772374e-07, - "loss": 0.1396, + "epoch": 1.8576673256142333, + "grad_norm": 4.224663475850468, + "learning_rate": 6.688577328846586e-06, + "loss": 0.6206, "step": 13156 }, { - "epoch": 2.758859299643531, - "grad_norm": 4.354213085627745, - "learning_rate": 3.3699217112190153e-07, - "loss": 0.1366, + "epoch": 1.8578085286642192, + "grad_norm": 3.798245443093454, + "learning_rate": 6.687138922730726e-06, + "loss": 0.5331, "step": 13157 }, { - "epoch": 2.7590689872090586, - "grad_norm": 4.499426226080546, - "learning_rate": 3.364096595124766e-07, - "loss": 0.1251, + "epoch": 1.857949731714205, + "grad_norm": 4.443212577118079, + "learning_rate": 6.685700593604329e-06, + "loss": 0.6278, "step": 13158 }, { - "epoch": 2.7592786747745857, - "grad_norm": 4.374934836204621, - "learning_rate": 3.3582764317881986e-07, - "loss": 0.1322, + "epoch": 1.858090934764191, + "grad_norm": 4.097600876465136, + "learning_rate": 6.684262341500818e-06, + "loss": 0.5878, "step": 13159 }, { - "epoch": 2.759488362340113, - "grad_norm": 3.9063781644538143, - "learning_rate": 3.3524612215075524e-07, - "loss": 0.1419, + "epoch": 1.8582321378141766, + "grad_norm": 3.8422551657040183, + "learning_rate": 6.6828241664536145e-06, + "loss": 0.4856, "step": 13160 }, { - "epoch": 2.7596980499056407, - "grad_norm": 5.475052792974205, - "learning_rate": 3.3466509645809e-07, - "loss": 0.1405, + "epoch": 1.8583733408641625, + "grad_norm": 4.445297057837859, + "learning_rate": 6.681386068496147e-06, + "loss": 0.6913, "step": 13161 }, { - "epoch": 2.759907737471168, - "grad_norm": 4.366633109627006, - "learning_rate": 3.3408456613060245e-07, - "loss": 0.1328, + "epoch": 1.8585145439141484, + "grad_norm": 3.831504516883622, + "learning_rate": 6.679948047661835e-06, + "loss": 0.6514, "step": 13162 }, { - "epoch": 2.7601174250366953, - "grad_norm": 6.7125982497515375, - "learning_rate": 3.335045311980467e-07, - "loss": 0.2084, + "epoch": 1.8586557469641343, + "grad_norm": 4.173824116729072, + "learning_rate": 6.678510103984095e-06, + "loss": 0.6778, "step": 13163 }, { - "epoch": 2.760327112602223, - "grad_norm": 3.876246424795854, - "learning_rate": 3.329249916901478e-07, - "loss": 0.1327, + "epoch": 1.8587969500141202, + "grad_norm": 4.4564242170252975, + "learning_rate": 6.677072237496347e-06, + "loss": 0.6755, "step": 13164 }, { - "epoch": 2.76053680016775, - "grad_norm": 3.5167242501898626, - "learning_rate": 3.323459476366109e-07, - "loss": 0.1036, + "epoch": 1.858938153064106, + "grad_norm": 2.818912871538394, + "learning_rate": 6.6756344482320046e-06, + "loss": 0.3994, "step": 13165 }, { - "epoch": 2.7607464877332775, - "grad_norm": 6.586141742223875, - "learning_rate": 3.317673990671111e-07, - "loss": 0.1746, + "epoch": 1.859079356114092, + "grad_norm": 3.605395629625173, + "learning_rate": 6.674196736224481e-06, + "loss": 0.5349, "step": 13166 }, { - "epoch": 2.760956175298805, - "grad_norm": 3.8549413323907205, - "learning_rate": 3.311893460113014e-07, - "loss": 0.1316, + "epoch": 1.8592205591640778, + "grad_norm": 3.797792206410664, + "learning_rate": 6.672759101507194e-06, + "loss": 0.5109, "step": 13167 }, { - "epoch": 2.761165862864332, - "grad_norm": 4.710660994142496, - "learning_rate": 3.3061178849880695e-07, - "loss": 0.1774, + "epoch": 1.8593617622140637, + "grad_norm": 2.9021734759222766, + "learning_rate": 6.6713215441135424e-06, + "loss": 0.4372, "step": 13168 }, { - "epoch": 2.7613755504298596, - "grad_norm": 4.45990056994574, - "learning_rate": 3.300347265592285e-07, - "loss": 0.1628, + "epoch": 1.8595029652640496, + "grad_norm": 2.7199984933934798, + "learning_rate": 6.669884064076944e-06, + "loss": 0.4133, "step": 13169 }, { - "epoch": 2.761585237995387, - "grad_norm": 4.370817919551644, - "learning_rate": 3.2945816022214247e-07, - "loss": 0.1406, + "epoch": 1.8596441683140355, + "grad_norm": 3.390475526353319, + "learning_rate": 6.668446661430801e-06, + "loss": 0.5075, "step": 13170 }, { - "epoch": 2.761794925560914, - "grad_norm": 3.2183257385117368, - "learning_rate": 3.2888208951709966e-07, - "loss": 0.1176, + "epoch": 1.8597853713640213, + "grad_norm": 3.53192668759502, + "learning_rate": 6.667009336208519e-06, + "loss": 0.6084, "step": 13171 }, { - "epoch": 2.7620046131264417, - "grad_norm": 3.627993035052207, - "learning_rate": 3.2830651447362304e-07, - "loss": 0.1095, + "epoch": 1.8599265744140072, + "grad_norm": 5.113660762805377, + "learning_rate": 6.665572088443497e-06, + "loss": 0.5475, "step": 13172 }, { - "epoch": 2.762214300691969, - "grad_norm": 4.313726478601724, - "learning_rate": 3.277314351212102e-07, - "loss": 0.1335, + "epoch": 1.860067777463993, + "grad_norm": 3.6827022856035554, + "learning_rate": 6.664134918169142e-06, + "loss": 0.5938, "step": 13173 }, { - "epoch": 2.7624239882574964, - "grad_norm": 3.8086468137810288, - "learning_rate": 3.2715685148933976e-07, - "loss": 0.1191, + "epoch": 1.860208980513979, + "grad_norm": 3.807741885754998, + "learning_rate": 6.662697825418853e-06, + "loss": 0.5218, "step": 13174 }, { - "epoch": 2.7626336758230234, - "grad_norm": 3.009829538989232, - "learning_rate": 3.265827636074548e-07, - "loss": 0.081, + "epoch": 1.8603501835639649, + "grad_norm": 2.7613123424349144, + "learning_rate": 6.6612608102260265e-06, + "loss": 0.4454, "step": 13175 }, { - "epoch": 2.762843363388551, - "grad_norm": 4.338289492860503, - "learning_rate": 3.260091715049829e-07, - "loss": 0.1237, + "epoch": 1.8604913866139507, + "grad_norm": 3.1601724704165366, + "learning_rate": 6.659823872624054e-06, + "loss": 0.4343, "step": 13176 }, { - "epoch": 2.7630530509540785, - "grad_norm": 4.242690293121193, - "learning_rate": 3.2543607521131835e-07, - "loss": 0.1333, + "epoch": 1.8606325896639366, + "grad_norm": 4.42148415440738, + "learning_rate": 6.65838701264633e-06, + "loss": 0.5935, "step": 13177 }, { - "epoch": 2.7632627385196056, - "grad_norm": 4.337602066338219, - "learning_rate": 3.248634747558366e-07, - "loss": 0.1307, + "epoch": 1.8607737927139225, + "grad_norm": 4.044355040478655, + "learning_rate": 6.656950230326251e-06, + "loss": 0.567, "step": 13178 }, { - "epoch": 2.763472426085133, - "grad_norm": 5.4042180155193495, - "learning_rate": 3.242913701678796e-07, - "loss": 0.1744, + "epoch": 1.8609149957639084, + "grad_norm": 3.210090182292342, + "learning_rate": 6.655513525697206e-06, + "loss": 0.4883, "step": 13179 }, { - "epoch": 2.7636821136506606, - "grad_norm": 4.447769647449593, - "learning_rate": 3.2371976147677396e-07, - "loss": 0.1477, + "epoch": 1.8610561988138943, + "grad_norm": 3.6765869524824324, + "learning_rate": 6.65407689879258e-06, + "loss": 0.6429, "step": 13180 }, { - "epoch": 2.7638918012161877, - "grad_norm": 3.8935621359356585, - "learning_rate": 3.23148648711813e-07, - "loss": 0.1319, + "epoch": 1.8611974018638802, + "grad_norm": 3.3801477984714197, + "learning_rate": 6.652640349645763e-06, + "loss": 0.4766, "step": 13181 }, { - "epoch": 2.7641014887817152, - "grad_norm": 4.072471285733634, - "learning_rate": 3.2257803190226534e-07, - "loss": 0.1161, + "epoch": 1.861338604913866, + "grad_norm": 3.741155500066469, + "learning_rate": 6.651203878290139e-06, + "loss": 0.5556, "step": 13182 }, { - "epoch": 2.7643111763472428, - "grad_norm": 4.554949117515458, - "learning_rate": 3.2200791107737884e-07, - "loss": 0.1426, + "epoch": 1.861479807963852, + "grad_norm": 4.429531478640785, + "learning_rate": 6.649767484759091e-06, + "loss": 0.5864, "step": 13183 }, { - "epoch": 2.76452086391277, - "grad_norm": 3.8996627795410435, - "learning_rate": 3.2143828626637564e-07, - "loss": 0.1214, + "epoch": 1.8616210110138378, + "grad_norm": 3.1326312882872607, + "learning_rate": 6.648331169086002e-06, + "loss": 0.4857, "step": 13184 }, { - "epoch": 2.7647305514782974, - "grad_norm": 3.2681467738030805, - "learning_rate": 3.2086915749844347e-07, - "loss": 0.0944, + "epoch": 1.8617622140638237, + "grad_norm": 3.012129498114597, + "learning_rate": 6.646894931304244e-06, + "loss": 0.5548, "step": 13185 }, { - "epoch": 2.764940239043825, - "grad_norm": 3.6782714882792775, - "learning_rate": 3.2030052480275464e-07, - "loss": 0.0849, + "epoch": 1.8619034171138096, + "grad_norm": 4.585220395427646, + "learning_rate": 6.645458771447202e-06, + "loss": 0.7124, "step": 13186 }, { - "epoch": 2.765149926609352, - "grad_norm": 3.9787891303993788, - "learning_rate": 3.1973238820845465e-07, - "loss": 0.1022, + "epoch": 1.8620446201637955, + "grad_norm": 4.129407792413521, + "learning_rate": 6.64402268954825e-06, + "loss": 0.7036, "step": 13187 }, { - "epoch": 2.7653596141748795, - "grad_norm": 5.513086441261112, - "learning_rate": 3.1916474774465797e-07, - "loss": 0.1556, + "epoch": 1.8621858232137813, + "grad_norm": 2.8541716504993153, + "learning_rate": 6.642586685640761e-06, + "loss": 0.4912, "step": 13188 }, { - "epoch": 2.765569301740407, - "grad_norm": 4.42266094987165, - "learning_rate": 3.185976034404581e-07, - "loss": 0.1327, + "epoch": 1.8623270262637672, + "grad_norm": 4.233359860466865, + "learning_rate": 6.641150759758106e-06, + "loss": 0.6204, "step": 13189 }, { - "epoch": 2.765778989305934, - "grad_norm": 2.774821760792061, - "learning_rate": 3.1803095532492387e-07, - "loss": 0.0775, + "epoch": 1.862468229313753, + "grad_norm": 2.9332293809772767, + "learning_rate": 6.639714911933658e-06, + "loss": 0.4589, "step": 13190 }, { - "epoch": 2.7659886768714617, - "grad_norm": 4.7093929105051355, - "learning_rate": 3.174648034270944e-07, - "loss": 0.1502, + "epoch": 1.862609432363739, + "grad_norm": 3.8358346673957215, + "learning_rate": 6.638279142200785e-06, + "loss": 0.6357, "step": 13191 }, { - "epoch": 2.7661983644369887, - "grad_norm": 5.950973909952062, - "learning_rate": 3.168991477759886e-07, - "loss": 0.1964, + "epoch": 1.8627506354137249, + "grad_norm": 3.4156955428229114, + "learning_rate": 6.636843450592854e-06, + "loss": 0.5747, "step": 13192 }, { - "epoch": 2.7664080520025163, - "grad_norm": 4.124575489567756, - "learning_rate": 3.163339884005956e-07, - "loss": 0.1224, + "epoch": 1.8628918384637108, + "grad_norm": 3.2224823042488278, + "learning_rate": 6.635407837143228e-06, + "loss": 0.4613, "step": 13193 }, { - "epoch": 2.7666177395680434, - "grad_norm": 5.701105732450794, - "learning_rate": 3.157693253298799e-07, - "loss": 0.151, + "epoch": 1.8630330415136966, + "grad_norm": 3.2379700706519303, + "learning_rate": 6.633972301885268e-06, + "loss": 0.5676, "step": 13194 }, { - "epoch": 2.766827427133571, - "grad_norm": 4.971578767651743, - "learning_rate": 3.15205158592784e-07, - "loss": 0.1753, + "epoch": 1.8631742445636825, + "grad_norm": 3.7897943448465283, + "learning_rate": 6.6325368448523395e-06, + "loss": 0.553, "step": 13195 }, { - "epoch": 2.7670371146990984, - "grad_norm": 4.406002302237589, - "learning_rate": 3.146414882182214e-07, - "loss": 0.1239, + "epoch": 1.8633154476136684, + "grad_norm": 3.0750355700643217, + "learning_rate": 6.631101466077801e-06, + "loss": 0.4373, "step": 13196 }, { - "epoch": 2.7672468022646255, - "grad_norm": 5.1151282844597, - "learning_rate": 3.1407831423508006e-07, - "loss": 0.1418, + "epoch": 1.8634566506636543, + "grad_norm": 3.232610584690483, + "learning_rate": 6.629666165595008e-06, + "loss": 0.4728, "step": 13197 }, { - "epoch": 2.767456489830153, - "grad_norm": 3.9696552442640556, - "learning_rate": 3.1351563667222474e-07, - "loss": 0.1536, + "epoch": 1.8635978537136402, + "grad_norm": 3.186165804370411, + "learning_rate": 6.628230943437319e-06, + "loss": 0.4368, "step": 13198 }, { - "epoch": 2.7676661773956805, - "grad_norm": 4.228064521135329, - "learning_rate": 3.1295345555849344e-07, - "loss": 0.1322, + "epoch": 1.863739056763626, + "grad_norm": 3.9927214798940467, + "learning_rate": 6.626795799638087e-06, + "loss": 0.6458, "step": 13199 }, { - "epoch": 2.7678758649612076, - "grad_norm": 4.404335508896068, - "learning_rate": 3.123917709226987e-07, - "loss": 0.1375, + "epoch": 1.863880259813612, + "grad_norm": 3.738156165141249, + "learning_rate": 6.625360734230663e-06, + "loss": 0.5925, "step": 13200 }, { - "epoch": 2.768085552526735, - "grad_norm": 4.354174268486226, - "learning_rate": 3.118305827936285e-07, - "loss": 0.1489, + "epoch": 1.8640214628635978, + "grad_norm": 3.0906346877749895, + "learning_rate": 6.623925747248403e-06, + "loss": 0.4898, "step": 13201 }, { - "epoch": 2.7682952400922627, - "grad_norm": 3.7798264144815943, - "learning_rate": 3.112698912000445e-07, - "loss": 0.1124, + "epoch": 1.8641626659135837, + "grad_norm": 3.7464767933131897, + "learning_rate": 6.6224908387246466e-06, + "loss": 0.6464, "step": 13202 }, { - "epoch": 2.7685049276577898, - "grad_norm": 4.033037960351409, - "learning_rate": 3.1070969617068123e-07, - "loss": 0.108, + "epoch": 1.8643038689635696, + "grad_norm": 4.629532250194823, + "learning_rate": 6.621056008692741e-06, + "loss": 0.6588, "step": 13203 }, { - "epoch": 2.7687146152233173, - "grad_norm": 4.606385702313986, - "learning_rate": 3.1014999773425257e-07, - "loss": 0.1498, + "epoch": 1.8644450720135555, + "grad_norm": 3.1660357845161196, + "learning_rate": 6.619621257186039e-06, + "loss": 0.5372, "step": 13204 }, { - "epoch": 2.768924302788845, - "grad_norm": 3.9903196210433194, - "learning_rate": 3.095907959194433e-07, - "loss": 0.1197, + "epoch": 1.8645862750635414, + "grad_norm": 3.5775661221857926, + "learning_rate": 6.618186584237878e-06, + "loss": 0.6156, "step": 13205 }, { - "epoch": 2.769133990354372, - "grad_norm": 5.000950281356826, - "learning_rate": 3.0903209075491046e-07, - "loss": 0.1409, + "epoch": 1.8647274781135272, + "grad_norm": 3.3921242346085343, + "learning_rate": 6.616751989881598e-06, + "loss": 0.5354, "step": 13206 }, { - "epoch": 2.7693436779198994, - "grad_norm": 4.097114424889244, - "learning_rate": 3.084738822692923e-07, - "loss": 0.1381, + "epoch": 1.8648686811635131, + "grad_norm": 3.517663575738852, + "learning_rate": 6.6153174741505445e-06, + "loss": 0.5172, "step": 13207 }, { - "epoch": 2.769553365485427, - "grad_norm": 4.2408223082803405, - "learning_rate": 3.0791617049119816e-07, - "loss": 0.1668, + "epoch": 1.865009884213499, + "grad_norm": 3.7536978653964606, + "learning_rate": 6.613883037078048e-06, + "loss": 0.4841, "step": 13208 }, { - "epoch": 2.769763053050954, - "grad_norm": 4.701739691559669, - "learning_rate": 3.0735895544920957e-07, - "loss": 0.161, + "epoch": 1.8651510872634849, + "grad_norm": 4.409619305926845, + "learning_rate": 6.612448678697452e-06, + "loss": 0.6553, "step": 13209 }, { - "epoch": 2.7699727406164816, - "grad_norm": 4.374463970052749, - "learning_rate": 3.0680223717188483e-07, - "loss": 0.1005, + "epoch": 1.8652922903134708, + "grad_norm": 3.817908493054709, + "learning_rate": 6.6110143990420824e-06, + "loss": 0.5406, "step": 13210 }, { - "epoch": 2.7701824281820087, - "grad_norm": 3.731434750810305, - "learning_rate": 3.062460156877578e-07, - "loss": 0.121, + "epoch": 1.8654334933634567, + "grad_norm": 3.0210053265079004, + "learning_rate": 6.6095801981452735e-06, + "loss": 0.4269, "step": 13211 }, { - "epoch": 2.770392115747536, - "grad_norm": 3.6962792969628846, - "learning_rate": 3.056902910253345e-07, - "loss": 0.1008, + "epoch": 1.8655746964134425, + "grad_norm": 4.141665234498497, + "learning_rate": 6.608146076040358e-06, + "loss": 0.611, "step": 13212 }, { - "epoch": 2.7706018033130633, - "grad_norm": 3.575788771358574, - "learning_rate": 3.0513506321310007e-07, - "loss": 0.1198, + "epoch": 1.8657158994634284, + "grad_norm": 2.929247158395267, + "learning_rate": 6.606712032760663e-06, + "loss": 0.4732, "step": 13213 }, { - "epoch": 2.770811490878591, - "grad_norm": 6.130992160026336, - "learning_rate": 3.0458033227950714e-07, - "loss": 0.1674, + "epoch": 1.8658571025134143, + "grad_norm": 3.6540823968137395, + "learning_rate": 6.605278068339516e-06, + "loss": 0.5445, "step": 13214 }, { - "epoch": 2.7710211784441183, - "grad_norm": 4.110630643355918, - "learning_rate": 3.0402609825298744e-07, - "loss": 0.1338, + "epoch": 1.8659983055634002, + "grad_norm": 3.3519448042672133, + "learning_rate": 6.603844182810238e-06, + "loss": 0.4589, "step": 13215 }, { - "epoch": 2.7712308660096454, - "grad_norm": 4.421286954444065, - "learning_rate": 3.0347236116194724e-07, - "loss": 0.1642, + "epoch": 1.866139508613386, + "grad_norm": 3.2319730611865514, + "learning_rate": 6.602410376206154e-06, + "loss": 0.4312, "step": 13216 }, { - "epoch": 2.771440553575173, - "grad_norm": 3.642610598744105, - "learning_rate": 3.0291912103476705e-07, - "loss": 0.1415, + "epoch": 1.866280711663372, + "grad_norm": 3.0712457750424136, + "learning_rate": 6.600976648560588e-06, + "loss": 0.5097, "step": 13217 }, { - "epoch": 2.7716502411407005, - "grad_norm": 4.492624602509829, - "learning_rate": 3.0236637789980094e-07, - "loss": 0.1837, + "epoch": 1.8664219147133578, + "grad_norm": 3.6400458983703703, + "learning_rate": 6.599542999906858e-06, + "loss": 0.5132, "step": 13218 }, { - "epoch": 2.7718599287062275, - "grad_norm": 4.084580975000896, - "learning_rate": 3.0181413178537623e-07, - "loss": 0.1328, + "epoch": 1.8665631177633437, + "grad_norm": 3.528596644044079, + "learning_rate": 6.598109430278279e-06, + "loss": 0.5947, "step": 13219 }, { - "epoch": 2.772069616271755, - "grad_norm": 4.221260181438162, - "learning_rate": 3.012623827197991e-07, - "loss": 0.1304, + "epoch": 1.8667043208133296, + "grad_norm": 3.7707860703714697, + "learning_rate": 6.596675939708166e-06, + "loss": 0.6011, "step": 13220 }, { - "epoch": 2.7722793038372826, - "grad_norm": 2.6678786441456386, - "learning_rate": 3.0071113073134595e-07, - "loss": 0.0824, + "epoch": 1.8668455238633155, + "grad_norm": 3.3337937392635593, + "learning_rate": 6.595242528229835e-06, + "loss": 0.4915, "step": 13221 }, { - "epoch": 2.7724889914028097, - "grad_norm": 4.386237886871211, - "learning_rate": 3.001603758482696e-07, - "loss": 0.1636, + "epoch": 1.8669867269133014, + "grad_norm": 4.500622294597003, + "learning_rate": 6.593809195876597e-06, + "loss": 0.6355, "step": 13222 }, { - "epoch": 2.772698678968337, - "grad_norm": 5.483496072159809, - "learning_rate": 2.996101180987987e-07, - "loss": 0.1284, + "epoch": 1.8671279299632872, + "grad_norm": 3.746844852212628, + "learning_rate": 6.5923759426817615e-06, + "loss": 0.4833, "step": 13223 }, { - "epoch": 2.7729083665338647, - "grad_norm": 4.901220317174622, - "learning_rate": 2.990603575111328e-07, - "loss": 0.1454, + "epoch": 1.8672691330132731, + "grad_norm": 3.5216631159715597, + "learning_rate": 6.5909427686786386e-06, + "loss": 0.5094, "step": 13224 }, { - "epoch": 2.773118054099392, - "grad_norm": 4.096252670016384, - "learning_rate": 2.9851109411344946e-07, - "loss": 0.1577, + "epoch": 1.867410336063259, + "grad_norm": 3.7665884273595895, + "learning_rate": 6.589509673900534e-06, + "loss": 0.5914, "step": 13225 }, { - "epoch": 2.7733277416649194, - "grad_norm": 3.1037193506652656, - "learning_rate": 2.979623279338983e-07, - "loss": 0.0851, + "epoch": 1.867551539113245, + "grad_norm": 3.835721010215854, + "learning_rate": 6.588076658380754e-06, + "loss": 0.4735, "step": 13226 }, { - "epoch": 2.773537429230447, - "grad_norm": 4.193902508319986, - "learning_rate": 2.974140590006058e-07, - "loss": 0.105, + "epoch": 1.8676927421632308, + "grad_norm": 3.7155716989122833, + "learning_rate": 6.586643722152597e-06, + "loss": 0.508, "step": 13227 }, { - "epoch": 2.773747116795974, - "grad_norm": 5.239245632473883, - "learning_rate": 2.9686628734166944e-07, - "loss": 0.1914, + "epoch": 1.8678339452132167, + "grad_norm": 4.089702456313327, + "learning_rate": 6.585210865249363e-06, + "loss": 0.5398, "step": 13228 }, { - "epoch": 2.7739568043615015, - "grad_norm": 3.965351670363004, - "learning_rate": 2.963190129851667e-07, - "loss": 0.1016, + "epoch": 1.8679751482632025, + "grad_norm": 4.527129741516544, + "learning_rate": 6.5837780877043555e-06, + "loss": 0.6788, "step": 13229 }, { - "epoch": 2.7741664919270286, - "grad_norm": 3.810717325074233, - "learning_rate": 2.957722359591453e-07, - "loss": 0.1267, + "epoch": 1.8681163513131884, + "grad_norm": 5.550468427981594, + "learning_rate": 6.58234538955087e-06, + "loss": 0.8167, "step": 13230 }, { - "epoch": 2.774376179492556, - "grad_norm": 4.540066015976268, - "learning_rate": 2.95225956291626e-07, - "loss": 0.1536, + "epoch": 1.8682575543631743, + "grad_norm": 3.834967983788457, + "learning_rate": 6.580912770822203e-06, + "loss": 0.5485, "step": 13231 }, { - "epoch": 2.774585867058083, - "grad_norm": 5.6886808235922155, - "learning_rate": 2.9468017401061087e-07, - "loss": 0.1417, + "epoch": 1.8683987574131602, + "grad_norm": 3.7253343281102023, + "learning_rate": 6.579480231551644e-06, + "loss": 0.5816, "step": 13232 }, { - "epoch": 2.7747955546236107, - "grad_norm": 3.6858062081808116, - "learning_rate": 2.941348891440676e-07, - "loss": 0.1103, + "epoch": 1.868539960463146, + "grad_norm": 4.171048777463959, + "learning_rate": 6.578047771772489e-06, + "loss": 0.5228, "step": 13233 }, { - "epoch": 2.7750052421891382, - "grad_norm": 2.8136004089160527, - "learning_rate": 2.9359010171994605e-07, - "loss": 0.0968, + "epoch": 1.868681163513132, + "grad_norm": 3.4376079327021665, + "learning_rate": 6.576615391518026e-06, + "loss": 0.507, "step": 13234 }, { - "epoch": 2.7752149297546653, - "grad_norm": 3.7777329352138147, - "learning_rate": 2.930458117661683e-07, - "loss": 0.1353, + "epoch": 1.8688223665631178, + "grad_norm": 3.0862798114155856, + "learning_rate": 6.5751830908215445e-06, + "loss": 0.5099, "step": 13235 }, { - "epoch": 2.775424617320193, - "grad_norm": 3.324504828568484, - "learning_rate": 2.925020193106265e-07, - "loss": 0.116, + "epoch": 1.8689635696131037, + "grad_norm": 3.995719063479375, + "learning_rate": 6.573750869716327e-06, + "loss": 0.5899, "step": 13236 }, { - "epoch": 2.7756343048857204, - "grad_norm": 4.8591723309736485, - "learning_rate": 2.919587243811928e-07, - "loss": 0.1537, + "epoch": 1.8691047726630896, + "grad_norm": 4.053227446274229, + "learning_rate": 6.572318728235658e-06, + "loss": 0.5047, "step": 13237 }, { - "epoch": 2.7758439924512475, - "grad_norm": 5.177073868079106, - "learning_rate": 2.9141592700571265e-07, - "loss": 0.1665, + "epoch": 1.8692459757130755, + "grad_norm": 3.3189551282305816, + "learning_rate": 6.570886666412823e-06, + "loss": 0.4606, "step": 13238 }, { - "epoch": 2.776053680016775, - "grad_norm": 5.356782641086413, - "learning_rate": 2.908736272120061e-07, - "loss": 0.1497, + "epoch": 1.8693871787630614, + "grad_norm": 3.80614266736162, + "learning_rate": 6.569454684281102e-06, + "loss": 0.5826, "step": 13239 }, { - "epoch": 2.7762633675823025, - "grad_norm": 5.354002925526554, - "learning_rate": 2.903318250278642e-07, - "loss": 0.1611, + "epoch": 1.8695283818130473, + "grad_norm": 4.0220582457978855, + "learning_rate": 6.5680227818737695e-06, + "loss": 0.5096, "step": 13240 }, { - "epoch": 2.7764730551478296, - "grad_norm": 4.872058242263722, - "learning_rate": 2.8979052048105694e-07, - "loss": 0.1889, + "epoch": 1.8696695848630331, + "grad_norm": 3.6519687199144517, + "learning_rate": 6.566590959224109e-06, + "loss": 0.6232, "step": 13241 }, { - "epoch": 2.776682742713357, - "grad_norm": 4.386710451305927, - "learning_rate": 2.8924971359932996e-07, - "loss": 0.142, + "epoch": 1.869810787913019, + "grad_norm": 3.884859702749816, + "learning_rate": 6.5651592163653885e-06, + "loss": 0.5262, "step": 13242 }, { - "epoch": 2.7768924302788847, - "grad_norm": 4.44115690340378, - "learning_rate": 2.887094044103944e-07, - "loss": 0.1553, + "epoch": 1.869951990963005, + "grad_norm": 3.6746511824842494, + "learning_rate": 6.56372755333089e-06, + "loss": 0.5085, "step": 13243 }, { - "epoch": 2.7771021178444117, - "grad_norm": 3.9178146887532592, - "learning_rate": 2.881695929419459e-07, - "loss": 0.142, + "epoch": 1.8700931940129908, + "grad_norm": 4.367210289639769, + "learning_rate": 6.562295970153875e-06, + "loss": 0.6955, "step": 13244 }, { - "epoch": 2.7773118054099393, - "grad_norm": 4.850911207776765, - "learning_rate": 2.876302792216501e-07, - "loss": 0.1315, + "epoch": 1.8702343970629767, + "grad_norm": 2.959460677317954, + "learning_rate": 6.560864466867616e-06, + "loss": 0.4334, "step": 13245 }, { - "epoch": 2.777521492975467, - "grad_norm": 5.007647178907057, - "learning_rate": 2.870914632771482e-07, - "loss": 0.1467, + "epoch": 1.8703756001129626, + "grad_norm": 3.8650677130261313, + "learning_rate": 6.559433043505383e-06, + "loss": 0.6255, "step": 13246 }, { - "epoch": 2.777731180540994, - "grad_norm": 4.294105792587498, - "learning_rate": 2.8655314513605483e-07, - "loss": 0.1604, + "epoch": 1.8705168031629484, + "grad_norm": 3.5870292720167356, + "learning_rate": 6.558001700100441e-06, + "loss": 0.5219, "step": 13247 }, { - "epoch": 2.7779408681065214, - "grad_norm": 4.720307300617483, - "learning_rate": 2.860153248259601e-07, - "loss": 0.1859, + "epoch": 1.8706580062129343, + "grad_norm": 3.779098333116538, + "learning_rate": 6.556570436686052e-06, + "loss": 0.5694, "step": 13248 }, { - "epoch": 2.7781505556720485, - "grad_norm": 4.2996407201859395, - "learning_rate": 2.8547800237442746e-07, - "loss": 0.1193, + "epoch": 1.8707992092629202, + "grad_norm": 4.110056802257806, + "learning_rate": 6.555139253295477e-06, + "loss": 0.6495, "step": 13249 }, { - "epoch": 2.778360243237576, - "grad_norm": 6.042867224906332, - "learning_rate": 2.8494117780899613e-07, - "loss": 0.2001, + "epoch": 1.870940412312906, + "grad_norm": 3.358993328099325, + "learning_rate": 6.553708149961979e-06, + "loss": 0.5509, "step": 13250 }, { - "epoch": 2.778569930803103, - "grad_norm": 4.753267286995288, - "learning_rate": 2.844048511571806e-07, - "loss": 0.1591, + "epoch": 1.871081615362892, + "grad_norm": 3.293742877721644, + "learning_rate": 6.552277126718818e-06, + "loss": 0.4884, "step": 13251 }, { - "epoch": 2.7787796183686306, - "grad_norm": 3.3992199079327023, - "learning_rate": 2.8386902244646573e-07, - "loss": 0.1234, + "epoch": 1.8712228184128779, + "grad_norm": 3.257602986908871, + "learning_rate": 6.550846183599249e-06, + "loss": 0.5314, "step": 13252 }, { - "epoch": 2.778989305934158, - "grad_norm": 3.3785206999929733, - "learning_rate": 2.83333691704315e-07, - "loss": 0.1131, + "epoch": 1.8713640214628637, + "grad_norm": 3.7510117738060713, + "learning_rate": 6.549415320636521e-06, + "loss": 0.5448, "step": 13253 }, { - "epoch": 2.7791989934996852, - "grad_norm": 3.7202110345582877, - "learning_rate": 2.827988589581665e-07, - "loss": 0.1032, + "epoch": 1.8715052245128496, + "grad_norm": 3.3669070967586516, + "learning_rate": 6.5479845378638905e-06, + "loss": 0.4987, "step": 13254 }, { - "epoch": 2.7794086810652128, - "grad_norm": 4.387286658140256, - "learning_rate": 2.822645242354305e-07, - "loss": 0.1224, + "epoch": 1.8716464275628355, + "grad_norm": 4.209737643909711, + "learning_rate": 6.5465538353146106e-06, + "loss": 0.6557, "step": 13255 }, { - "epoch": 2.7796183686307403, - "grad_norm": 4.064823347330441, - "learning_rate": 2.8173068756349065e-07, - "loss": 0.1123, + "epoch": 1.8717876306128214, + "grad_norm": 3.6612954158584663, + "learning_rate": 6.545123213021927e-06, + "loss": 0.4986, "step": 13256 }, { - "epoch": 2.7798280561962674, - "grad_norm": 4.477784656540853, - "learning_rate": 2.811973489697106e-07, - "loss": 0.1306, + "epoch": 1.8719288336628073, + "grad_norm": 2.899452287408762, + "learning_rate": 6.543692671019086e-06, + "loss": 0.4209, "step": 13257 }, { - "epoch": 2.780037743761795, - "grad_norm": 4.993774824728272, - "learning_rate": 2.806645084814208e-07, - "loss": 0.134, + "epoch": 1.8720700367127932, + "grad_norm": 3.7316288813116025, + "learning_rate": 6.542262209339339e-06, + "loss": 0.6227, "step": 13258 }, { - "epoch": 2.7802474313273224, - "grad_norm": 3.463738373333924, - "learning_rate": 2.8013216612593376e-07, - "loss": 0.1061, + "epoch": 1.872211239762779, + "grad_norm": 2.9508623323348724, + "learning_rate": 6.5408318280159225e-06, + "loss": 0.4597, "step": 13259 }, { - "epoch": 2.7804571188928495, - "grad_norm": 3.94592705805319, - "learning_rate": 2.79600321930531e-07, - "loss": 0.1192, + "epoch": 1.872352442812765, + "grad_norm": 3.754647676355563, + "learning_rate": 6.539401527082083e-06, + "loss": 0.6288, "step": 13260 }, { - "epoch": 2.780666806458377, - "grad_norm": 4.547791766194817, - "learning_rate": 2.790689759224696e-07, - "loss": 0.1759, + "epoch": 1.8724936458627508, + "grad_norm": 4.5952899856987015, + "learning_rate": 6.537971306571056e-06, + "loss": 0.723, "step": 13261 }, { - "epoch": 2.7808764940239046, - "grad_norm": 4.670913251031462, - "learning_rate": 2.785381281289845e-07, - "loss": 0.1147, + "epoch": 1.8726348489127365, + "grad_norm": 2.904328001710089, + "learning_rate": 6.536541166516079e-06, + "loss": 0.4138, "step": 13262 }, { - "epoch": 2.7810861815894317, - "grad_norm": 3.587542422623015, - "learning_rate": 2.780077785772828e-07, - "loss": 0.1112, + "epoch": 1.8727760519627223, + "grad_norm": 3.8070860535370077, + "learning_rate": 6.535111106950389e-06, + "loss": 0.5581, "step": 13263 }, { - "epoch": 2.781295869154959, - "grad_norm": 4.4830026088330905, - "learning_rate": 2.774779272945416e-07, - "loss": 0.151, + "epoch": 1.8729172550127082, + "grad_norm": 3.876310629445477, + "learning_rate": 6.533681127907224e-06, + "loss": 0.5434, "step": 13264 }, { - "epoch": 2.7815055567204867, - "grad_norm": 4.199113395056815, - "learning_rate": 2.769485743079203e-07, - "loss": 0.127, + "epoch": 1.873058458062694, + "grad_norm": 3.7364774946944204, + "learning_rate": 6.53225122941981e-06, + "loss": 0.6367, "step": 13265 }, { - "epoch": 2.781715244286014, - "grad_norm": 5.028060822858209, - "learning_rate": 2.764197196445495e-07, - "loss": 0.174, + "epoch": 1.87319966111268, + "grad_norm": 3.323247140488036, + "learning_rate": 6.5308214115213785e-06, + "loss": 0.4401, "step": 13266 }, { - "epoch": 2.7819249318515413, - "grad_norm": 3.791738029566469, - "learning_rate": 2.7589136333153186e-07, - "loss": 0.1046, + "epoch": 1.8733408641626659, + "grad_norm": 3.492196199760023, + "learning_rate": 6.529391674245162e-06, + "loss": 0.5128, "step": 13267 }, { - "epoch": 2.7821346194170684, - "grad_norm": 4.87676423634727, - "learning_rate": 2.7536350539594694e-07, - "loss": 0.1525, + "epoch": 1.8734820672126518, + "grad_norm": 2.9732896247567946, + "learning_rate": 6.527962017624383e-06, + "loss": 0.4394, "step": 13268 }, { - "epoch": 2.782344306982596, - "grad_norm": 3.853860273551908, - "learning_rate": 2.7483614586485075e-07, - "loss": 0.1057, + "epoch": 1.8736232702626376, + "grad_norm": 3.5502940011461677, + "learning_rate": 6.52653244169227e-06, + "loss": 0.5024, "step": 13269 }, { - "epoch": 2.782553994548123, - "grad_norm": 5.380177082396529, - "learning_rate": 2.743092847652673e-07, - "loss": 0.1391, + "epoch": 1.8737644733126235, + "grad_norm": 3.8506897935388227, + "learning_rate": 6.52510294648204e-06, + "loss": 0.5311, "step": 13270 }, { - "epoch": 2.7827636821136505, - "grad_norm": 3.7406023384844285, - "learning_rate": 2.737829221242028e-07, - "loss": 0.1207, + "epoch": 1.8739056763626094, + "grad_norm": 4.151487972040856, + "learning_rate": 6.523673532026916e-06, + "loss": 0.6355, "step": 13271 }, { - "epoch": 2.782973369679178, - "grad_norm": 3.731899972741993, - "learning_rate": 2.7325705796863333e-07, - "loss": 0.1035, + "epoch": 1.8740468794125953, + "grad_norm": 3.2596542079799504, + "learning_rate": 6.522244198360119e-06, + "loss": 0.535, "step": 13272 }, { - "epoch": 2.783183057244705, - "grad_norm": 3.7811428801923723, - "learning_rate": 2.7273169232550743e-07, - "loss": 0.109, + "epoch": 1.8741880824625812, + "grad_norm": 3.091881292247918, + "learning_rate": 6.520814945514866e-06, + "loss": 0.481, "step": 13273 }, { - "epoch": 2.7833927448102327, - "grad_norm": 3.634877454276499, - "learning_rate": 2.7220682522175465e-07, - "loss": 0.1139, + "epoch": 1.874329285512567, + "grad_norm": 3.7037364931546364, + "learning_rate": 6.519385773524371e-06, + "loss": 0.5992, "step": 13274 }, { - "epoch": 2.78360243237576, - "grad_norm": 4.811346723262429, - "learning_rate": 2.7168245668427464e-07, - "loss": 0.1475, + "epoch": 1.874470488562553, + "grad_norm": 3.600225803029264, + "learning_rate": 6.517956682421845e-06, + "loss": 0.5096, "step": 13275 }, { - "epoch": 2.7838121199412873, - "grad_norm": 4.460029592229935, - "learning_rate": 2.711585867399413e-07, - "loss": 0.1412, + "epoch": 1.8746116916125388, + "grad_norm": 2.88656877277685, + "learning_rate": 6.516527672240504e-06, + "loss": 0.4774, "step": 13276 }, { - "epoch": 2.784021807506815, - "grad_norm": 3.7279404908001363, - "learning_rate": 2.7063521541560444e-07, - "loss": 0.1025, + "epoch": 1.8747528946625247, + "grad_norm": 3.7711505685858335, + "learning_rate": 6.515098743013557e-06, + "loss": 0.5997, "step": 13277 }, { - "epoch": 2.7842314950723424, - "grad_norm": 2.6041155861722505, - "learning_rate": 2.7011234273808693e-07, - "loss": 0.0832, + "epoch": 1.8748940977125106, + "grad_norm": 3.3614375607947555, + "learning_rate": 6.513669894774209e-06, + "loss": 0.5629, "step": 13278 }, { - "epoch": 2.7844411826378694, - "grad_norm": 4.1101684924925665, - "learning_rate": 2.695899687341874e-07, - "loss": 0.1454, + "epoch": 1.8750353007624965, + "grad_norm": 3.6821542139177437, + "learning_rate": 6.512241127555665e-06, + "loss": 0.6094, "step": 13279 }, { - "epoch": 2.784650870203397, - "grad_norm": 4.486403356900338, - "learning_rate": 2.6906809343067997e-07, - "loss": 0.1305, + "epoch": 1.8751765038124824, + "grad_norm": 3.6927919264653943, + "learning_rate": 6.510812441391131e-06, + "loss": 0.5035, "step": 13280 }, { - "epoch": 2.7848605577689245, - "grad_norm": 4.548052337126846, - "learning_rate": 2.6854671685430875e-07, - "loss": 0.1334, + "epoch": 1.8753177068624682, + "grad_norm": 3.9845129731177553, + "learning_rate": 6.509383836313811e-06, + "loss": 0.6768, "step": 13281 }, { - "epoch": 2.7850702453344516, - "grad_norm": 4.348308977147785, - "learning_rate": 2.680258390317969e-07, - "loss": 0.1302, + "epoch": 1.8754589099124541, + "grad_norm": 4.602094716953846, + "learning_rate": 6.507955312356903e-06, + "loss": 0.5424, "step": 13282 }, { - "epoch": 2.785279932899979, - "grad_norm": 3.812327835913294, - "learning_rate": 2.675054599898408e-07, - "loss": 0.1208, + "epoch": 1.87560011296244, + "grad_norm": 3.5802608856595484, + "learning_rate": 6.506526869553602e-06, + "loss": 0.5582, "step": 13283 }, { - "epoch": 2.7854896204655066, - "grad_norm": 3.7762053949250074, - "learning_rate": 2.6698557975511017e-07, - "loss": 0.0939, + "epoch": 1.8757413160124259, + "grad_norm": 3.4384454898394945, + "learning_rate": 6.505098507937111e-06, + "loss": 0.4913, "step": 13284 }, { - "epoch": 2.7856993080310337, - "grad_norm": 4.065729623158698, - "learning_rate": 2.6646619835424824e-07, - "loss": 0.1398, + "epoch": 1.8758825190624118, + "grad_norm": 4.651638684391711, + "learning_rate": 6.503670227540624e-06, + "loss": 0.5908, "step": 13285 }, { - "epoch": 2.7859089955965612, - "grad_norm": 3.97148992639932, - "learning_rate": 2.659473158138759e-07, - "loss": 0.1337, + "epoch": 1.8760237221123977, + "grad_norm": 5.819995341733787, + "learning_rate": 6.502242028397328e-06, + "loss": 0.5318, "step": 13286 }, { - "epoch": 2.7861186831620883, - "grad_norm": 3.8726561537283373, - "learning_rate": 2.654289321605863e-07, - "loss": 0.0981, + "epoch": 1.8761649251623835, + "grad_norm": 3.0690932348209725, + "learning_rate": 6.5008139105404175e-06, + "loss": 0.4236, "step": 13287 }, { - "epoch": 2.786328370727616, - "grad_norm": 4.023019213367914, - "learning_rate": 2.649110474209493e-07, - "loss": 0.1509, + "epoch": 1.8763061282123694, + "grad_norm": 3.7708567802896984, + "learning_rate": 6.499385874003077e-06, + "loss": 0.4597, "step": 13288 }, { - "epoch": 2.7865380582931434, - "grad_norm": 3.8401844888756225, - "learning_rate": 2.643936616215037e-07, - "loss": 0.1319, + "epoch": 1.8764473312623553, + "grad_norm": 3.9593706697466247, + "learning_rate": 6.4979579188185e-06, + "loss": 0.6371, "step": 13289 }, { - "epoch": 2.7867477458586705, - "grad_norm": 3.837441245795726, - "learning_rate": 2.638767747887705e-07, - "loss": 0.1347, + "epoch": 1.8765885343123412, + "grad_norm": 3.5416365358914508, + "learning_rate": 6.496530045019869e-06, + "loss": 0.5762, "step": 13290 }, { - "epoch": 2.786957433424198, - "grad_norm": 5.22801696706483, - "learning_rate": 2.633603869492374e-07, - "loss": 0.1563, + "epoch": 1.876729737362327, + "grad_norm": 4.206089926380315, + "learning_rate": 6.495102252640366e-06, + "loss": 0.7409, "step": 13291 }, { - "epoch": 2.787167120989725, - "grad_norm": 5.577282455268896, - "learning_rate": 2.6284449812937317e-07, - "loss": 0.1395, + "epoch": 1.876870940412313, + "grad_norm": 3.117457412710474, + "learning_rate": 6.493674541713172e-06, + "loss": 0.4055, "step": 13292 }, { - "epoch": 2.7873768085552526, - "grad_norm": 4.596783573410365, - "learning_rate": 2.6232910835561565e-07, - "loss": 0.1156, + "epoch": 1.8770121434622988, + "grad_norm": 4.67473719947955, + "learning_rate": 6.492246912271468e-06, + "loss": 0.6948, "step": 13293 }, { - "epoch": 2.78758649612078, - "grad_norm": 4.47478025260815, - "learning_rate": 2.618142176543803e-07, - "loss": 0.1439, + "epoch": 1.8771533465122847, + "grad_norm": 3.1219221800702033, + "learning_rate": 6.490819364348434e-06, + "loss": 0.5519, "step": 13294 }, { - "epoch": 2.787796183686307, - "grad_norm": 3.825297221115946, - "learning_rate": 2.6129982605205605e-07, - "loss": 0.1649, + "epoch": 1.8772945495622706, + "grad_norm": 3.522146490098211, + "learning_rate": 6.4893918979772395e-06, + "loss": 0.5258, "step": 13295 }, { - "epoch": 2.7880058712518347, - "grad_norm": 4.996769482937837, - "learning_rate": 2.607859335750085e-07, - "loss": 0.1374, + "epoch": 1.8774357526122563, + "grad_norm": 3.5681076743752547, + "learning_rate": 6.4879645131910626e-06, + "loss": 0.5677, "step": 13296 }, { - "epoch": 2.7882155588173623, - "grad_norm": 4.379278775838625, - "learning_rate": 2.6027254024957205e-07, - "loss": 0.1055, + "epoch": 1.8775769556622421, + "grad_norm": 3.1061815444201417, + "learning_rate": 6.4865372100230695e-06, + "loss": 0.4457, "step": 13297 }, { - "epoch": 2.7884252463828894, - "grad_norm": 5.090274587988631, - "learning_rate": 2.5975964610206125e-07, - "loss": 0.1799, + "epoch": 1.877718158712228, + "grad_norm": 3.9056517850329597, + "learning_rate": 6.485109988506439e-06, + "loss": 0.5507, "step": 13298 }, { - "epoch": 2.788634933948417, - "grad_norm": 3.465402424116034, - "learning_rate": 2.592472511587607e-07, - "loss": 0.0833, + "epoch": 1.877859361762214, + "grad_norm": 4.065038840040629, + "learning_rate": 6.483682848674332e-06, + "loss": 0.5615, "step": 13299 }, { - "epoch": 2.7888446215139444, - "grad_norm": 4.514405846601945, - "learning_rate": 2.58735355445936e-07, - "loss": 0.151, + "epoch": 1.8780005648121998, + "grad_norm": 3.243149879002145, + "learning_rate": 6.4822557905599156e-06, + "loss": 0.4422, "step": 13300 }, { - "epoch": 2.7890543090794715, - "grad_norm": 3.683925046413724, - "learning_rate": 2.5822395898981724e-07, - "loss": 0.1417, + "epoch": 1.8781417678621857, + "grad_norm": 4.13133332778356, + "learning_rate": 6.480828814196358e-06, + "loss": 0.7124, "step": 13301 }, { - "epoch": 2.789263996644999, - "grad_norm": 4.396313897432646, - "learning_rate": 2.5771306181661794e-07, - "loss": 0.1254, + "epoch": 1.8782829709121716, + "grad_norm": 3.4284311096482236, + "learning_rate": 6.479401919616821e-06, + "loss": 0.414, "step": 13302 }, { - "epoch": 2.7894736842105265, - "grad_norm": 3.449012913831787, - "learning_rate": 2.5720266395252044e-07, - "loss": 0.104, + "epoch": 1.8784241739621574, + "grad_norm": 3.12588485211655, + "learning_rate": 6.477975106854462e-06, + "loss": 0.4449, "step": 13303 }, { - "epoch": 2.7896833717760536, - "grad_norm": 4.105720631955689, - "learning_rate": 2.566927654236862e-07, - "loss": 0.1275, + "epoch": 1.8785653770121433, + "grad_norm": 3.7166749320989387, + "learning_rate": 6.476548375942437e-06, + "loss": 0.58, "step": 13304 }, { - "epoch": 2.789893059341581, - "grad_norm": 3.531049041639615, - "learning_rate": 2.561833662562452e-07, - "loss": 0.1012, + "epoch": 1.8787065800621292, + "grad_norm": 3.582069565829473, + "learning_rate": 6.475121726913909e-06, + "loss": 0.5004, "step": 13305 }, { - "epoch": 2.7901027469071082, - "grad_norm": 3.0278918375427497, - "learning_rate": 2.5567446647630665e-07, - "loss": 0.1038, + "epoch": 1.878847783112115, + "grad_norm": 3.6182775928725017, + "learning_rate": 6.473695159802027e-06, + "loss": 0.586, "step": 13306 }, { - "epoch": 2.7903124344726358, - "grad_norm": 4.244885059852428, - "learning_rate": 2.5516606610995197e-07, - "loss": 0.1552, + "epoch": 1.878988986162101, + "grad_norm": 3.482386547096671, + "learning_rate": 6.472268674639949e-06, + "loss": 0.4719, "step": 13307 }, { - "epoch": 2.7905221220381633, - "grad_norm": 4.26462786422547, - "learning_rate": 2.5465816518323916e-07, - "loss": 0.1255, + "epoch": 1.8791301892120869, + "grad_norm": 3.617250072196595, + "learning_rate": 6.470842271460823e-06, + "loss": 0.5609, "step": 13308 }, { - "epoch": 2.7907318096036904, - "grad_norm": 3.901507664181694, - "learning_rate": 2.541507637221974e-07, - "loss": 0.1547, + "epoch": 1.8792713922620727, + "grad_norm": 4.055456132111511, + "learning_rate": 6.469415950297797e-06, + "loss": 0.6994, "step": 13309 }, { - "epoch": 2.790941497169218, - "grad_norm": 3.7429994203070915, - "learning_rate": 2.536438617528314e-07, - "loss": 0.1356, + "epoch": 1.8794125953120586, + "grad_norm": 3.215078435312352, + "learning_rate": 6.467989711184021e-06, + "loss": 0.4796, "step": 13310 }, { - "epoch": 2.791151184734745, - "grad_norm": 5.248432174028611, - "learning_rate": 2.5313745930112153e-07, - "loss": 0.1585, + "epoch": 1.8795537983620445, + "grad_norm": 3.4871408840878835, + "learning_rate": 6.46656355415264e-06, + "loss": 0.5348, "step": 13311 }, { - "epoch": 2.7913608723002725, - "grad_norm": 3.638367645037781, - "learning_rate": 2.5263155639302375e-07, - "loss": 0.1187, + "epoch": 1.8796950014120304, + "grad_norm": 3.2392538721266284, + "learning_rate": 6.465137479236796e-06, + "loss": 0.5251, "step": 13312 }, { - "epoch": 2.7915705598658, - "grad_norm": 4.016157366733858, - "learning_rate": 2.5212615305446384e-07, - "loss": 0.1479, + "epoch": 1.8798362044620163, + "grad_norm": 3.2666248070982693, + "learning_rate": 6.463711486469629e-06, + "loss": 0.4967, "step": 13313 }, { - "epoch": 2.791780247431327, - "grad_norm": 3.2226092877723107, - "learning_rate": 2.516212493113446e-07, - "loss": 0.1437, + "epoch": 1.8799774075120022, + "grad_norm": 3.5462969166958964, + "learning_rate": 6.4622855758842785e-06, + "loss": 0.665, "step": 13314 }, { - "epoch": 2.7919899349968547, - "grad_norm": 3.6950571738641846, - "learning_rate": 2.5111684518954515e-07, - "loss": 0.1085, + "epoch": 1.880118610561988, + "grad_norm": 3.236249053090492, + "learning_rate": 6.460859747513885e-06, + "loss": 0.4847, "step": 13315 }, { - "epoch": 2.792199622562382, - "grad_norm": 4.5492402780003145, - "learning_rate": 2.5061294071491493e-07, - "loss": 0.1115, + "epoch": 1.880259813611974, + "grad_norm": 3.7078834410358135, + "learning_rate": 6.459434001391582e-06, + "loss": 0.5004, "step": 13316 }, { - "epoch": 2.7924093101279093, - "grad_norm": 4.140942686642433, - "learning_rate": 2.501095359132821e-07, - "loss": 0.121, + "epoch": 1.8804010166619598, + "grad_norm": 3.7332869687965076, + "learning_rate": 6.458008337550505e-06, + "loss": 0.5764, "step": 13317 }, { - "epoch": 2.792618997693437, - "grad_norm": 4.391077385069247, - "learning_rate": 2.49606630810445e-07, - "loss": 0.1496, + "epoch": 1.8805422197119457, + "grad_norm": 3.183721214353656, + "learning_rate": 6.456582756023781e-06, + "loss": 0.4706, "step": 13318 }, { - "epoch": 2.7928286852589643, - "grad_norm": 4.174312966372431, - "learning_rate": 2.491042254321796e-07, - "loss": 0.1259, + "epoch": 1.8806834227619316, + "grad_norm": 3.216045639309333, + "learning_rate": 6.455157256844549e-06, + "loss": 0.5382, "step": 13319 }, { - "epoch": 2.7930383728244914, - "grad_norm": 4.698581190928134, - "learning_rate": 2.486023198042342e-07, - "loss": 0.1673, + "epoch": 1.8808246258119174, + "grad_norm": 3.0262829775980475, + "learning_rate": 6.4537318400459295e-06, + "loss": 0.4435, "step": 13320 }, { - "epoch": 2.793248060390019, - "grad_norm": 3.865628368547563, - "learning_rate": 2.4810091395233384e-07, - "loss": 0.1602, + "epoch": 1.8809658288619033, + "grad_norm": 3.601663123701112, + "learning_rate": 6.45230650566105e-06, + "loss": 0.5351, "step": 13321 }, { - "epoch": 2.7934577479555465, - "grad_norm": 5.551023702138432, - "learning_rate": 2.476000079021745e-07, - "loss": 0.1346, + "epoch": 1.8811070319118892, + "grad_norm": 3.536031239045356, + "learning_rate": 6.450881253723035e-06, + "loss": 0.5229, "step": 13322 }, { - "epoch": 2.7936674355210735, - "grad_norm": 4.238893734277088, - "learning_rate": 2.470996016794291e-07, - "loss": 0.1393, + "epoch": 1.881248234961875, + "grad_norm": 3.8568867580984763, + "learning_rate": 6.449456084265005e-06, + "loss": 0.4946, "step": 13323 }, { - "epoch": 2.793877123086601, - "grad_norm": 3.636960002009199, - "learning_rate": 2.4659969530974603e-07, - "loss": 0.1055, + "epoch": 1.881389438011861, + "grad_norm": 4.076750035137307, + "learning_rate": 6.448030997320084e-06, + "loss": 0.5562, "step": 13324 }, { - "epoch": 2.7940868106521286, - "grad_norm": 3.6920532101143513, - "learning_rate": 2.461002888187447e-07, - "loss": 0.1378, + "epoch": 1.8815306410618469, + "grad_norm": 3.3246425770555765, + "learning_rate": 6.446605992921389e-06, + "loss": 0.5226, "step": 13325 }, { - "epoch": 2.7942964982176557, - "grad_norm": 3.347715153476394, - "learning_rate": 2.4560138223202023e-07, - "loss": 0.11, + "epoch": 1.8816718441118327, + "grad_norm": 3.3441983599220455, + "learning_rate": 6.445181071102034e-06, + "loss": 0.4858, "step": 13326 }, { - "epoch": 2.794506185783183, - "grad_norm": 5.117964436612665, - "learning_rate": 2.451029755751444e-07, - "loss": 0.17, + "epoch": 1.8818130471618186, + "grad_norm": 4.107283550169291, + "learning_rate": 6.443756231895138e-06, + "loss": 0.6832, "step": 13327 }, { - "epoch": 2.7947158733487103, - "grad_norm": 3.8774752157245924, - "learning_rate": 2.4460506887365786e-07, - "loss": 0.1284, + "epoch": 1.8819542502118045, + "grad_norm": 3.783570269032791, + "learning_rate": 6.4423314753338115e-06, + "loss": 0.6264, "step": 13328 }, { - "epoch": 2.794925560914238, - "grad_norm": 5.531526208772738, - "learning_rate": 2.4410766215308355e-07, - "loss": 0.1557, + "epoch": 1.8820954532617904, + "grad_norm": 3.2532549676951543, + "learning_rate": 6.440906801451165e-06, + "loss": 0.5022, "step": 13329 }, { - "epoch": 2.795135248479765, - "grad_norm": 4.775971273409095, - "learning_rate": 2.436107554389111e-07, - "loss": 0.14, + "epoch": 1.8822366563117763, + "grad_norm": 3.3893259298562186, + "learning_rate": 6.439482210280308e-06, + "loss": 0.4313, "step": 13330 }, { - "epoch": 2.7953449360452924, - "grad_norm": 3.501103370556629, - "learning_rate": 2.43114348756609e-07, - "loss": 0.1256, + "epoch": 1.8823778593617622, + "grad_norm": 3.124290506088274, + "learning_rate": 6.438057701854344e-06, + "loss": 0.4209, "step": 13331 }, { - "epoch": 2.79555462361082, - "grad_norm": 3.6012625215727354, - "learning_rate": 2.426184421316191e-07, - "loss": 0.1224, + "epoch": 1.882519062411748, + "grad_norm": 4.2072614425576775, + "learning_rate": 6.436633276206381e-06, + "loss": 0.6561, "step": 13332 }, { - "epoch": 2.795764311176347, - "grad_norm": 3.022861407191782, - "learning_rate": 2.421230355893578e-07, - "loss": 0.0864, + "epoch": 1.882660265461734, + "grad_norm": 4.634076981016983, + "learning_rate": 6.435208933369523e-06, + "loss": 0.689, "step": 13333 }, { - "epoch": 2.7959739987418746, - "grad_norm": 4.342200000330737, - "learning_rate": 2.4162812915521473e-07, - "loss": 0.1639, + "epoch": 1.8828014685117198, + "grad_norm": 3.31809737891892, + "learning_rate": 6.43378467337687e-06, + "loss": 0.4844, "step": 13334 }, { - "epoch": 2.796183686307402, - "grad_norm": 3.8348169688319476, - "learning_rate": 2.4113372285455517e-07, - "loss": 0.13, + "epoch": 1.8829426715617057, + "grad_norm": 2.766301111634548, + "learning_rate": 6.432360496261521e-06, + "loss": 0.429, "step": 13335 }, { - "epoch": 2.796393373872929, - "grad_norm": 4.248904874029018, - "learning_rate": 2.4063981671271886e-07, - "loss": 0.139, + "epoch": 1.8830838746116916, + "grad_norm": 3.9527582020374945, + "learning_rate": 6.430936402056577e-06, + "loss": 0.598, "step": 13336 }, { - "epoch": 2.7966030614384567, - "grad_norm": 5.337087106996787, - "learning_rate": 2.401464107550167e-07, - "loss": 0.1676, + "epoch": 1.8832250776616775, + "grad_norm": 4.090213962614411, + "learning_rate": 6.429512390795124e-06, + "loss": 0.6648, "step": 13337 }, { - "epoch": 2.7968127490039842, - "grad_norm": 3.9329469502031547, - "learning_rate": 2.3965350500674054e-07, - "loss": 0.1134, + "epoch": 1.8833662807116633, + "grad_norm": 3.572855283539374, + "learning_rate": 6.428088462510262e-06, + "loss": 0.6352, "step": 13338 }, { - "epoch": 2.7970224365695113, - "grad_norm": 4.717925912477342, - "learning_rate": 2.3916109949315036e-07, - "loss": 0.139, + "epoch": 1.8835074837616492, + "grad_norm": 3.8264977623464795, + "learning_rate": 6.4266646172350835e-06, + "loss": 0.697, "step": 13339 }, { - "epoch": 2.797232124135039, - "grad_norm": 3.758103720231858, - "learning_rate": 2.386691942394825e-07, - "loss": 0.1475, + "epoch": 1.883648686811635, + "grad_norm": 3.535553019708483, + "learning_rate": 6.425240855002674e-06, + "loss": 0.4849, "step": 13340 }, { - "epoch": 2.7974418117005664, - "grad_norm": 4.221343060797928, - "learning_rate": 2.3817778927094914e-07, - "loss": 0.1142, + "epoch": 1.883789889861621, + "grad_norm": 3.6965797164142, + "learning_rate": 6.4238171758461245e-06, + "loss": 0.5482, "step": 13341 }, { - "epoch": 2.7976514992660935, - "grad_norm": 5.398830119028365, - "learning_rate": 2.376868846127356e-07, - "loss": 0.1739, + "epoch": 1.8839310929116069, + "grad_norm": 3.9424184405050244, + "learning_rate": 6.422393579798519e-06, + "loss": 0.574, "step": 13342 }, { - "epoch": 2.797861186831621, - "grad_norm": 3.816363564538564, - "learning_rate": 2.3719648029000065e-07, - "loss": 0.1201, + "epoch": 1.8840722959615928, + "grad_norm": 3.4856252672897297, + "learning_rate": 6.42097006689294e-06, + "loss": 0.5414, "step": 13343 }, { - "epoch": 2.7980708743971485, - "grad_norm": 4.510563421124062, - "learning_rate": 2.3670657632787753e-07, - "loss": 0.1547, + "epoch": 1.8842134990115786, + "grad_norm": 4.10547619611018, + "learning_rate": 6.4195466371624726e-06, + "loss": 0.6549, "step": 13344 }, { - "epoch": 2.7982805619626756, - "grad_norm": 4.6583565798660596, - "learning_rate": 2.3621717275147727e-07, - "loss": 0.1657, + "epoch": 1.8843547020615645, + "grad_norm": 3.5693921668555277, + "learning_rate": 6.418123290640199e-06, + "loss": 0.4831, "step": 13345 }, { - "epoch": 2.798490249528203, - "grad_norm": 4.078213667682109, - "learning_rate": 2.3572826958588202e-07, - "loss": 0.1321, + "epoch": 1.8844959051115504, + "grad_norm": 3.3193630140798693, + "learning_rate": 6.416700027359189e-06, + "loss": 0.4772, "step": 13346 }, { - "epoch": 2.79869993709373, - "grad_norm": 4.088288727620804, - "learning_rate": 2.352398668561462e-07, - "loss": 0.1069, + "epoch": 1.8846371081615363, + "grad_norm": 4.166108686257722, + "learning_rate": 6.415276847352524e-06, + "loss": 0.6904, "step": 13347 }, { - "epoch": 2.7989096246592577, - "grad_norm": 4.5720136626806065, - "learning_rate": 2.3475196458730531e-07, - "loss": 0.1516, + "epoch": 1.8847783112115222, + "grad_norm": 4.49900031110109, + "learning_rate": 6.413853750653274e-06, + "loss": 0.5952, "step": 13348 }, { - "epoch": 2.799119312224785, - "grad_norm": 4.707557919328625, - "learning_rate": 2.342645628043616e-07, - "loss": 0.117, + "epoch": 1.884919514261508, + "grad_norm": 3.64061251058164, + "learning_rate": 6.412430737294517e-06, + "loss": 0.5355, "step": 13349 }, { - "epoch": 2.7993289997903124, - "grad_norm": 4.166837922515698, - "learning_rate": 2.337776615322984e-07, - "loss": 0.1267, + "epoch": 1.885060717311494, + "grad_norm": 3.1493922113009307, + "learning_rate": 6.41100780730932e-06, + "loss": 0.4354, "step": 13350 }, { - "epoch": 2.79953868735584, - "grad_norm": 3.985888703049242, - "learning_rate": 2.3329126079606802e-07, - "loss": 0.097, + "epoch": 1.8852019203614798, + "grad_norm": 3.874617622771981, + "learning_rate": 6.4095849607307515e-06, + "loss": 0.6274, "step": 13351 }, { - "epoch": 2.799748374921367, - "grad_norm": 4.8595647552521015, - "learning_rate": 2.3280536062059932e-07, - "loss": 0.118, + "epoch": 1.8853431234114657, + "grad_norm": 4.212925058159435, + "learning_rate": 6.408162197591876e-06, + "loss": 0.6312, "step": 13352 }, { - "epoch": 2.7999580624868945, - "grad_norm": 3.822679603469462, - "learning_rate": 2.3231996103079578e-07, - "loss": 0.1343, + "epoch": 1.8854843264614516, + "grad_norm": 3.447109987579968, + "learning_rate": 6.4067395179257664e-06, + "loss": 0.5484, "step": 13353 }, { - "epoch": 2.800167750052422, - "grad_norm": 4.325511570692241, - "learning_rate": 2.318350620515375e-07, - "loss": 0.1219, + "epoch": 1.8856255295114375, + "grad_norm": 4.268878414795226, + "learning_rate": 6.405316921765472e-06, + "loss": 0.5823, "step": 13354 }, { - "epoch": 2.800377437617949, - "grad_norm": 4.187537200583681, - "learning_rate": 2.3135066370767346e-07, - "loss": 0.1268, + "epoch": 1.8857667325614234, + "grad_norm": 3.0567200743393896, + "learning_rate": 6.403894409144063e-06, + "loss": 0.389, "step": 13355 }, { - "epoch": 2.8005871251834766, - "grad_norm": 3.423713938821339, - "learning_rate": 2.3086676602403045e-07, - "loss": 0.1202, + "epoch": 1.8859079356114092, + "grad_norm": 2.9440503096902924, + "learning_rate": 6.402471980094594e-06, + "loss": 0.4489, "step": 13356 }, { - "epoch": 2.800796812749004, - "grad_norm": 4.178897016689882, - "learning_rate": 2.3038336902540982e-07, - "loss": 0.128, + "epoch": 1.8860491386613951, + "grad_norm": 3.612524873968128, + "learning_rate": 6.401049634650119e-06, + "loss": 0.6261, "step": 13357 }, { - "epoch": 2.8010065003145312, - "grad_norm": 5.440551312740216, - "learning_rate": 2.29900472736585e-07, - "loss": 0.1098, + "epoch": 1.886190341711381, + "grad_norm": 3.566531938413125, + "learning_rate": 6.399627372843699e-06, + "loss": 0.5638, "step": 13358 }, { - "epoch": 2.8012161878800588, - "grad_norm": 4.876131371008478, - "learning_rate": 2.2941807718230736e-07, - "loss": 0.1226, + "epoch": 1.8863315447613669, + "grad_norm": 3.7667542635566487, + "learning_rate": 6.398205194708385e-06, + "loss": 0.5159, "step": 13359 }, { - "epoch": 2.8014258754455863, - "grad_norm": 3.391857696486738, - "learning_rate": 2.289361823872982e-07, - "loss": 0.12, + "epoch": 1.8864727478113528, + "grad_norm": 2.8115233745554233, + "learning_rate": 6.396783100277224e-06, + "loss": 0.4389, "step": 13360 }, { - "epoch": 2.8016355630111134, - "grad_norm": 3.9792692125491143, - "learning_rate": 2.284547883762589e-07, - "loss": 0.1006, + "epoch": 1.8866139508613387, + "grad_norm": 3.4145266613884444, + "learning_rate": 6.395361089583271e-06, + "loss": 0.5224, "step": 13361 }, { - "epoch": 2.801845250576641, - "grad_norm": 4.641618454122115, - "learning_rate": 2.2797389517385858e-07, - "loss": 0.2001, + "epoch": 1.8867551539113245, + "grad_norm": 4.156954556308672, + "learning_rate": 6.3939391626595704e-06, + "loss": 0.6595, "step": 13362 }, { - "epoch": 2.8020549381421684, - "grad_norm": 3.4121088944780333, - "learning_rate": 2.2749350280474425e-07, - "loss": 0.1185, + "epoch": 1.8868963569613104, + "grad_norm": 4.212161359871705, + "learning_rate": 6.3925173195391645e-06, + "loss": 0.5791, "step": 13363 }, { - "epoch": 2.8022646257076955, - "grad_norm": 4.710892306019308, - "learning_rate": 2.2701361129353838e-07, - "loss": 0.1793, + "epoch": 1.8870375600112963, + "grad_norm": 5.311981936263858, + "learning_rate": 6.391095560255098e-06, + "loss": 0.6401, "step": 13364 }, { - "epoch": 2.802474313273223, - "grad_norm": 3.083631263512698, - "learning_rate": 2.2653422066483355e-07, - "loss": 0.0793, + "epoch": 1.8871787630612822, + "grad_norm": 4.110452448564788, + "learning_rate": 6.389673884840413e-06, + "loss": 0.5758, "step": 13365 }, { - "epoch": 2.80268400083875, - "grad_norm": 5.497128376372013, - "learning_rate": 2.2605533094320343e-07, - "loss": 0.1497, + "epoch": 1.887319966111268, + "grad_norm": 4.969491231149269, + "learning_rate": 6.388252293328143e-06, + "loss": 0.7883, "step": 13366 }, { - "epoch": 2.8028936884042777, - "grad_norm": 4.879799166224526, - "learning_rate": 2.2557694215318838e-07, - "loss": 0.1598, + "epoch": 1.887461169161254, + "grad_norm": 4.534689742710986, + "learning_rate": 6.386830785751335e-06, + "loss": 0.6616, "step": 13367 }, { - "epoch": 2.8031033759698047, - "grad_norm": 5.877407969954646, - "learning_rate": 2.2509905431930768e-07, - "loss": 0.1611, + "epoch": 1.8876023722112398, + "grad_norm": 3.3363685844751396, + "learning_rate": 6.385409362143018e-06, + "loss": 0.5508, "step": 13368 }, { - "epoch": 2.8033130635353323, - "grad_norm": 3.599705482147894, - "learning_rate": 2.2462166746605396e-07, - "loss": 0.1113, + "epoch": 1.8877435752612257, + "grad_norm": 3.364114353876827, + "learning_rate": 6.383988022536225e-06, + "loss": 0.4853, "step": 13369 }, { - "epoch": 2.80352275110086, - "grad_norm": 4.146496729286574, - "learning_rate": 2.2414478161789545e-07, - "loss": 0.1182, + "epoch": 1.8878847783112116, + "grad_norm": 4.105589005973737, + "learning_rate": 6.382566766963993e-06, + "loss": 0.6143, "step": 13370 }, { - "epoch": 2.803732438666387, - "grad_norm": 4.385728670934805, - "learning_rate": 2.2366839679927254e-07, - "loss": 0.1419, + "epoch": 1.8880259813611975, + "grad_norm": 3.4508133735906217, + "learning_rate": 6.381145595459343e-06, + "loss": 0.5599, "step": 13371 }, { - "epoch": 2.8039421262319144, - "grad_norm": 4.956617048613931, - "learning_rate": 2.231925130345991e-07, - "loss": 0.1748, + "epoch": 1.8881671844111834, + "grad_norm": 3.9198584550309867, + "learning_rate": 6.3797245080553075e-06, + "loss": 0.6371, "step": 13372 }, { - "epoch": 2.804151813797442, - "grad_norm": 5.182060125863725, - "learning_rate": 2.227171303482678e-07, - "loss": 0.1413, + "epoch": 1.8883083874611692, + "grad_norm": 3.653590502397578, + "learning_rate": 6.37830350478491e-06, + "loss": 0.5454, "step": 13373 }, { - "epoch": 2.804361501362969, - "grad_norm": 3.987481742093236, - "learning_rate": 2.222422487646403e-07, - "loss": 0.1471, + "epoch": 1.8884495905111551, + "grad_norm": 3.3059162575717247, + "learning_rate": 6.376882585681174e-06, + "loss": 0.5282, "step": 13374 }, { - "epoch": 2.8045711889284966, - "grad_norm": 4.643603968233447, - "learning_rate": 2.2176786830805596e-07, - "loss": 0.1566, + "epoch": 1.888590793561141, + "grad_norm": 3.77895189424575, + "learning_rate": 6.375461750777125e-06, + "loss": 0.5425, "step": 13375 }, { - "epoch": 2.804780876494024, - "grad_norm": 3.9919272863346658, - "learning_rate": 2.2129398900282984e-07, - "loss": 0.1261, + "epoch": 1.888731996611127, + "grad_norm": 3.534869872056998, + "learning_rate": 6.374041000105779e-06, + "loss": 0.5993, "step": 13376 }, { - "epoch": 2.804990564059551, - "grad_norm": 3.5639047839085705, - "learning_rate": 2.2082061087324468e-07, - "loss": 0.1345, + "epoch": 1.8888731996611128, + "grad_norm": 3.7090006503191604, + "learning_rate": 6.372620333700155e-06, + "loss": 0.6007, "step": 13377 }, { - "epoch": 2.8052002516250787, - "grad_norm": 4.925106947407544, - "learning_rate": 2.2034773394356447e-07, - "loss": 0.1646, + "epoch": 1.8890144027110987, + "grad_norm": 4.467466439209398, + "learning_rate": 6.371199751593264e-06, + "loss": 0.6743, "step": 13378 }, { - "epoch": 2.805409939190606, - "grad_norm": 4.197184426324162, - "learning_rate": 2.1987535823802754e-07, - "loss": 0.1185, + "epoch": 1.8891556057610845, + "grad_norm": 3.361240355155478, + "learning_rate": 6.369779253818129e-06, + "loss": 0.5003, "step": 13379 }, { - "epoch": 2.8056196267561333, - "grad_norm": 4.343692399447504, - "learning_rate": 2.1940348378083896e-07, - "loss": 0.1401, + "epoch": 1.8892968088110704, + "grad_norm": 3.3261773652558357, + "learning_rate": 6.368358840407754e-06, + "loss": 0.4674, "step": 13380 }, { - "epoch": 2.805829314321661, - "grad_norm": 3.23282190610629, - "learning_rate": 2.1893211059618503e-07, - "loss": 0.1018, + "epoch": 1.8894380118610563, + "grad_norm": 3.3375334961220204, + "learning_rate": 6.366938511395151e-06, + "loss": 0.4981, "step": 13381 }, { - "epoch": 2.8060390018871884, - "grad_norm": 4.045202791922959, - "learning_rate": 2.1846123870822633e-07, - "loss": 0.1276, + "epoch": 1.8895792149110422, + "grad_norm": 3.261397900985418, + "learning_rate": 6.365518266813327e-06, + "loss": 0.5072, "step": 13382 }, { - "epoch": 2.8062486894527154, - "grad_norm": 4.788440782226839, - "learning_rate": 2.179908681410936e-07, - "loss": 0.1395, + "epoch": 1.889720417961028, + "grad_norm": 3.107114023979929, + "learning_rate": 6.364098106695289e-06, + "loss": 0.486, "step": 13383 }, { - "epoch": 2.806458377018243, - "grad_norm": 3.3259733894606747, - "learning_rate": 2.1752099891889533e-07, - "loss": 0.1045, + "epoch": 1.889861621011014, + "grad_norm": 3.745012110077049, + "learning_rate": 6.362678031074041e-06, + "loss": 0.6393, "step": 13384 }, { - "epoch": 2.80666806458377, - "grad_norm": 4.871621131166538, - "learning_rate": 2.1705163106571225e-07, - "loss": 0.1445, + "epoch": 1.8900028240609998, + "grad_norm": 3.041161686856863, + "learning_rate": 6.361258039982585e-06, + "loss": 0.5068, "step": 13385 }, { - "epoch": 2.8068777521492976, - "grad_norm": 4.006487365183586, - "learning_rate": 2.1658276460560068e-07, - "loss": 0.1418, + "epoch": 1.8901440271109857, + "grad_norm": 3.5355806860337577, + "learning_rate": 6.35983813345392e-06, + "loss": 0.5554, "step": 13386 }, { - "epoch": 2.8070874397148247, - "grad_norm": 4.344836029063606, - "learning_rate": 2.1611439956259139e-07, - "loss": 0.1303, + "epoch": 1.8902852301609716, + "grad_norm": 3.5259525200471895, + "learning_rate": 6.358418311521048e-06, + "loss": 0.5319, "step": 13387 }, { - "epoch": 2.807297127280352, - "grad_norm": 3.511000874662097, - "learning_rate": 2.1564653596068852e-07, - "loss": 0.1036, + "epoch": 1.8904264332109575, + "grad_norm": 3.5140474123647887, + "learning_rate": 6.3569985742169594e-06, + "loss": 0.6568, "step": 13388 }, { - "epoch": 2.8075068148458797, - "grad_norm": 4.093945265846804, - "learning_rate": 2.1517917382386955e-07, - "loss": 0.1181, + "epoch": 1.8905676362609434, + "grad_norm": 3.4902702535237795, + "learning_rate": 6.35557892157465e-06, + "loss": 0.5388, "step": 13389 }, { - "epoch": 2.807716502411407, - "grad_norm": 5.547162431251077, - "learning_rate": 2.1471231317608975e-07, - "loss": 0.1618, + "epoch": 1.8907088393109293, + "grad_norm": 3.771715764451454, + "learning_rate": 6.354159353627114e-06, + "loss": 0.5914, "step": 13390 }, { - "epoch": 2.8079261899769343, - "grad_norm": 5.366522434937, - "learning_rate": 2.1424595404127556e-07, - "loss": 0.1507, + "epoch": 1.8908500423609151, + "grad_norm": 3.8114287716261535, + "learning_rate": 6.352739870407336e-06, + "loss": 0.577, "step": 13391 }, { - "epoch": 2.808135877542462, - "grad_norm": 4.440964241269104, - "learning_rate": 2.1378009644332897e-07, - "loss": 0.1737, + "epoch": 1.890991245410901, + "grad_norm": 3.7969442538094054, + "learning_rate": 6.351320471948313e-06, + "loss": 0.6118, "step": 13392 }, { - "epoch": 2.808345565107989, - "grad_norm": 3.5677402250983015, - "learning_rate": 2.133147404061253e-07, - "loss": 0.1071, + "epoch": 1.891132448460887, + "grad_norm": 3.350525658951351, + "learning_rate": 6.349901158283025e-06, + "loss": 0.5232, "step": 13393 }, { - "epoch": 2.8085552526735165, - "grad_norm": 3.605226177024849, - "learning_rate": 2.1284988595351553e-07, - "loss": 0.1086, + "epoch": 1.8912736515108728, + "grad_norm": 3.8250529579809354, + "learning_rate": 6.348481929444458e-06, + "loss": 0.6286, "step": 13394 }, { - "epoch": 2.808764940239044, - "grad_norm": 3.6027122635852007, - "learning_rate": 2.1238553310932385e-07, - "loss": 0.1293, + "epoch": 1.8914148545608587, + "grad_norm": 3.6061428150213355, + "learning_rate": 6.347062785465592e-06, + "loss": 0.5634, "step": 13395 }, { - "epoch": 2.808974627804571, - "grad_norm": 4.340822327230391, - "learning_rate": 2.119216818973502e-07, - "loss": 0.1062, + "epoch": 1.8915560576108446, + "grad_norm": 3.8374964313894924, + "learning_rate": 6.345643726379416e-06, + "loss": 0.6316, "step": 13396 }, { - "epoch": 2.8091843153700986, - "grad_norm": 6.030837404852382, - "learning_rate": 2.1145833234136659e-07, - "loss": 0.1862, + "epoch": 1.8916972606608304, + "grad_norm": 3.5551111375087237, + "learning_rate": 6.344224752218894e-06, + "loss": 0.5392, "step": 13397 }, { - "epoch": 2.809394002935626, - "grad_norm": 4.807526726194187, - "learning_rate": 2.1099548446511963e-07, - "loss": 0.1721, + "epoch": 1.891838463710816, + "grad_norm": 4.037649865688388, + "learning_rate": 6.342805863017012e-06, + "loss": 0.6602, "step": 13398 }, { - "epoch": 2.809603690501153, - "grad_norm": 4.319526975824163, - "learning_rate": 2.105331382923337e-07, - "loss": 0.1059, + "epoch": 1.891979666760802, + "grad_norm": 3.2323793183035625, + "learning_rate": 6.341387058806742e-06, + "loss": 0.4729, "step": 13399 }, { - "epoch": 2.8098133780666807, - "grad_norm": 3.814477333747934, - "learning_rate": 2.1007129384670423e-07, - "loss": 0.1005, + "epoch": 1.8921208698107879, + "grad_norm": 4.072824109278964, + "learning_rate": 6.339968339621056e-06, + "loss": 0.6628, "step": 13400 }, { - "epoch": 2.8100230656322083, - "grad_norm": 3.6704887588770476, - "learning_rate": 2.0960995115189785e-07, - "loss": 0.1166, + "epoch": 1.8922620728607737, + "grad_norm": 3.304145602212345, + "learning_rate": 6.338549705492929e-06, + "loss": 0.4996, "step": 13401 }, { - "epoch": 2.8102327531977354, - "grad_norm": 3.6289756505723387, - "learning_rate": 2.0914911023156347e-07, - "loss": 0.1023, + "epoch": 1.8924032759107596, + "grad_norm": 2.7814762602251135, + "learning_rate": 6.337131156455323e-06, + "loss": 0.437, "step": 13402 }, { - "epoch": 2.810442440763263, - "grad_norm": 3.410194110482676, - "learning_rate": 2.0868877110931885e-07, - "loss": 0.1111, + "epoch": 1.8925444789607455, + "grad_norm": 2.7690478026777243, + "learning_rate": 6.335712692541205e-06, + "loss": 0.397, "step": 13403 }, { - "epoch": 2.81065212832879, - "grad_norm": 4.328741617231303, - "learning_rate": 2.082289338087573e-07, - "loss": 0.1339, + "epoch": 1.8926856820107314, + "grad_norm": 2.9162725628990978, + "learning_rate": 6.334294313783549e-06, + "loss": 0.3686, "step": 13404 }, { - "epoch": 2.8108618158943175, - "grad_norm": 3.5247446369169677, - "learning_rate": 2.0776959835344447e-07, - "loss": 0.1114, + "epoch": 1.8928268850607173, + "grad_norm": 3.361020902938387, + "learning_rate": 6.332876020215303e-06, + "loss": 0.5153, "step": 13405 }, { - "epoch": 2.8110715034598446, - "grad_norm": 4.774572776704726, - "learning_rate": 2.073107647669248e-07, - "loss": 0.1395, + "epoch": 1.8929680881107032, + "grad_norm": 3.7145787619061514, + "learning_rate": 6.331457811869437e-06, + "loss": 0.6155, "step": 13406 }, { - "epoch": 2.811281191025372, - "grad_norm": 4.512397297700276, - "learning_rate": 2.0685243307271176e-07, - "loss": 0.1488, + "epoch": 1.893109291160689, + "grad_norm": 3.3854951646953317, + "learning_rate": 6.3300396887789064e-06, + "loss": 0.5122, "step": 13407 }, { - "epoch": 2.8114908785908996, - "grad_norm": 3.3179684025415326, - "learning_rate": 2.0639460329429872e-07, - "loss": 0.1108, + "epoch": 1.893250494210675, + "grad_norm": 3.8256994856819633, + "learning_rate": 6.32862165097667e-06, + "loss": 0.4585, "step": 13408 }, { - "epoch": 2.8117005661564267, - "grad_norm": 4.2258377561081275, - "learning_rate": 2.0593727545514698e-07, - "loss": 0.1343, + "epoch": 1.8933916972606608, + "grad_norm": 4.322051634239428, + "learning_rate": 6.327203698495677e-06, + "loss": 0.5188, "step": 13409 }, { - "epoch": 2.8119102537219542, - "grad_norm": 3.473784463380286, - "learning_rate": 2.0548044957869661e-07, - "loss": 0.1222, + "epoch": 1.8935329003106467, + "grad_norm": 3.2812758860466373, + "learning_rate": 6.325785831368887e-06, + "loss": 0.5173, "step": 13410 }, { - "epoch": 2.8121199412874818, - "grad_norm": 4.206296141960858, - "learning_rate": 2.0502412568836118e-07, - "loss": 0.1096, + "epoch": 1.8936741033606326, + "grad_norm": 3.5247042201853533, + "learning_rate": 6.324368049629247e-06, + "loss": 0.4447, "step": 13411 }, { - "epoch": 2.812329628853009, - "grad_norm": 4.0664203284667595, - "learning_rate": 2.0456830380752857e-07, - "loss": 0.1378, + "epoch": 1.8938153064106185, + "grad_norm": 3.742815567859955, + "learning_rate": 6.322950353309705e-06, + "loss": 0.5492, "step": 13412 }, { - "epoch": 2.8125393164185364, - "grad_norm": 3.2432684241207714, - "learning_rate": 2.0411298395955904e-07, - "loss": 0.0881, + "epoch": 1.8939565094606043, + "grad_norm": 3.305856478380341, + "learning_rate": 6.321532742443214e-06, + "loss": 0.4154, "step": 13413 }, { - "epoch": 2.812749003984064, - "grad_norm": 5.433889733033916, - "learning_rate": 2.0365816616778945e-07, - "loss": 0.1785, + "epoch": 1.8940977125105902, + "grad_norm": 4.428474304286832, + "learning_rate": 6.320115217062708e-06, + "loss": 0.61, "step": 13414 }, { - "epoch": 2.812958691549591, - "grad_norm": 3.9911981227287283, - "learning_rate": 2.0320385045553004e-07, - "loss": 0.1316, + "epoch": 1.894238915560576, + "grad_norm": 3.6185164347820997, + "learning_rate": 6.318697777201136e-06, + "loss": 0.4654, "step": 13415 }, { - "epoch": 2.8131683791151185, - "grad_norm": 4.253736715467679, - "learning_rate": 2.0275003684606443e-07, - "loss": 0.1122, + "epoch": 1.894380118610562, + "grad_norm": 3.9393518817696633, + "learning_rate": 6.317280422891439e-06, + "loss": 0.5228, "step": 13416 }, { - "epoch": 2.813378066680646, - "grad_norm": 3.9183504876836603, - "learning_rate": 2.0229672536265177e-07, - "loss": 0.108, + "epoch": 1.8945213216605479, + "grad_norm": 3.686562790667477, + "learning_rate": 6.315863154166552e-06, + "loss": 0.5992, "step": 13417 }, { - "epoch": 2.813587754246173, - "grad_norm": 3.9947142758005874, - "learning_rate": 2.0184391602852572e-07, - "loss": 0.0991, + "epoch": 1.8946625247105338, + "grad_norm": 2.8924763590674396, + "learning_rate": 6.314445971059416e-06, + "loss": 0.4297, "step": 13418 }, { - "epoch": 2.8137974418117007, - "grad_norm": 4.945407354279828, - "learning_rate": 2.0139160886689213e-07, - "loss": 0.1568, + "epoch": 1.8948037277605196, + "grad_norm": 3.857662275882954, + "learning_rate": 6.313028873602964e-06, + "loss": 0.6117, "step": 13419 }, { - "epoch": 2.814007129377228, - "grad_norm": 4.2994197695479395, - "learning_rate": 2.009398039009336e-07, - "loss": 0.1459, + "epoch": 1.8949449308105055, + "grad_norm": 3.682311142218315, + "learning_rate": 6.311611861830129e-06, + "loss": 0.4986, "step": 13420 }, { - "epoch": 2.8142168169427553, - "grad_norm": 5.941397765208098, - "learning_rate": 2.0048850115380713e-07, - "loss": 0.1378, + "epoch": 1.8950861338604914, + "grad_norm": 3.500187988638975, + "learning_rate": 6.3101949357738435e-06, + "loss": 0.4973, "step": 13421 }, { - "epoch": 2.814426504508283, - "grad_norm": 4.090855495964533, - "learning_rate": 2.0003770064864092e-07, - "loss": 0.1277, + "epoch": 1.8952273369104773, + "grad_norm": 3.392362276039131, + "learning_rate": 6.3087780954670306e-06, + "loss": 0.5516, "step": 13422 }, { - "epoch": 2.81463619207381, - "grad_norm": 3.6856477686470397, - "learning_rate": 1.9958740240853868e-07, - "loss": 0.1156, + "epoch": 1.8953685399604632, + "grad_norm": 4.096021084596819, + "learning_rate": 6.307361340942623e-06, + "loss": 0.5851, "step": 13423 }, { - "epoch": 2.8148458796393374, - "grad_norm": 3.6133898588559776, - "learning_rate": 1.9913760645658087e-07, - "loss": 0.1181, + "epoch": 1.895509743010449, + "grad_norm": 3.316633320338202, + "learning_rate": 6.305944672233542e-06, + "loss": 0.5026, "step": 13424 }, { - "epoch": 2.8150555672048645, - "grad_norm": 5.668607099434243, - "learning_rate": 1.9868831281581902e-07, - "loss": 0.1823, + "epoch": 1.895650946060435, + "grad_norm": 4.3752702098309335, + "learning_rate": 6.304528089372712e-06, + "loss": 0.7208, "step": 13425 }, { - "epoch": 2.815265254770392, - "grad_norm": 4.876721713511406, - "learning_rate": 1.9823952150928027e-07, - "loss": 0.1394, + "epoch": 1.8957921491104208, + "grad_norm": 4.104722949167642, + "learning_rate": 6.303111592393051e-06, + "loss": 0.6428, "step": 13426 }, { - "epoch": 2.8154749423359196, - "grad_norm": 4.731609195984963, - "learning_rate": 1.9779123255996625e-07, - "loss": 0.1926, + "epoch": 1.8959333521604067, + "grad_norm": 3.3739364782247003, + "learning_rate": 6.301695181327482e-06, + "loss": 0.5557, "step": 13427 }, { - "epoch": 2.8156846299014466, - "grad_norm": 3.238563367417499, - "learning_rate": 1.9734344599085408e-07, - "loss": 0.1237, + "epoch": 1.8960745552103926, + "grad_norm": 3.8451021853390897, + "learning_rate": 6.300278856208919e-06, + "loss": 0.6002, "step": 13428 }, { - "epoch": 2.815894317466974, - "grad_norm": 3.6109569587311143, - "learning_rate": 1.9689616182489102e-07, - "loss": 0.1229, + "epoch": 1.8962157582603785, + "grad_norm": 3.6901140238572223, + "learning_rate": 6.298862617070277e-06, + "loss": 0.5857, "step": 13429 }, { - "epoch": 2.8161040050325017, - "grad_norm": 4.674669086871531, - "learning_rate": 1.9644938008500313e-07, - "loss": 0.1334, + "epoch": 1.8963569613103644, + "grad_norm": 3.5633327254260756, + "learning_rate": 6.2974464639444745e-06, + "loss": 0.5642, "step": 13430 }, { - "epoch": 2.8163136925980288, - "grad_norm": 5.58283762666598, - "learning_rate": 1.9600310079408767e-07, - "loss": 0.1531, + "epoch": 1.8964981643603502, + "grad_norm": 2.858638542833089, + "learning_rate": 6.2960303968644125e-06, + "loss": 0.4307, "step": 13431 }, { - "epoch": 2.8165233801635563, - "grad_norm": 5.608198001567224, - "learning_rate": 1.9555732397501636e-07, - "loss": 0.1644, + "epoch": 1.896639367410336, + "grad_norm": 3.3250048953514986, + "learning_rate": 6.294614415863005e-06, + "loss": 0.4451, "step": 13432 }, { - "epoch": 2.816733067729084, - "grad_norm": 4.966514836845429, - "learning_rate": 1.9511204965063756e-07, - "loss": 0.1409, + "epoch": 1.8967805704603218, + "grad_norm": 4.135491620831202, + "learning_rate": 6.2931985209731586e-06, + "loss": 0.6512, "step": 13433 }, { - "epoch": 2.816942755294611, - "grad_norm": 3.643769678865067, - "learning_rate": 1.9466727784377304e-07, - "loss": 0.0919, + "epoch": 1.8969217735103077, + "grad_norm": 4.24784567898859, + "learning_rate": 6.291782712227776e-06, + "loss": 0.563, "step": 13434 }, { - "epoch": 2.8171524428601384, - "grad_norm": 3.0153687350004903, - "learning_rate": 1.9422300857721566e-07, - "loss": 0.1018, + "epoch": 1.8970629765602935, + "grad_norm": 4.115088763429305, + "learning_rate": 6.2903669896597645e-06, + "loss": 0.5511, "step": 13435 }, { - "epoch": 2.817362130425666, - "grad_norm": 3.2210536573591324, - "learning_rate": 1.937792418737372e-07, - "loss": 0.0821, + "epoch": 1.8972041796102794, + "grad_norm": 3.448432054760496, + "learning_rate": 6.28895135330202e-06, + "loss": 0.5025, "step": 13436 }, { - "epoch": 2.817571817991193, - "grad_norm": 3.9368810303183195, - "learning_rate": 1.933359777560806e-07, - "loss": 0.1141, + "epoch": 1.8973453826602653, + "grad_norm": 3.4798574006570493, + "learning_rate": 6.287535803187446e-06, + "loss": 0.454, "step": 13437 }, { - "epoch": 2.8177815055567206, - "grad_norm": 4.343287273930058, - "learning_rate": 1.9289321624696322e-07, - "loss": 0.1394, + "epoch": 1.8974865857102512, + "grad_norm": 3.711726181863225, + "learning_rate": 6.286120339348935e-06, + "loss": 0.5461, "step": 13438 }, { - "epoch": 2.817991193122248, - "grad_norm": 3.950078195549755, - "learning_rate": 1.9245095736907805e-07, - "loss": 0.1274, + "epoch": 1.897627788760237, + "grad_norm": 4.103743635482809, + "learning_rate": 6.284704961819385e-06, + "loss": 0.5905, "step": 13439 }, { - "epoch": 2.818200880687775, - "grad_norm": 4.474596061212188, - "learning_rate": 1.9200920114509248e-07, - "loss": 0.1497, + "epoch": 1.897768991810223, + "grad_norm": 3.4591780582248086, + "learning_rate": 6.283289670631684e-06, + "loss": 0.4903, "step": 13440 }, { - "epoch": 2.8184105682533027, - "grad_norm": 4.488894173186195, - "learning_rate": 1.915679475976473e-07, - "loss": 0.1663, + "epoch": 1.8979101948602088, + "grad_norm": 3.1723619915091064, + "learning_rate": 6.281874465818727e-06, + "loss": 0.422, "step": 13441 }, { - "epoch": 2.81862025581883, - "grad_norm": 4.115302858837432, - "learning_rate": 1.9112719674935442e-07, - "loss": 0.1387, + "epoch": 1.8980513979101947, + "grad_norm": 3.9345175956355085, + "learning_rate": 6.280459347413402e-06, + "loss": 0.5671, "step": 13442 }, { - "epoch": 2.8188299433843573, - "grad_norm": 4.5602341278182825, - "learning_rate": 1.906869486228069e-07, - "loss": 0.1523, + "epoch": 1.8981926009601806, + "grad_norm": 4.293579203310036, + "learning_rate": 6.279044315448595e-06, + "loss": 0.5236, "step": 13443 }, { - "epoch": 2.8190396309498844, - "grad_norm": 4.295693618321269, - "learning_rate": 1.9024720324056555e-07, - "loss": 0.1345, + "epoch": 1.8983338040101665, + "grad_norm": 3.5823078507523607, + "learning_rate": 6.277629369957191e-06, + "loss": 0.577, "step": 13444 }, { - "epoch": 2.819249318515412, - "grad_norm": 4.037791341024337, - "learning_rate": 1.8980796062517015e-07, - "loss": 0.1189, + "epoch": 1.8984750070601524, + "grad_norm": 3.477605356948143, + "learning_rate": 6.276214510972074e-06, + "loss": 0.4667, "step": 13445 }, { - "epoch": 2.8194590060809395, - "grad_norm": 4.729523782311469, - "learning_rate": 1.8936922079913155e-07, - "loss": 0.1345, + "epoch": 1.8986162101101383, + "grad_norm": 3.59319449585428, + "learning_rate": 6.274799738526125e-06, + "loss": 0.5796, "step": 13446 }, { - "epoch": 2.8196686936464666, - "grad_norm": 3.2219591194457897, - "learning_rate": 1.8893098378493625e-07, - "loss": 0.0863, + "epoch": 1.8987574131601241, + "grad_norm": 3.8654935757970623, + "learning_rate": 6.273385052652221e-06, + "loss": 0.5569, "step": 13447 }, { - "epoch": 2.819878381211994, - "grad_norm": 3.9535016148577804, - "learning_rate": 1.8849324960504288e-07, - "loss": 0.1276, + "epoch": 1.89889861621011, + "grad_norm": 3.4531695729438394, + "learning_rate": 6.271970453383235e-06, + "loss": 0.5127, "step": 13448 }, { - "epoch": 2.8200880687775216, - "grad_norm": 4.669735554991172, - "learning_rate": 1.8805601828188914e-07, - "loss": 0.1369, + "epoch": 1.899039819260096, + "grad_norm": 3.3825740334469367, + "learning_rate": 6.270555940752048e-06, + "loss": 0.5218, "step": 13449 }, { - "epoch": 2.8202977563430487, - "grad_norm": 4.035105933095399, - "learning_rate": 1.8761928983788258e-07, - "loss": 0.1084, + "epoch": 1.8991810223100818, + "grad_norm": 3.479506791495596, + "learning_rate": 6.2691415147915306e-06, + "loss": 0.5321, "step": 13450 }, { - "epoch": 2.820507443908576, - "grad_norm": 4.492576791024945, - "learning_rate": 1.8718306429540534e-07, - "loss": 0.1534, + "epoch": 1.8993222253600677, + "grad_norm": 3.5457010359204455, + "learning_rate": 6.267727175534553e-06, + "loss": 0.5192, "step": 13451 }, { - "epoch": 2.8207171314741037, - "grad_norm": 4.959557698820072, - "learning_rate": 1.867473416768162e-07, - "loss": 0.1383, + "epoch": 1.8994634284100536, + "grad_norm": 3.03673595400293, + "learning_rate": 6.266312923013983e-06, + "loss": 0.385, "step": 13452 }, { - "epoch": 2.820926819039631, - "grad_norm": 4.469564353974139, - "learning_rate": 1.8631212200444504e-07, - "loss": 0.1082, + "epoch": 1.8996046314600394, + "grad_norm": 3.240206378492022, + "learning_rate": 6.26489875726269e-06, + "loss": 0.4957, "step": 13453 }, { - "epoch": 2.8211365066051584, - "grad_norm": 3.8561423171316833, - "learning_rate": 1.858774053005996e-07, - "loss": 0.1277, + "epoch": 1.8997458345100253, + "grad_norm": 3.194625803251091, + "learning_rate": 6.263484678313536e-06, + "loss": 0.4624, "step": 13454 }, { - "epoch": 2.821346194170686, - "grad_norm": 2.7173770401458146, - "learning_rate": 1.8544319158755876e-07, - "loss": 0.0854, + "epoch": 1.8998870375600112, + "grad_norm": 3.3929030867402874, + "learning_rate": 6.262070686199386e-06, + "loss": 0.521, "step": 13455 }, { - "epoch": 2.821555881736213, - "grad_norm": 3.8595467412918096, - "learning_rate": 1.8500948088757687e-07, - "loss": 0.132, + "epoch": 1.900028240609997, + "grad_norm": 2.7643868637121054, + "learning_rate": 6.260656780953098e-06, + "loss": 0.4259, "step": 13456 }, { - "epoch": 2.8217655693017405, - "grad_norm": 3.8034064217534502, - "learning_rate": 1.8457627322288064e-07, - "loss": 0.1086, + "epoch": 1.900169443659983, + "grad_norm": 3.9243369454838475, + "learning_rate": 6.2592429626075275e-06, + "loss": 0.602, "step": 13457 }, { - "epoch": 2.821975256867268, - "grad_norm": 5.1509953222698694, - "learning_rate": 1.8414356861567784e-07, - "loss": 0.1249, + "epoch": 1.9003106467099689, + "grad_norm": 3.6633559492120233, + "learning_rate": 6.25782923119554e-06, + "loss": 0.5963, "step": 13458 }, { - "epoch": 2.822184944432795, - "grad_norm": 5.3051581073928284, - "learning_rate": 1.8371136708813852e-07, - "loss": 0.1858, + "epoch": 1.9004518497599547, + "grad_norm": 3.7349347708267544, + "learning_rate": 6.2564155867499845e-06, + "loss": 0.6204, "step": 13459 }, { - "epoch": 2.8223946319983226, - "grad_norm": 3.7460790321609916, - "learning_rate": 1.832796686624183e-07, - "loss": 0.1171, + "epoch": 1.9005930528099406, + "grad_norm": 3.489559172484282, + "learning_rate": 6.2550020293037095e-06, + "loss": 0.4741, "step": 13460 }, { - "epoch": 2.8226043195638497, - "grad_norm": 3.256306776841788, - "learning_rate": 1.8284847336064171e-07, - "loss": 0.1264, + "epoch": 1.9007342558599265, + "grad_norm": 3.1411824620365554, + "learning_rate": 6.253588558889574e-06, + "loss": 0.4453, "step": 13461 }, { - "epoch": 2.8228140071293772, - "grad_norm": 4.4672830710706934, - "learning_rate": 1.824177812049077e-07, - "loss": 0.1145, + "epoch": 1.9008754589099124, + "grad_norm": 2.9236731460592265, + "learning_rate": 6.2521751755404226e-06, + "loss": 0.4667, "step": 13462 }, { - "epoch": 2.8230236946949043, - "grad_norm": 4.142844164501183, - "learning_rate": 1.819875922172898e-07, - "loss": 0.147, + "epoch": 1.9010166619598983, + "grad_norm": 2.6878718262365084, + "learning_rate": 6.250761879289103e-06, + "loss": 0.3701, "step": 13463 }, { - "epoch": 2.823233382260432, - "grad_norm": 6.499598665238347, - "learning_rate": 1.8155790641983696e-07, - "loss": 0.1753, + "epoch": 1.9011578650098842, + "grad_norm": 4.042627706642249, + "learning_rate": 6.2493486701684556e-06, + "loss": 0.5441, "step": 13464 }, { - "epoch": 2.8234430698259594, - "grad_norm": 4.206203718005195, - "learning_rate": 1.8112872383456937e-07, - "loss": 0.1626, + "epoch": 1.90129906805987, + "grad_norm": 4.404790604062429, + "learning_rate": 6.247935548211324e-06, + "loss": 0.572, "step": 13465 }, { - "epoch": 2.8236527573914865, - "grad_norm": 4.53781318891553, - "learning_rate": 1.807000444834861e-07, - "loss": 0.1755, + "epoch": 1.901440271109856, + "grad_norm": 3.3656212831646624, + "learning_rate": 6.246522513450552e-06, + "loss": 0.5335, "step": 13466 }, { - "epoch": 2.823862444957014, - "grad_norm": 4.249738883313692, - "learning_rate": 1.8027186838855626e-07, - "loss": 0.1362, + "epoch": 1.9015814741598418, + "grad_norm": 4.577944759719374, + "learning_rate": 6.245109565918976e-06, + "loss": 0.6387, "step": 13467 }, { - "epoch": 2.8240721325225415, - "grad_norm": 4.560572345384787, - "learning_rate": 1.798441955717234e-07, - "loss": 0.1466, + "epoch": 1.9017226772098277, + "grad_norm": 2.9637291767477945, + "learning_rate": 6.243696705649432e-06, + "loss": 0.3523, "step": 13468 }, { - "epoch": 2.8242818200880686, - "grad_norm": 4.182646673478448, - "learning_rate": 1.7941702605490773e-07, - "loss": 0.1522, + "epoch": 1.9018638802598136, + "grad_norm": 3.2359656238349226, + "learning_rate": 6.242283932674752e-06, + "loss": 0.483, "step": 13469 }, { - "epoch": 2.824491507653596, - "grad_norm": 6.43631398921247, - "learning_rate": 1.789903598600029e-07, - "loss": 0.1756, + "epoch": 1.9020050833097994, + "grad_norm": 3.55124655840089, + "learning_rate": 6.240871247027774e-06, + "loss": 0.5854, "step": 13470 }, { - "epoch": 2.8247011952191237, - "grad_norm": 4.109778781233847, - "learning_rate": 1.7856419700887694e-07, - "loss": 0.1193, + "epoch": 1.9021462863597853, + "grad_norm": 3.235279043523479, + "learning_rate": 6.2394586487413225e-06, + "loss": 0.4653, "step": 13471 }, { - "epoch": 2.8249108827846507, - "grad_norm": 4.8815366638382205, - "learning_rate": 1.7813853752336908e-07, - "loss": 0.1775, + "epoch": 1.9022874894097712, + "grad_norm": 3.483502072499075, + "learning_rate": 6.23804613784823e-06, + "loss": 0.555, "step": 13472 }, { - "epoch": 2.8251205703501783, - "grad_norm": 4.277927107201278, - "learning_rate": 1.7771338142529625e-07, - "loss": 0.1675, + "epoch": 1.902428692459757, + "grad_norm": 3.5061307283348504, + "learning_rate": 6.23663371438132e-06, + "loss": 0.4853, "step": 13473 }, { - "epoch": 2.825330257915706, - "grad_norm": 3.198094000113271, - "learning_rate": 1.7728872873644886e-07, - "loss": 0.0756, + "epoch": 1.902569895509743, + "grad_norm": 2.9753120736905108, + "learning_rate": 6.235221378373415e-06, + "loss": 0.4698, "step": 13474 }, { - "epoch": 2.825539945481233, - "grad_norm": 3.70242927349456, - "learning_rate": 1.7686457947859058e-07, - "loss": 0.1214, + "epoch": 1.9027110985597289, + "grad_norm": 3.4874116829860085, + "learning_rate": 6.23380912985734e-06, + "loss": 0.5942, "step": 13475 }, { - "epoch": 2.8257496330467604, - "grad_norm": 4.600217984464785, - "learning_rate": 1.764409336734607e-07, - "loss": 0.1765, + "epoch": 1.9028523016097147, + "grad_norm": 3.932341691813281, + "learning_rate": 6.232396968865916e-06, + "loss": 0.5674, "step": 13476 }, { - "epoch": 2.825959320612288, - "grad_norm": 3.8869130889910912, - "learning_rate": 1.7601779134276963e-07, - "loss": 0.1319, + "epoch": 1.9029935046597006, + "grad_norm": 3.197416839462005, + "learning_rate": 6.230984895431957e-06, + "loss": 0.5397, "step": 13477 }, { - "epoch": 2.826169008177815, - "grad_norm": 6.095516558269942, - "learning_rate": 1.7559515250820558e-07, - "loss": 0.1807, + "epoch": 1.9031347077096865, + "grad_norm": 3.901535172890022, + "learning_rate": 6.229572909588282e-06, + "loss": 0.528, "step": 13478 }, { - "epoch": 2.8263786957433426, - "grad_norm": 4.550213065818699, - "learning_rate": 1.751730171914312e-07, - "loss": 0.1463, + "epoch": 1.9032759107596724, + "grad_norm": 3.5367989820110792, + "learning_rate": 6.228161011367706e-06, + "loss": 0.5403, "step": 13479 }, { - "epoch": 2.8265883833088696, - "grad_norm": 3.786959131550005, - "learning_rate": 1.7475138541407922e-07, - "loss": 0.1104, + "epoch": 1.9034171138096583, + "grad_norm": 4.435100017125884, + "learning_rate": 6.2267492008030395e-06, + "loss": 0.8056, "step": 13480 }, { - "epoch": 2.826798070874397, - "grad_norm": 5.38653754157049, - "learning_rate": 1.7433025719775898e-07, - "loss": 0.1591, + "epoch": 1.9035583168596442, + "grad_norm": 3.287977744577752, + "learning_rate": 6.225337477927092e-06, + "loss": 0.4601, "step": 13481 }, { - "epoch": 2.8270077584399242, - "grad_norm": 4.347449163323004, - "learning_rate": 1.7390963256405545e-07, - "loss": 0.1217, + "epoch": 1.90369951990963, + "grad_norm": 3.806724059770396, + "learning_rate": 6.223925842772668e-06, + "loss": 0.5308, "step": 13482 }, { - "epoch": 2.8272174460054518, - "grad_norm": 4.044976624560833, - "learning_rate": 1.7348951153452697e-07, - "loss": 0.1449, + "epoch": 1.903840722959616, + "grad_norm": 3.148508643615088, + "learning_rate": 6.222514295372579e-06, + "loss": 0.4877, "step": 13483 }, { - "epoch": 2.8274271335709793, - "grad_norm": 4.406117673376607, - "learning_rate": 1.7306989413070186e-07, - "loss": 0.13, + "epoch": 1.9039819260096018, + "grad_norm": 3.804527430435709, + "learning_rate": 6.221102835759626e-06, + "loss": 0.4942, "step": 13484 }, { - "epoch": 2.8276368211365064, - "grad_norm": 4.16371531027683, - "learning_rate": 1.7265078037408956e-07, - "loss": 0.1264, + "epoch": 1.9041231290595877, + "grad_norm": 3.673156309053446, + "learning_rate": 6.21969146396661e-06, + "loss": 0.4812, "step": 13485 }, { - "epoch": 2.827846508702034, - "grad_norm": 3.9000760317126826, - "learning_rate": 1.7223217028616956e-07, - "loss": 0.1234, + "epoch": 1.9042643321095736, + "grad_norm": 3.749173471566194, + "learning_rate": 6.2182801800263325e-06, + "loss": 0.5813, "step": 13486 }, { - "epoch": 2.8280561962675614, - "grad_norm": 5.126820395795172, - "learning_rate": 1.7181406388839695e-07, - "loss": 0.1408, + "epoch": 1.9044055351595595, + "grad_norm": 3.6173725295608086, + "learning_rate": 6.216868983971591e-06, + "loss": 0.6701, "step": 13487 }, { - "epoch": 2.8282658838330885, - "grad_norm": 4.732556929375315, - "learning_rate": 1.71396461202199e-07, - "loss": 0.1818, + "epoch": 1.9045467382095453, + "grad_norm": 3.7015405642538046, + "learning_rate": 6.21545787583518e-06, + "loss": 0.6082, "step": 13488 }, { - "epoch": 2.828475571398616, - "grad_norm": 4.501669049557819, - "learning_rate": 1.709793622489797e-07, - "loss": 0.1237, + "epoch": 1.9046879412595312, + "grad_norm": 4.184283313805566, + "learning_rate": 6.214046855649898e-06, + "loss": 0.6809, "step": 13489 }, { - "epoch": 2.8286852589641436, - "grad_norm": 3.696165522573165, - "learning_rate": 1.7056276705011422e-07, - "loss": 0.0866, + "epoch": 1.904829144309517, + "grad_norm": 3.781968486114172, + "learning_rate": 6.212635923448526e-06, + "loss": 0.6126, "step": 13490 }, { - "epoch": 2.8288949465296707, - "grad_norm": 4.140680627404227, - "learning_rate": 1.7014667562695765e-07, - "loss": 0.1647, + "epoch": 1.904970347359503, + "grad_norm": 4.162485942149802, + "learning_rate": 6.211225079263861e-06, + "loss": 0.5773, "step": 13491 }, { - "epoch": 2.829104634095198, - "grad_norm": 3.7161421675275257, - "learning_rate": 1.6973108800083181e-07, - "loss": 0.1265, + "epoch": 1.9051115504094889, + "grad_norm": 4.480103004071899, + "learning_rate": 6.209814323128689e-06, + "loss": 0.5347, "step": 13492 }, { - "epoch": 2.8293143216607257, - "grad_norm": 4.9794656184629105, - "learning_rate": 1.693160041930364e-07, - "loss": 0.137, + "epoch": 1.9052527534594748, + "grad_norm": 3.8877504395548463, + "learning_rate": 6.208403655075797e-06, + "loss": 0.5051, "step": 13493 }, { - "epoch": 2.829524009226253, - "grad_norm": 3.137659319844232, - "learning_rate": 1.6890142422484658e-07, - "loss": 0.0996, + "epoch": 1.9053939565094606, + "grad_norm": 3.648369398878296, + "learning_rate": 6.2069930751379635e-06, + "loss": 0.5209, "step": 13494 }, { - "epoch": 2.8297336967917803, - "grad_norm": 3.3701607995097835, - "learning_rate": 1.6848734811751088e-07, - "loss": 0.1156, + "epoch": 1.9055351595594465, + "grad_norm": 3.2069234380379816, + "learning_rate": 6.205582583347974e-06, + "loss": 0.5022, "step": 13495 }, { - "epoch": 2.829943384357308, - "grad_norm": 4.526917866353032, - "learning_rate": 1.6807377589225016e-07, - "loss": 0.1719, + "epoch": 1.9056763626094324, + "grad_norm": 3.9214667493410396, + "learning_rate": 6.2041721797386075e-06, + "loss": 0.6172, "step": 13496 }, { - "epoch": 2.830153071922835, - "grad_norm": 5.4668251423924366, - "learning_rate": 1.6766070757026075e-07, - "loss": 0.1153, + "epoch": 1.9058175656594183, + "grad_norm": 3.6824811558907316, + "learning_rate": 6.2027618643426425e-06, + "loss": 0.4731, "step": 13497 }, { - "epoch": 2.8303627594883625, - "grad_norm": 4.904087989765102, - "learning_rate": 1.6724814317271354e-07, - "loss": 0.1839, + "epoch": 1.9059587687094042, + "grad_norm": 4.214602276313725, + "learning_rate": 6.201351637192849e-06, + "loss": 0.5478, "step": 13498 }, { - "epoch": 2.8305724470538896, - "grad_norm": 3.242284173558282, - "learning_rate": 1.668360827207527e-07, - "loss": 0.1211, + "epoch": 1.90609997175939, + "grad_norm": 3.5293459118156787, + "learning_rate": 6.199941498322004e-06, + "loss": 0.4591, "step": 13499 }, { - "epoch": 2.830782134619417, - "grad_norm": 4.643365991129911, - "learning_rate": 1.664245262354991e-07, - "loss": 0.1358, + "epoch": 1.906241174809376, + "grad_norm": 4.807394649678139, + "learning_rate": 6.198531447762875e-06, + "loss": 0.7148, "step": 13500 }, { - "epoch": 2.830991822184944, - "grad_norm": 4.607655380221388, - "learning_rate": 1.6601347373804256e-07, - "loss": 0.1231, + "epoch": 1.9063823778593618, + "grad_norm": 3.632114884957227, + "learning_rate": 6.1971214855482356e-06, + "loss": 0.603, "step": 13501 }, { - "epoch": 2.8312015097504717, - "grad_norm": 3.9491623004016327, - "learning_rate": 1.6560292524945176e-07, - "loss": 0.1272, + "epoch": 1.9065235809093477, + "grad_norm": 3.551633007477498, + "learning_rate": 6.195711611710851e-06, + "loss": 0.5242, "step": 13502 }, { - "epoch": 2.831411197315999, - "grad_norm": 4.734050570244802, - "learning_rate": 1.6519288079076768e-07, - "loss": 0.1557, + "epoch": 1.9066647839593336, + "grad_norm": 3.661535431365538, + "learning_rate": 6.194301826283482e-06, + "loss": 0.5458, "step": 13503 }, { - "epoch": 2.8316208848815263, - "grad_norm": 4.296665014316216, - "learning_rate": 1.647833403830068e-07, - "loss": 0.1257, + "epoch": 1.9068059870093195, + "grad_norm": 3.1531597283656896, + "learning_rate": 6.192892129298898e-06, + "loss": 0.4651, "step": 13504 }, { - "epoch": 2.831830572447054, - "grad_norm": 4.272249345063031, - "learning_rate": 1.6437430404715682e-07, - "loss": 0.1195, + "epoch": 1.9069471900593054, + "grad_norm": 3.8327274881705953, + "learning_rate": 6.1914825207898566e-06, + "loss": 0.5357, "step": 13505 }, { - "epoch": 2.8320402600125814, - "grad_norm": 5.772249320373049, - "learning_rate": 1.6396577180418316e-07, - "loss": 0.1745, + "epoch": 1.9070883931092912, + "grad_norm": 3.8559468524331457, + "learning_rate": 6.1900730007891185e-06, + "loss": 0.6479, "step": 13506 }, { - "epoch": 2.8322499475781084, - "grad_norm": 4.651866047745959, - "learning_rate": 1.6355774367502463e-07, - "loss": 0.1666, + "epoch": 1.9072295961592771, + "grad_norm": 3.0357380017259485, + "learning_rate": 6.188663569329437e-06, + "loss": 0.4086, "step": 13507 }, { - "epoch": 2.832459635143636, - "grad_norm": 3.0581453236408978, - "learning_rate": 1.631502196805912e-07, - "loss": 0.0895, + "epoch": 1.907370799209263, + "grad_norm": 3.8883329931847057, + "learning_rate": 6.187254226443566e-06, + "loss": 0.5999, "step": 13508 }, { - "epoch": 2.8326693227091635, - "grad_norm": 3.5516137441847735, - "learning_rate": 1.6274319984177057e-07, - "loss": 0.1089, + "epoch": 1.9075120022592489, + "grad_norm": 4.577885023170147, + "learning_rate": 6.185844972164262e-06, + "loss": 0.7173, "step": 13509 }, { - "epoch": 2.8328790102746906, - "grad_norm": 4.348700589667592, - "learning_rate": 1.6233668417942273e-07, - "loss": 0.1317, + "epoch": 1.9076532053092348, + "grad_norm": 3.413859526654456, + "learning_rate": 6.184435806524274e-06, + "loss": 0.4953, "step": 13510 }, { - "epoch": 2.833088697840218, - "grad_norm": 3.923422756873794, - "learning_rate": 1.6193067271438213e-07, - "loss": 0.1152, + "epoch": 1.9077944083592207, + "grad_norm": 3.262064976748346, + "learning_rate": 6.18302672955635e-06, + "loss": 0.4225, "step": 13511 }, { - "epoch": 2.8332983854057456, - "grad_norm": 3.379124584751275, - "learning_rate": 1.6152516546745766e-07, - "loss": 0.1048, + "epoch": 1.9079356114092065, + "grad_norm": 2.965394349435497, + "learning_rate": 6.181617741293234e-06, + "loss": 0.4792, "step": 13512 }, { - "epoch": 2.8335080729712727, - "grad_norm": 3.8366215183652916, - "learning_rate": 1.6112016245943274e-07, - "loss": 0.1421, + "epoch": 1.9080768144591924, + "grad_norm": 3.1037880051798457, + "learning_rate": 6.180208841767674e-06, + "loss": 0.5114, "step": 13513 }, { - "epoch": 2.8337177605368002, - "grad_norm": 5.087542291677511, - "learning_rate": 1.6071566371106406e-07, - "loss": 0.1755, + "epoch": 1.9082180175091783, + "grad_norm": 3.8124164331738823, + "learning_rate": 6.178800031012414e-06, + "loss": 0.5502, "step": 13514 }, { - "epoch": 2.8339274481023278, - "grad_norm": 3.451368807985004, - "learning_rate": 1.6031166924308284e-07, - "loss": 0.0982, + "epoch": 1.9083592205591642, + "grad_norm": 3.111656675250065, + "learning_rate": 6.1773913090601885e-06, + "loss": 0.481, "step": 13515 }, { - "epoch": 2.834137135667855, - "grad_norm": 4.593682387054715, - "learning_rate": 1.599081790761947e-07, - "loss": 0.1273, + "epoch": 1.90850042360915, + "grad_norm": 2.999046367837904, + "learning_rate": 6.175982675943739e-06, + "loss": 0.4042, "step": 13516 }, { - "epoch": 2.8343468232333824, - "grad_norm": 4.368376212679015, - "learning_rate": 1.5950519323107984e-07, - "loss": 0.1178, + "epoch": 1.908641626659136, + "grad_norm": 3.0739467931061895, + "learning_rate": 6.174574131695797e-06, + "loss": 0.4278, "step": 13517 }, { - "epoch": 2.8345565107989095, - "grad_norm": 6.243624632763378, - "learning_rate": 1.5910271172839053e-07, - "loss": 0.2089, + "epoch": 1.9087828297091218, + "grad_norm": 3.3979989662420262, + "learning_rate": 6.173165676349103e-06, + "loss": 0.5961, "step": 13518 }, { - "epoch": 2.834766198364437, - "grad_norm": 4.027191688453906, - "learning_rate": 1.587007345887559e-07, - "loss": 0.156, + "epoch": 1.9089240327591077, + "grad_norm": 3.091348252451232, + "learning_rate": 6.1717573099363856e-06, + "loss": 0.4602, "step": 13519 }, { - "epoch": 2.834975885929964, - "grad_norm": 5.65532073269755, - "learning_rate": 1.5829926183277722e-07, - "loss": 0.1536, + "epoch": 1.9090652358090936, + "grad_norm": 3.717554374266332, + "learning_rate": 6.1703490324903745e-06, + "loss": 0.5919, "step": 13520 }, { - "epoch": 2.8351855734954916, - "grad_norm": 4.614701479719456, - "learning_rate": 1.5789829348103137e-07, - "loss": 0.1399, + "epoch": 1.9092064388590795, + "grad_norm": 3.470340719364818, + "learning_rate": 6.168940844043798e-06, + "loss": 0.4467, "step": 13521 }, { - "epoch": 2.835395261061019, - "grad_norm": 5.035728326187406, - "learning_rate": 1.5749782955406745e-07, - "loss": 0.1695, + "epoch": 1.9093476419090654, + "grad_norm": 4.318080180480572, + "learning_rate": 6.167532744629383e-06, + "loss": 0.6636, "step": 13522 }, { - "epoch": 2.835604948626546, - "grad_norm": 5.493209243927689, - "learning_rate": 1.5709787007241128e-07, - "loss": 0.1555, + "epoch": 1.9094888449590512, + "grad_norm": 3.394017725245674, + "learning_rate": 6.166124734279855e-06, + "loss": 0.489, "step": 13523 }, { - "epoch": 2.8358146361920737, - "grad_norm": 4.508705264071455, - "learning_rate": 1.566984150565609e-07, - "loss": 0.1106, + "epoch": 1.9096300480090371, + "grad_norm": 3.1723969993033285, + "learning_rate": 6.164716813027929e-06, + "loss": 0.3914, "step": 13524 }, { - "epoch": 2.8360243237576013, - "grad_norm": 5.494970843066494, - "learning_rate": 1.5629946452698886e-07, - "loss": 0.1618, + "epoch": 1.909771251059023, + "grad_norm": 3.515909400637687, + "learning_rate": 6.163308980906325e-06, + "loss": 0.5366, "step": 13525 }, { - "epoch": 2.8362340113231284, - "grad_norm": 4.099897825889496, - "learning_rate": 1.5590101850414207e-07, - "loss": 0.1428, + "epoch": 1.909912454109009, + "grad_norm": 4.007155349360425, + "learning_rate": 6.161901237947768e-06, + "loss": 0.4827, "step": 13526 }, { - "epoch": 2.836443698888656, - "grad_norm": 3.8661647393033602, - "learning_rate": 1.5550307700844203e-07, - "loss": 0.1224, + "epoch": 1.9100536571589948, + "grad_norm": 3.969105619031376, + "learning_rate": 6.160493584184966e-06, + "loss": 0.6022, "step": 13527 }, { - "epoch": 2.8366533864541834, - "grad_norm": 5.323388608347447, - "learning_rate": 1.551056400602835e-07, - "loss": 0.1616, + "epoch": 1.9101948602089807, + "grad_norm": 3.8830887341165017, + "learning_rate": 6.159086019650635e-06, + "loss": 0.5606, "step": 13528 }, { - "epoch": 2.8368630740197105, - "grad_norm": 4.917644817297917, - "learning_rate": 1.5470870768003686e-07, - "loss": 0.1529, + "epoch": 1.9103360632589665, + "grad_norm": 3.244836146085893, + "learning_rate": 6.157678544377486e-06, + "loss": 0.5403, "step": 13529 }, { - "epoch": 2.837072761585238, - "grad_norm": 5.104726337910391, - "learning_rate": 1.5431227988804365e-07, - "loss": 0.19, + "epoch": 1.9104772663089524, + "grad_norm": 3.2302744983757865, + "learning_rate": 6.15627115839823e-06, + "loss": 0.4633, "step": 13530 }, { - "epoch": 2.8372824491507656, - "grad_norm": 5.622229177454503, - "learning_rate": 1.5391635670462424e-07, - "loss": 0.1559, + "epoch": 1.9106184693589383, + "grad_norm": 3.313051933301295, + "learning_rate": 6.154863861745572e-06, + "loss": 0.4123, "step": 13531 }, { - "epoch": 2.8374921367162926, - "grad_norm": 3.9099025297449517, - "learning_rate": 1.5352093815006684e-07, - "loss": 0.1036, + "epoch": 1.9107596724089242, + "grad_norm": 3.6637467968928346, + "learning_rate": 6.1534566544522175e-06, + "loss": 0.4728, "step": 13532 }, { - "epoch": 2.83770182428182, - "grad_norm": 4.44382294992835, - "learning_rate": 1.5312602424463974e-07, - "loss": 0.0961, + "epoch": 1.91090087545891, + "grad_norm": 3.4365651866362437, + "learning_rate": 6.152049536550869e-06, + "loss": 0.5196, "step": 13533 }, { - "epoch": 2.8379115118473477, - "grad_norm": 3.9443254029146764, - "learning_rate": 1.5273161500858336e-07, - "loss": 0.1097, + "epoch": 1.911042078508896, + "grad_norm": 4.673104483537478, + "learning_rate": 6.150642508074225e-06, + "loss": 0.785, "step": 13534 }, { - "epoch": 2.8381211994128748, - "grad_norm": 3.6514105217803907, - "learning_rate": 1.523377104621093e-07, - "loss": 0.1311, + "epoch": 1.9111832815588816, + "grad_norm": 3.0815360384651327, + "learning_rate": 6.149235569054989e-06, + "loss": 0.4526, "step": 13535 }, { - "epoch": 2.8383308869784023, - "grad_norm": 5.2831244240714454, - "learning_rate": 1.5194431062540817e-07, - "loss": 0.1514, + "epoch": 1.9113244846088675, + "grad_norm": 3.7679388215482086, + "learning_rate": 6.1478287195258545e-06, + "loss": 0.5181, "step": 13536 }, { - "epoch": 2.8385405745439294, - "grad_norm": 4.198605410894636, - "learning_rate": 1.5155141551864373e-07, - "loss": 0.1618, + "epoch": 1.9114656876588534, + "grad_norm": 3.502537151915502, + "learning_rate": 6.1464219595195165e-06, + "loss": 0.4431, "step": 13537 }, { - "epoch": 2.838750262109457, - "grad_norm": 5.559591065808132, - "learning_rate": 1.511590251619477e-07, - "loss": 0.1532, + "epoch": 1.9116068907088393, + "grad_norm": 3.045800990373655, + "learning_rate": 6.1450152890686684e-06, + "loss": 0.4536, "step": 13538 }, { - "epoch": 2.8389599496749844, - "grad_norm": 4.978930894028863, - "learning_rate": 1.50767139575434e-07, - "loss": 0.15, + "epoch": 1.9117480937588252, + "grad_norm": 3.2758310070994097, + "learning_rate": 6.1436087082060016e-06, + "loss": 0.477, "step": 13539 }, { - "epoch": 2.8391696372405115, - "grad_norm": 4.295804930222321, - "learning_rate": 1.5037575877918875e-07, - "loss": 0.1349, + "epoch": 1.911889296808811, + "grad_norm": 3.9009353783444776, + "learning_rate": 6.142202216964204e-06, + "loss": 0.5623, "step": 13540 }, { - "epoch": 2.839379324806039, - "grad_norm": 3.9601735192046714, - "learning_rate": 1.4998488279326816e-07, - "loss": 0.105, + "epoch": 1.912030499858797, + "grad_norm": 2.872219961471856, + "learning_rate": 6.1407958153759584e-06, + "loss": 0.4414, "step": 13541 }, { - "epoch": 2.839589012371566, - "grad_norm": 4.8306204935383334, - "learning_rate": 1.495945116377051e-07, - "loss": 0.1292, + "epoch": 1.9121717029087828, + "grad_norm": 3.2577917763885544, + "learning_rate": 6.139389503473951e-06, + "loss": 0.4844, "step": 13542 }, { - "epoch": 2.8397986999370937, - "grad_norm": 3.841762238212064, - "learning_rate": 1.492046453325091e-07, - "loss": 0.1346, + "epoch": 1.9123129059587687, + "grad_norm": 3.3329042612416893, + "learning_rate": 6.1379832812908645e-06, + "loss": 0.5126, "step": 13543 }, { - "epoch": 2.840008387502621, - "grad_norm": 4.523805638306732, - "learning_rate": 1.4881528389766087e-07, - "loss": 0.085, + "epoch": 1.9124541090087546, + "grad_norm": 3.553803280191966, + "learning_rate": 6.13657714885938e-06, + "loss": 0.5851, "step": 13544 }, { - "epoch": 2.8402180750681483, - "grad_norm": 4.741838921582771, - "learning_rate": 1.484264273531144e-07, - "loss": 0.1553, + "epoch": 1.9125953120587404, + "grad_norm": 3.720635014566452, + "learning_rate": 6.135171106212172e-06, + "loss": 0.5827, "step": 13545 }, { - "epoch": 2.840427762633676, - "grad_norm": 4.47256299540073, - "learning_rate": 1.4803807571880047e-07, - "loss": 0.142, + "epoch": 1.9127365151087263, + "grad_norm": 3.9525363796902173, + "learning_rate": 6.133765153381918e-06, + "loss": 0.6083, "step": 13546 }, { - "epoch": 2.8406374501992033, - "grad_norm": 3.4233333964104173, - "learning_rate": 1.476502290146231e-07, - "loss": 0.0947, + "epoch": 1.9128777181587122, + "grad_norm": 3.0726411828982223, + "learning_rate": 6.132359290401294e-06, + "loss": 0.4169, "step": 13547 }, { - "epoch": 2.8408471377647304, - "grad_norm": 4.485976764046395, - "learning_rate": 1.472628872604587e-07, - "loss": 0.1325, + "epoch": 1.913018921208698, + "grad_norm": 2.9150093956573593, + "learning_rate": 6.130953517302972e-06, + "loss": 0.4028, "step": 13548 }, { - "epoch": 2.841056825330258, - "grad_norm": 4.352710032384308, - "learning_rate": 1.4687605047616126e-07, - "loss": 0.1256, + "epoch": 1.913160124258684, + "grad_norm": 3.6550136124451797, + "learning_rate": 6.129547834119618e-06, + "loss": 0.6039, "step": 13549 }, { - "epoch": 2.8412665128957855, - "grad_norm": 4.786726859081019, - "learning_rate": 1.4648971868155505e-07, - "loss": 0.1572, + "epoch": 1.9133013273086699, + "grad_norm": 3.3243467567096157, + "learning_rate": 6.128142240883899e-06, + "loss": 0.5308, "step": 13550 }, { - "epoch": 2.8414762004613126, - "grad_norm": 4.592737482641132, - "learning_rate": 1.461038918964397e-07, - "loss": 0.124, + "epoch": 1.9134425303586557, + "grad_norm": 3.549920782020388, + "learning_rate": 6.1267367376284806e-06, + "loss": 0.5279, "step": 13551 }, { - "epoch": 2.84168588802684, - "grad_norm": 5.078285260239264, - "learning_rate": 1.4571857014059165e-07, - "loss": 0.1786, + "epoch": 1.9135837334086416, + "grad_norm": 3.2352285461853185, + "learning_rate": 6.12533132438603e-06, + "loss": 0.5002, "step": 13552 }, { - "epoch": 2.8418955755923676, - "grad_norm": 4.8586291165436375, - "learning_rate": 1.4533375343375845e-07, - "loss": 0.1516, + "epoch": 1.9137249364586275, + "grad_norm": 3.2576633826634995, + "learning_rate": 6.1239260011892065e-06, + "loss": 0.4877, "step": 13553 }, { - "epoch": 2.8421052631578947, - "grad_norm": 4.483687482484891, - "learning_rate": 1.4494944179566206e-07, - "loss": 0.1576, + "epoch": 1.9138661395086134, + "grad_norm": 2.8374728833586733, + "learning_rate": 6.122520768070669e-06, + "loss": 0.3855, "step": 13554 }, { - "epoch": 2.842314950723422, - "grad_norm": 4.422660326661012, - "learning_rate": 1.4456563524599899e-07, - "loss": 0.1603, + "epoch": 1.9140073425585993, + "grad_norm": 4.410852843141075, + "learning_rate": 6.121115625063072e-06, + "loss": 0.5851, "step": 13555 }, { - "epoch": 2.8425246382889493, - "grad_norm": 3.159919696835004, - "learning_rate": 1.4418233380444125e-07, - "loss": 0.1169, + "epoch": 1.9141485456085852, + "grad_norm": 3.209414470374029, + "learning_rate": 6.119710572199075e-06, + "loss": 0.366, "step": 13556 }, { - "epoch": 2.842734325854477, - "grad_norm": 4.573223903512745, - "learning_rate": 1.4379953749063202e-07, - "loss": 0.1542, + "epoch": 1.914289748658571, + "grad_norm": 3.2890412988106603, + "learning_rate": 6.11830560951133e-06, + "loss": 0.4857, "step": 13557 }, { - "epoch": 2.8429440134200044, - "grad_norm": 2.7437999341616037, - "learning_rate": 1.434172463241923e-07, - "loss": 0.0744, + "epoch": 1.914430951708557, + "grad_norm": 3.5286376712353222, + "learning_rate": 6.116900737032484e-06, + "loss": 0.5163, "step": 13558 }, { - "epoch": 2.8431537009855314, - "grad_norm": 3.3932337019077132, - "learning_rate": 1.4303546032471417e-07, - "loss": 0.1102, + "epoch": 1.9145721547585428, + "grad_norm": 3.2451366572615563, + "learning_rate": 6.115495954795187e-06, + "loss": 0.5468, "step": 13559 }, { - "epoch": 2.843363388551059, - "grad_norm": 3.6212030201899, - "learning_rate": 1.426541795117642e-07, - "loss": 0.1502, + "epoch": 1.9147133578085287, + "grad_norm": 2.8526481162127744, + "learning_rate": 6.114091262832087e-06, + "loss": 0.4907, "step": 13560 }, { - "epoch": 2.843573076116586, - "grad_norm": 4.456743997131811, - "learning_rate": 1.4227340390488342e-07, - "loss": 0.1525, + "epoch": 1.9148545608585146, + "grad_norm": 3.1051335601721703, + "learning_rate": 6.1126866611758285e-06, + "loss": 0.4752, "step": 13561 }, { - "epoch": 2.8437827636821136, - "grad_norm": 3.1878796132562086, - "learning_rate": 1.4189313352358957e-07, - "loss": 0.1002, + "epoch": 1.9149957639085005, + "grad_norm": 4.269662888694978, + "learning_rate": 6.111282149859052e-06, + "loss": 0.664, "step": 13562 }, { - "epoch": 2.843992451247641, - "grad_norm": 5.415522014487006, - "learning_rate": 1.415133683873704e-07, - "loss": 0.1632, + "epoch": 1.9151369669584863, + "grad_norm": 3.2042839713201983, + "learning_rate": 6.109877728914399e-06, + "loss": 0.473, "step": 13563 }, { - "epoch": 2.844202138813168, - "grad_norm": 4.380361604196579, - "learning_rate": 1.411341085156881e-07, - "loss": 0.1792, + "epoch": 1.9152781700084722, + "grad_norm": 3.858288973332044, + "learning_rate": 6.108473398374509e-06, + "loss": 0.5264, "step": 13564 }, { - "epoch": 2.8444118263786957, - "grad_norm": 3.835866387843376, - "learning_rate": 1.4075535392798378e-07, - "loss": 0.1075, + "epoch": 1.915419373058458, + "grad_norm": 3.1049400498448203, + "learning_rate": 6.107069158272018e-06, + "loss": 0.4681, "step": 13565 }, { - "epoch": 2.8446215139442232, - "grad_norm": 3.9882846191496637, - "learning_rate": 1.403771046436675e-07, - "loss": 0.1188, + "epoch": 1.915560576108444, + "grad_norm": 3.610705612551678, + "learning_rate": 6.105665008639557e-06, + "loss": 0.5476, "step": 13566 }, { - "epoch": 2.8448312015097503, - "grad_norm": 7.257783162906677, - "learning_rate": 1.3999936068212483e-07, - "loss": 0.1883, + "epoch": 1.9157017791584299, + "grad_norm": 3.6442190961591105, + "learning_rate": 6.1042609495097595e-06, + "loss": 0.6075, "step": 13567 }, { - "epoch": 2.845040889075278, - "grad_norm": 3.657899516699932, - "learning_rate": 1.3962212206271696e-07, - "loss": 0.0999, + "epoch": 1.9158429822084158, + "grad_norm": 3.6549497499715256, + "learning_rate": 6.102856980915253e-06, + "loss": 0.5278, "step": 13568 }, { - "epoch": 2.8452505766408054, - "grad_norm": 4.10619393383268, - "learning_rate": 1.3924538880477623e-07, - "loss": 0.1325, + "epoch": 1.9159841852584014, + "grad_norm": 3.582281737849151, + "learning_rate": 6.101453102888668e-06, + "loss": 0.6147, "step": 13569 }, { - "epoch": 2.8454602642063325, - "grad_norm": 4.222522438815761, - "learning_rate": 1.388691609276116e-07, - "loss": 0.131, + "epoch": 1.9161253883083873, + "grad_norm": 3.019319900328289, + "learning_rate": 6.10004931546263e-06, + "loss": 0.4609, "step": 13570 }, { - "epoch": 2.84566995177186, - "grad_norm": 5.461991239664398, - "learning_rate": 1.3849343845050768e-07, - "loss": 0.1327, + "epoch": 1.9162665913583732, + "grad_norm": 3.390737869258694, + "learning_rate": 6.098645618669763e-06, + "loss": 0.471, "step": 13571 }, { - "epoch": 2.8458796393373875, - "grad_norm": 3.965255898866399, - "learning_rate": 1.381182213927168e-07, - "loss": 0.0976, + "epoch": 1.916407794408359, + "grad_norm": 2.620801538705652, + "learning_rate": 6.097242012542683e-06, + "loss": 0.4171, "step": 13572 }, { - "epoch": 2.8460893269029146, - "grad_norm": 3.839500547731919, - "learning_rate": 1.3774350977347251e-07, - "loss": 0.1045, + "epoch": 1.916548997458345, + "grad_norm": 3.4813054064006717, + "learning_rate": 6.095838497114017e-06, + "loss": 0.5203, "step": 13573 }, { - "epoch": 2.846299014468442, - "grad_norm": 5.968003001890681, - "learning_rate": 1.3736930361197832e-07, - "loss": 0.1571, + "epoch": 1.9166902005083308, + "grad_norm": 4.227271443361867, + "learning_rate": 6.094435072416379e-06, + "loss": 0.7635, "step": 13574 }, { - "epoch": 2.8465087020339697, - "grad_norm": 3.3105197807626765, - "learning_rate": 1.3699560292741442e-07, - "loss": 0.0998, + "epoch": 1.9168314035583167, + "grad_norm": 3.2913629658695016, + "learning_rate": 6.093031738482382e-06, + "loss": 0.4179, "step": 13575 }, { - "epoch": 2.8467183895994967, - "grad_norm": 4.267024282311256, - "learning_rate": 1.366224077389311e-07, - "loss": 0.1403, + "epoch": 1.9169726066083026, + "grad_norm": 3.810538845406756, + "learning_rate": 6.091628495344639e-06, + "loss": 0.5746, "step": 13576 }, { - "epoch": 2.8469280771650243, - "grad_norm": 4.823953508506322, - "learning_rate": 1.3624971806565633e-07, - "loss": 0.1453, + "epoch": 1.9171138096582885, + "grad_norm": 3.0652061665347605, + "learning_rate": 6.090225343035761e-06, + "loss": 0.4449, "step": 13577 }, { - "epoch": 2.8471377647305514, - "grad_norm": 4.29776446033827, - "learning_rate": 1.3587753392669157e-07, - "loss": 0.1312, + "epoch": 1.9172550127082744, + "grad_norm": 3.3391346481787543, + "learning_rate": 6.088822281588359e-06, + "loss": 0.4395, "step": 13578 }, { - "epoch": 2.847347452296079, - "grad_norm": 4.534087073953428, - "learning_rate": 1.355058553411126e-07, - "loss": 0.176, + "epoch": 1.9173962157582602, + "grad_norm": 3.478485502970465, + "learning_rate": 6.087419311035037e-06, + "loss": 0.4929, "step": 13579 }, { - "epoch": 2.847557139861606, - "grad_norm": 7.797080729346616, - "learning_rate": 1.3513468232796757e-07, - "loss": 0.1706, + "epoch": 1.9175374188082461, + "grad_norm": 3.0885636705685817, + "learning_rate": 6.0860164314084e-06, + "loss": 0.4982, "step": 13580 }, { - "epoch": 2.8477668274271335, - "grad_norm": 3.9273316841090007, - "learning_rate": 1.3476401490627898e-07, - "loss": 0.1291, + "epoch": 1.917678621858232, + "grad_norm": 3.181171998915513, + "learning_rate": 6.084613642741053e-06, + "loss": 0.4996, "step": 13581 }, { - "epoch": 2.847976514992661, - "grad_norm": 4.446979082184453, - "learning_rate": 1.3439385309504504e-07, - "loss": 0.1889, + "epoch": 1.917819824908218, + "grad_norm": 3.461859649805699, + "learning_rate": 6.083210945065595e-06, + "loss": 0.5333, "step": 13582 }, { - "epoch": 2.848186202558188, - "grad_norm": 4.727092161727425, - "learning_rate": 1.3402419691323832e-07, - "loss": 0.12, + "epoch": 1.9179610279582038, + "grad_norm": 3.8161195976709013, + "learning_rate": 6.081808338414622e-06, + "loss": 0.704, "step": 13583 }, { - "epoch": 2.8483958901237156, - "grad_norm": 4.305504846235536, - "learning_rate": 1.3365504637980142e-07, - "loss": 0.1261, + "epoch": 1.9181022310081897, + "grad_norm": 3.6137685291659687, + "learning_rate": 6.08040582282073e-06, + "loss": 0.4822, "step": 13584 }, { - "epoch": 2.848605577689243, - "grad_norm": 4.92008365737643, - "learning_rate": 1.332864015136559e-07, - "loss": 0.1519, + "epoch": 1.9182434340581755, + "grad_norm": 4.619329460674928, + "learning_rate": 6.079003398316513e-06, + "loss": 0.7857, "step": 13585 }, { - "epoch": 2.8488152652547702, - "grad_norm": 3.7348946808043566, - "learning_rate": 1.3291826233369665e-07, - "loss": 0.1283, + "epoch": 1.9183846371081614, + "grad_norm": 3.4720373716595976, + "learning_rate": 6.077601064934565e-06, + "loss": 0.5376, "step": 13586 }, { - "epoch": 2.8490249528202978, - "grad_norm": 3.54063830124864, - "learning_rate": 1.3255062885878967e-07, - "loss": 0.1102, + "epoch": 1.9185258401581473, + "grad_norm": 2.962221244630581, + "learning_rate": 6.0761988227074755e-06, + "loss": 0.4817, "step": 13587 }, { - "epoch": 2.8492346403858253, - "grad_norm": 5.733590426744869, - "learning_rate": 1.3218350110777656e-07, - "loss": 0.1542, + "epoch": 1.9186670432081332, + "grad_norm": 3.759782719783537, + "learning_rate": 6.074796671667829e-06, + "loss": 0.5656, "step": 13588 }, { - "epoch": 2.8494443279513524, - "grad_norm": 3.999563133469065, - "learning_rate": 1.3181687909947338e-07, - "loss": 0.1402, + "epoch": 1.918808246258119, + "grad_norm": 2.834152426238622, + "learning_rate": 6.073394611848211e-06, + "loss": 0.4787, "step": 13589 }, { - "epoch": 2.84965401551688, - "grad_norm": 4.774945477740314, - "learning_rate": 1.3145076285267177e-07, - "loss": 0.1453, + "epoch": 1.918949449308105, + "grad_norm": 3.5890071399747185, + "learning_rate": 6.071992643281209e-06, + "loss": 0.5092, "step": 13590 }, { - "epoch": 2.8498637030824074, - "grad_norm": 3.3548180552347384, - "learning_rate": 1.310851523861345e-07, - "loss": 0.1035, + "epoch": 1.9190906523580908, + "grad_norm": 3.076592295672931, + "learning_rate": 6.070590765999404e-06, + "loss": 0.4684, "step": 13591 }, { - "epoch": 2.8500733906479345, - "grad_norm": 4.133150867012464, - "learning_rate": 1.3072004771859992e-07, - "loss": 0.1038, + "epoch": 1.9192318554080767, + "grad_norm": 3.391661835505643, + "learning_rate": 6.06918898003537e-06, + "loss": 0.5366, "step": 13592 }, { - "epoch": 2.850283078213462, - "grad_norm": 4.094446470054902, - "learning_rate": 1.303554488687797e-07, - "loss": 0.1518, + "epoch": 1.9193730584580626, + "grad_norm": 3.389310149671312, + "learning_rate": 6.067787285421687e-06, + "loss": 0.5088, "step": 13593 }, { - "epoch": 2.8504927657789896, - "grad_norm": 3.290340668771646, - "learning_rate": 1.2999135585536006e-07, - "loss": 0.107, + "epoch": 1.9195142615080485, + "grad_norm": 3.5710960671210703, + "learning_rate": 6.066385682190928e-06, + "loss": 0.5519, "step": 13594 }, { - "epoch": 2.8507024533445167, - "grad_norm": 4.553580807567586, - "learning_rate": 1.2962776869700378e-07, - "loss": 0.1287, + "epoch": 1.9196554645580344, + "grad_norm": 3.712448714946225, + "learning_rate": 6.064984170375667e-06, + "loss": 0.5592, "step": 13595 }, { - "epoch": 2.850912140910044, - "grad_norm": 4.028074819887243, - "learning_rate": 1.2926468741234378e-07, - "loss": 0.1574, + "epoch": 1.9197966676080203, + "grad_norm": 4.762380080558613, + "learning_rate": 6.063582750008474e-06, + "loss": 0.6387, "step": 13596 }, { - "epoch": 2.8511218284755713, - "grad_norm": 3.457077367186731, - "learning_rate": 1.2890211201998736e-07, - "loss": 0.1066, + "epoch": 1.9199378706580061, + "grad_norm": 3.6402732569856093, + "learning_rate": 6.062181421121917e-06, + "loss": 0.5151, "step": 13597 }, { - "epoch": 2.851331516041099, - "grad_norm": 5.371522879016928, - "learning_rate": 1.285400425385186e-07, - "loss": 0.1499, + "epoch": 1.920079073707992, + "grad_norm": 3.6937359442486914, + "learning_rate": 6.0607801837485665e-06, + "loss": 0.5459, "step": 13598 }, { - "epoch": 2.851541203606626, - "grad_norm": 4.239696298104947, - "learning_rate": 1.2817847898649482e-07, - "loss": 0.1297, + "epoch": 1.920220276757978, + "grad_norm": 2.747885758529308, + "learning_rate": 6.059379037920985e-06, + "loss": 0.3568, "step": 13599 }, { - "epoch": 2.8517508911721534, - "grad_norm": 3.9951595769445616, - "learning_rate": 1.2781742138244345e-07, - "loss": 0.1426, + "epoch": 1.9203614798079638, + "grad_norm": 3.119520243051187, + "learning_rate": 6.05797798367173e-06, + "loss": 0.4758, "step": 13600 }, { - "epoch": 2.851960578737681, - "grad_norm": 4.466207240802085, - "learning_rate": 1.2745686974487413e-07, - "loss": 0.1571, + "epoch": 1.9205026828579497, + "grad_norm": 3.708726591510971, + "learning_rate": 6.056577021033364e-06, + "loss": 0.6379, "step": 13601 }, { - "epoch": 2.852170266303208, - "grad_norm": 4.169059567774628, - "learning_rate": 1.2709682409226099e-07, - "loss": 0.1251, + "epoch": 1.9206438859079356, + "grad_norm": 3.412951956597504, + "learning_rate": 6.055176150038445e-06, + "loss": 0.4454, "step": 13602 }, { - "epoch": 2.8523799538687356, - "grad_norm": 5.604564169424631, - "learning_rate": 1.2673728444306144e-07, - "loss": 0.1819, + "epoch": 1.9207850889579214, + "grad_norm": 4.210896875109653, + "learning_rate": 6.053775370719527e-06, + "loss": 0.634, "step": 13603 }, { - "epoch": 2.852589641434263, - "grad_norm": 4.982648487967696, - "learning_rate": 1.2637825081569855e-07, - "loss": 0.1782, + "epoch": 1.9209262920079073, + "grad_norm": 3.7734343576831892, + "learning_rate": 6.052374683109168e-06, + "loss": 0.5846, "step": 13604 }, { - "epoch": 2.85279932899979, - "grad_norm": 3.7567597521064466, - "learning_rate": 1.260197232285765e-07, - "loss": 0.099, + "epoch": 1.9210674950578932, + "grad_norm": 3.5785825821963813, + "learning_rate": 6.050974087239914e-06, + "loss": 0.5085, "step": 13605 }, { - "epoch": 2.8530090165653177, - "grad_norm": 5.773398639899188, - "learning_rate": 1.2566170170006831e-07, - "loss": 0.1778, + "epoch": 1.921208698107879, + "grad_norm": 3.1508824152331707, + "learning_rate": 6.049573583144317e-06, + "loss": 0.5047, "step": 13606 }, { - "epoch": 2.853218704130845, - "grad_norm": 3.6153543505579013, - "learning_rate": 1.2530418624852493e-07, - "loss": 0.1174, + "epoch": 1.921349901157865, + "grad_norm": 3.894142587765325, + "learning_rate": 6.04817317085493e-06, + "loss": 0.5758, "step": 13607 }, { - "epoch": 2.8534283916963723, - "grad_norm": 3.0286179382909086, - "learning_rate": 1.2494717689226942e-07, - "loss": 0.0961, + "epoch": 1.9214911042078509, + "grad_norm": 4.0886094707083345, + "learning_rate": 6.0467728504042844e-06, + "loss": 0.6074, "step": 13608 }, { - "epoch": 2.8536380792619, - "grad_norm": 3.7187505126499976, - "learning_rate": 1.245906736495972e-07, - "loss": 0.1099, + "epoch": 1.9216323072578367, + "grad_norm": 3.0423790584947112, + "learning_rate": 6.045372621824933e-06, + "loss": 0.4421, "step": 13609 }, { - "epoch": 2.8538477668274274, - "grad_norm": 3.848745846292349, - "learning_rate": 1.242346765387803e-07, - "loss": 0.1015, + "epoch": 1.9217735103078226, + "grad_norm": 3.2756103373293435, + "learning_rate": 6.043972485149414e-06, + "loss": 0.4782, "step": 13610 }, { - "epoch": 2.8540574543929544, - "grad_norm": 5.346604607808305, - "learning_rate": 1.2387918557806632e-07, - "loss": 0.1211, + "epoch": 1.9219147133578085, + "grad_norm": 3.4497795253288377, + "learning_rate": 6.042572440410263e-06, + "loss": 0.4279, "step": 13611 }, { - "epoch": 2.854267141958482, - "grad_norm": 4.931097679466003, - "learning_rate": 1.2352420078567295e-07, - "loss": 0.1491, + "epoch": 1.9220559164077944, + "grad_norm": 4.1314523430301415, + "learning_rate": 6.041172487640024e-06, + "loss": 0.6322, "step": 13612 }, { - "epoch": 2.8544768295240095, - "grad_norm": 4.234824569570955, - "learning_rate": 1.231697221797945e-07, - "loss": 0.118, + "epoch": 1.9221971194577803, + "grad_norm": 3.8334415761867664, + "learning_rate": 6.039772626871226e-06, + "loss": 0.5951, "step": 13613 }, { - "epoch": 2.8546865170895366, - "grad_norm": 4.376378728016561, - "learning_rate": 1.2281574977859755e-07, - "loss": 0.1345, + "epoch": 1.9223383225077662, + "grad_norm": 3.238188820751821, + "learning_rate": 6.038372858136401e-06, + "loss": 0.4152, "step": 13614 }, { - "epoch": 2.854896204655064, - "grad_norm": 5.14643806422206, - "learning_rate": 1.2246228360022538e-07, - "loss": 0.1657, + "epoch": 1.922479525557752, + "grad_norm": 3.725332506953525, + "learning_rate": 6.036973181468078e-06, + "loss": 0.6329, "step": 13615 }, { - "epoch": 2.855105892220591, - "grad_norm": 4.027745349310622, - "learning_rate": 1.2210932366279238e-07, - "loss": 0.1296, + "epoch": 1.922620728607738, + "grad_norm": 3.552989396765581, + "learning_rate": 6.035573596898789e-06, + "loss": 0.5787, "step": 13616 }, { - "epoch": 2.8553155797861187, - "grad_norm": 4.428884014545528, - "learning_rate": 1.2175686998438963e-07, - "loss": 0.1324, + "epoch": 1.9227619316577238, + "grad_norm": 3.8514277539827946, + "learning_rate": 6.034174104461058e-06, + "loss": 0.5596, "step": 13617 }, { - "epoch": 2.855525267351646, - "grad_norm": 3.2877188365182843, - "learning_rate": 1.2140492258307824e-07, - "loss": 0.1233, + "epoch": 1.9229031347077097, + "grad_norm": 4.189230346480115, + "learning_rate": 6.0327747041874075e-06, + "loss": 0.5708, "step": 13618 }, { - "epoch": 2.8557349549171733, - "grad_norm": 4.282642228408265, - "learning_rate": 1.2105348147689934e-07, - "loss": 0.1451, + "epoch": 1.9230443377576956, + "grad_norm": 3.949472483144072, + "learning_rate": 6.03137539611036e-06, + "loss": 0.601, "step": 13619 }, { - "epoch": 2.855944642482701, - "grad_norm": 4.714871118872217, - "learning_rate": 1.2070254668386517e-07, - "loss": 0.1614, + "epoch": 1.9231855408076814, + "grad_norm": 2.592700351334432, + "learning_rate": 6.029976180262431e-06, + "loss": 0.3689, "step": 13620 }, { - "epoch": 2.856154330048228, - "grad_norm": 4.1193856952778996, - "learning_rate": 1.2035211822195802e-07, - "loss": 0.1295, + "epoch": 1.9233267438576673, + "grad_norm": 3.3486171412005348, + "learning_rate": 6.028577056676144e-06, + "loss": 0.5323, "step": 13621 }, { - "epoch": 2.8563640176137555, - "grad_norm": 3.142993769562701, - "learning_rate": 1.200021961091402e-07, - "loss": 0.0862, + "epoch": 1.9234679469076532, + "grad_norm": 3.507860674298758, + "learning_rate": 6.027178025384011e-06, + "loss": 0.5221, "step": 13622 }, { - "epoch": 2.856573705179283, - "grad_norm": 4.5550732570915295, - "learning_rate": 1.1965278036334737e-07, - "loss": 0.1141, + "epoch": 1.923609149957639, + "grad_norm": 3.713206626805462, + "learning_rate": 6.025779086418542e-06, + "loss": 0.4849, "step": 13623 }, { - "epoch": 2.85678339274481, - "grad_norm": 4.487123890452544, - "learning_rate": 1.193038710024852e-07, - "loss": 0.1231, + "epoch": 1.923750353007625, + "grad_norm": 4.59671376097087, + "learning_rate": 6.024380239812258e-06, + "loss": 0.6278, "step": 13624 }, { - "epoch": 2.8569930803103376, - "grad_norm": 4.608936735912965, - "learning_rate": 1.1895546804443713e-07, - "loss": 0.1337, + "epoch": 1.9238915560576109, + "grad_norm": 3.592267677505597, + "learning_rate": 6.022981485597654e-06, + "loss": 0.5405, "step": 13625 }, { - "epoch": 2.857202767875865, - "grad_norm": 4.131497070113812, - "learning_rate": 1.1860757150705893e-07, - "loss": 0.1281, + "epoch": 1.9240327591075967, + "grad_norm": 2.902778541893034, + "learning_rate": 6.0215828238072446e-06, + "loss": 0.4044, "step": 13626 }, { - "epoch": 2.857412455441392, - "grad_norm": 4.512176705212993, - "learning_rate": 1.1826018140818074e-07, - "loss": 0.1844, + "epoch": 1.9241739621575826, + "grad_norm": 4.08972368951894, + "learning_rate": 6.020184254473532e-06, + "loss": 0.6653, "step": 13627 }, { - "epoch": 2.8576221430069197, - "grad_norm": 3.952044341528827, - "learning_rate": 1.1791329776560834e-07, - "loss": 0.1415, + "epoch": 1.9243151652075685, + "grad_norm": 3.0409383342439167, + "learning_rate": 6.018785777629017e-06, + "loss": 0.4693, "step": 13628 }, { - "epoch": 2.8578318305724473, - "grad_norm": 4.370192464467855, - "learning_rate": 1.175669205971186e-07, - "loss": 0.1167, + "epoch": 1.9244563682575544, + "grad_norm": 3.3176786739853785, + "learning_rate": 6.017387393306203e-06, + "loss": 0.3918, "step": 13629 }, { - "epoch": 2.8580415181379744, - "grad_norm": 4.6871348205020045, - "learning_rate": 1.1722104992046401e-07, - "loss": 0.1291, + "epoch": 1.9245975713075403, + "grad_norm": 3.303659159818569, + "learning_rate": 6.015989101537586e-06, + "loss": 0.4461, "step": 13630 }, { - "epoch": 2.858251205703502, - "grad_norm": 3.950759548623538, - "learning_rate": 1.1687568575337149e-07, - "loss": 0.1158, + "epoch": 1.9247387743575262, + "grad_norm": 3.5229710767531186, + "learning_rate": 6.014590902355662e-06, + "loss": 0.6161, "step": 13631 }, { - "epoch": 2.8584608932690294, - "grad_norm": 3.9022129223537605, - "learning_rate": 1.1653082811354132e-07, - "loss": 0.1351, + "epoch": 1.924879977407512, + "grad_norm": 3.182239001361612, + "learning_rate": 6.013192795792922e-06, + "loss": 0.4661, "step": 13632 }, { - "epoch": 2.8586705808345565, - "grad_norm": 4.150530887589807, - "learning_rate": 1.1618647701864938e-07, - "loss": 0.1423, + "epoch": 1.925021180457498, + "grad_norm": 3.3677548915421176, + "learning_rate": 6.011794781881866e-06, + "loss": 0.5341, "step": 13633 }, { - "epoch": 2.858880268400084, - "grad_norm": 3.8147957289538628, - "learning_rate": 1.1584263248634153e-07, - "loss": 0.1223, + "epoch": 1.9251623835074838, + "grad_norm": 2.917421332280244, + "learning_rate": 6.01039686065497e-06, + "loss": 0.4037, "step": 13634 }, { - "epoch": 2.859089955965611, - "grad_norm": 4.744537730792014, - "learning_rate": 1.1549929453424258e-07, - "loss": 0.1795, + "epoch": 1.9253035865574697, + "grad_norm": 3.124174648607816, + "learning_rate": 6.008999032144731e-06, + "loss": 0.4526, "step": 13635 }, { - "epoch": 2.8592996435311386, - "grad_norm": 3.6599870359446127, - "learning_rate": 1.1515646317994844e-07, - "loss": 0.1182, + "epoch": 1.9254447896074556, + "grad_norm": 4.548034251473878, + "learning_rate": 6.007601296383629e-06, + "loss": 0.6609, "step": 13636 }, { - "epoch": 2.8595093310966657, - "grad_norm": 4.4819778434380195, - "learning_rate": 1.1481413844103062e-07, - "loss": 0.1361, + "epoch": 1.9255859926574415, + "grad_norm": 4.525794141436628, + "learning_rate": 6.006203653404149e-06, + "loss": 0.7639, "step": 13637 }, { - "epoch": 2.8597190186621932, - "grad_norm": 4.825349831900849, - "learning_rate": 1.1447232033503286e-07, - "loss": 0.1793, + "epoch": 1.9257271957074273, + "grad_norm": 3.4029999943974265, + "learning_rate": 6.004806103238771e-06, + "loss": 0.5091, "step": 13638 }, { - "epoch": 2.8599287062277208, - "grad_norm": 5.164136180734384, - "learning_rate": 1.1413100887947448e-07, - "loss": 0.1636, + "epoch": 1.9258683987574132, + "grad_norm": 3.8394010238866545, + "learning_rate": 6.003408645919977e-06, + "loss": 0.6227, "step": 13639 }, { - "epoch": 2.860138393793248, - "grad_norm": 5.2535938873817996, - "learning_rate": 1.1379020409184704e-07, - "loss": 0.1819, + "epoch": 1.926009601807399, + "grad_norm": 3.06487560436677, + "learning_rate": 6.0020112814802355e-06, + "loss": 0.5039, "step": 13640 }, { - "epoch": 2.8603480813587754, - "grad_norm": 3.2605737583106236, - "learning_rate": 1.1344990598961992e-07, - "loss": 0.1109, + "epoch": 1.926150804857385, + "grad_norm": 3.9914153516355944, + "learning_rate": 6.000614009952033e-06, + "loss": 0.7057, "step": 13641 }, { - "epoch": 2.860557768924303, - "grad_norm": 5.3740688320670955, - "learning_rate": 1.1311011459023135e-07, - "loss": 0.1797, + "epoch": 1.9262920079073709, + "grad_norm": 3.0072049376254864, + "learning_rate": 5.9992168313678264e-06, + "loss": 0.4178, "step": 13642 }, { - "epoch": 2.86076745648983, - "grad_norm": 4.577988669941811, - "learning_rate": 1.1277082991109745e-07, - "loss": 0.1261, + "epoch": 1.9264332109573568, + "grad_norm": 3.7759269801323927, + "learning_rate": 5.997819745760094e-06, + "loss": 0.5494, "step": 13643 }, { - "epoch": 2.8609771440553575, - "grad_norm": 3.1814478824021264, - "learning_rate": 1.124320519696076e-07, - "loss": 0.095, + "epoch": 1.9265744140073426, + "grad_norm": 4.100227751364717, + "learning_rate": 5.996422753161304e-06, + "loss": 0.6016, "step": 13644 }, { - "epoch": 2.861186831620885, - "grad_norm": 5.147039954673129, - "learning_rate": 1.120937807831235e-07, - "loss": 0.1086, + "epoch": 1.9267156170573285, + "grad_norm": 2.9960460284968504, + "learning_rate": 5.99502585360392e-06, + "loss": 0.4263, "step": 13645 }, { - "epoch": 2.861396519186412, - "grad_norm": 5.340453266931193, - "learning_rate": 1.1175601636898348e-07, - "loss": 0.1562, + "epoch": 1.9268568201073144, + "grad_norm": 2.7244867516768427, + "learning_rate": 5.993629047120403e-06, + "loss": 0.4465, "step": 13646 }, { - "epoch": 2.8616062067519397, - "grad_norm": 3.6952859300944545, - "learning_rate": 1.1141875874449815e-07, - "loss": 0.1158, + "epoch": 1.9269980231573003, + "grad_norm": 4.183471642996471, + "learning_rate": 5.992232333743218e-06, + "loss": 0.5688, "step": 13647 }, { - "epoch": 2.861815894317467, - "grad_norm": 5.301684438619656, - "learning_rate": 1.1108200792695035e-07, - "loss": 0.1422, + "epoch": 1.9271392262072862, + "grad_norm": 3.439135611152873, + "learning_rate": 5.990835713504825e-06, + "loss": 0.517, "step": 13648 }, { - "epoch": 2.8620255818829943, - "grad_norm": 4.8367563037495795, - "learning_rate": 1.1074576393360292e-07, - "loss": 0.1494, + "epoch": 1.927280429257272, + "grad_norm": 3.911613043925635, + "learning_rate": 5.989439186437674e-06, + "loss": 0.5166, "step": 13649 }, { - "epoch": 2.862235269448522, - "grad_norm": 4.364164084450006, - "learning_rate": 1.1041002678168766e-07, - "loss": 0.1626, + "epoch": 1.927421632307258, + "grad_norm": 3.7405548788496303, + "learning_rate": 5.988042752574233e-06, + "loss": 0.492, "step": 13650 }, { - "epoch": 2.8624449570140493, - "grad_norm": 3.877871500441854, - "learning_rate": 1.1007479648840969e-07, - "loss": 0.1289, + "epoch": 1.9275628353572438, + "grad_norm": 2.9889449688637675, + "learning_rate": 5.9866464119469366e-06, + "loss": 0.4359, "step": 13651 }, { - "epoch": 2.8626546445795764, - "grad_norm": 3.969067891692186, - "learning_rate": 1.0974007307095192e-07, - "loss": 0.1265, + "epoch": 1.9277040384072297, + "grad_norm": 3.827210697257077, + "learning_rate": 5.9852501645882485e-06, + "loss": 0.5872, "step": 13652 }, { - "epoch": 2.862864332145104, - "grad_norm": 4.057390512293068, - "learning_rate": 1.0940585654646951e-07, - "loss": 0.1592, + "epoch": 1.9278452414572156, + "grad_norm": 3.3200150703833264, + "learning_rate": 5.983854010530612e-06, + "loss": 0.4655, "step": 13653 }, { - "epoch": 2.863074019710631, - "grad_norm": 3.7477973147476034, - "learning_rate": 1.09072146932091e-07, - "loss": 0.1117, + "epoch": 1.9279864445072015, + "grad_norm": 3.3511122022803232, + "learning_rate": 5.9824579498064725e-06, + "loss": 0.5472, "step": 13654 }, { - "epoch": 2.8632837072761586, - "grad_norm": 3.2109220481475242, - "learning_rate": 1.0873894424492049e-07, - "loss": 0.1131, + "epoch": 1.9281276475571874, + "grad_norm": 3.1094790300202604, + "learning_rate": 5.9810619824482775e-06, + "loss": 0.4448, "step": 13655 }, { - "epoch": 2.8634933948416856, - "grad_norm": 3.4511645709102434, - "learning_rate": 1.084062485020343e-07, - "loss": 0.1062, + "epoch": 1.9282688506071732, + "grad_norm": 3.367906720988765, + "learning_rate": 5.979666108488464e-06, + "loss": 0.5373, "step": 13656 }, { - "epoch": 2.863703082407213, - "grad_norm": 4.179331793199333, - "learning_rate": 1.0807405972048324e-07, - "loss": 0.1392, + "epoch": 1.9284100536571591, + "grad_norm": 3.347579850286576, + "learning_rate": 5.978270327959477e-06, + "loss": 0.4802, "step": 13657 }, { - "epoch": 2.8639127699727407, - "grad_norm": 4.738768710333367, - "learning_rate": 1.0774237791729481e-07, - "loss": 0.182, + "epoch": 1.928551256707145, + "grad_norm": 3.4698172935243554, + "learning_rate": 5.976874640893751e-06, + "loss": 0.579, "step": 13658 }, { - "epoch": 2.8641224575382678, - "grad_norm": 3.8175376001693713, - "learning_rate": 1.074112031094654e-07, - "loss": 0.113, + "epoch": 1.9286924597571309, + "grad_norm": 3.7598719262008236, + "learning_rate": 5.975479047323718e-06, + "loss": 0.6637, "step": 13659 }, { - "epoch": 2.8643321451037953, - "grad_norm": 4.626430669270818, - "learning_rate": 1.0708053531397034e-07, - "loss": 0.1467, + "epoch": 1.9288336628071168, + "grad_norm": 3.5544129080957063, + "learning_rate": 5.9740835472818145e-06, + "loss": 0.5076, "step": 13660 }, { - "epoch": 2.864541832669323, - "grad_norm": 5.131946922938686, - "learning_rate": 1.0675037454775606e-07, - "loss": 0.149, + "epoch": 1.9289748658571026, + "grad_norm": 3.9797898829366027, + "learning_rate": 5.972688140800471e-06, + "loss": 0.5499, "step": 13661 }, { - "epoch": 2.86475152023485, - "grad_norm": 5.36406272708449, - "learning_rate": 1.0642072082774458e-07, - "loss": 0.1221, + "epoch": 1.9291160689070885, + "grad_norm": 3.4268237200552174, + "learning_rate": 5.971292827912117e-06, + "loss": 0.4516, "step": 13662 }, { - "epoch": 2.8649612078003774, - "grad_norm": 3.4170416277557294, - "learning_rate": 1.0609157417083016e-07, - "loss": 0.1027, + "epoch": 1.9292572719570744, + "grad_norm": 3.6547781808334783, + "learning_rate": 5.9698976086491745e-06, + "loss": 0.5811, "step": 13663 }, { - "epoch": 2.865170895365905, - "grad_norm": 3.38670810976359, - "learning_rate": 1.0576293459388265e-07, - "loss": 0.0981, + "epoch": 1.9293984750070603, + "grad_norm": 3.298319348305385, + "learning_rate": 5.968502483044074e-06, + "loss": 0.4992, "step": 13664 }, { - "epoch": 2.865380582931432, - "grad_norm": 3.7759771735066, - "learning_rate": 1.0543480211374524e-07, - "loss": 0.12, + "epoch": 1.9295396780570462, + "grad_norm": 3.6373737522311647, + "learning_rate": 5.9671074511292335e-06, + "loss": 0.5531, "step": 13665 }, { - "epoch": 2.8655902704969596, - "grad_norm": 3.614647847213322, - "learning_rate": 1.0510717674723558e-07, - "loss": 0.1175, + "epoch": 1.929680881107032, + "grad_norm": 3.837363381971307, + "learning_rate": 5.965712512937074e-06, + "loss": 0.6243, "step": 13666 }, { - "epoch": 2.865799958062487, - "grad_norm": 4.336401271825973, - "learning_rate": 1.047800585111447e-07, - "loss": 0.1509, + "epoch": 1.929822084157018, + "grad_norm": 3.6588827163377275, + "learning_rate": 5.964317668500017e-06, + "loss": 0.5374, "step": 13667 }, { - "epoch": 2.866009645628014, - "grad_norm": 5.227772158608982, - "learning_rate": 1.0445344742223917e-07, - "loss": 0.1494, + "epoch": 1.9299632872070038, + "grad_norm": 2.9205619085357166, + "learning_rate": 5.962922917850471e-06, + "loss": 0.4447, "step": 13668 }, { - "epoch": 2.8662193331935417, - "grad_norm": 4.68598737073222, - "learning_rate": 1.0412734349725673e-07, - "loss": 0.1401, + "epoch": 1.9301044902569897, + "grad_norm": 3.725526912178215, + "learning_rate": 5.9615282610208535e-06, + "loss": 0.4864, "step": 13669 }, { - "epoch": 2.8664290207590692, - "grad_norm": 5.020608384899008, - "learning_rate": 1.0380174675291177e-07, - "loss": 0.1454, + "epoch": 1.9302456933069756, + "grad_norm": 3.603580866145728, + "learning_rate": 5.9601336980435756e-06, + "loss": 0.5965, "step": 13670 }, { - "epoch": 2.8666387083245963, - "grad_norm": 3.462378392602713, - "learning_rate": 1.0347665720589095e-07, - "loss": 0.1056, + "epoch": 1.9303868963569613, + "grad_norm": 3.4290863819229553, + "learning_rate": 5.958739228951043e-06, + "loss": 0.4589, "step": 13671 }, { - "epoch": 2.866848395890124, - "grad_norm": 3.6725823120002308, - "learning_rate": 1.0315207487285651e-07, - "loss": 0.1149, + "epoch": 1.9305280994069471, + "grad_norm": 3.9752371050789406, + "learning_rate": 5.957344853775668e-06, + "loss": 0.5655, "step": 13672 }, { - "epoch": 2.867058083455651, - "grad_norm": 4.507916037553875, - "learning_rate": 1.0282799977044289e-07, - "loss": 0.1566, + "epoch": 1.930669302456933, + "grad_norm": 3.665325945554052, + "learning_rate": 5.955950572549853e-06, + "loss": 0.4454, "step": 13673 }, { - "epoch": 2.8672677710211785, - "grad_norm": 5.161568807846467, - "learning_rate": 1.0250443191526016e-07, - "loss": 0.1412, + "epoch": 1.930810505506919, + "grad_norm": 3.5086278690845893, + "learning_rate": 5.954556385306001e-06, + "loss": 0.5546, "step": 13674 }, { - "epoch": 2.8674774585867056, - "grad_norm": 4.41848591076201, - "learning_rate": 1.0218137132389283e-07, - "loss": 0.1496, + "epoch": 1.9309517085569048, + "grad_norm": 3.2635311744751765, + "learning_rate": 5.953162292076512e-06, + "loss": 0.4707, "step": 13675 }, { - "epoch": 2.867687146152233, - "grad_norm": 7.149009844531486, - "learning_rate": 1.0185881801289543e-07, - "loss": 0.1928, + "epoch": 1.9310929116068907, + "grad_norm": 4.506963373349829, + "learning_rate": 5.9517682928937825e-06, + "loss": 0.66, "step": 13676 }, { - "epoch": 2.8678968337177606, - "grad_norm": 3.4195707639008748, - "learning_rate": 1.0153677199880141e-07, - "loss": 0.1018, + "epoch": 1.9312341146568766, + "grad_norm": 4.523539078707294, + "learning_rate": 5.950374387790207e-06, + "loss": 0.6848, "step": 13677 }, { - "epoch": 2.8681065212832877, - "grad_norm": 3.5456014790915598, - "learning_rate": 1.0121523329811756e-07, - "loss": 0.1182, + "epoch": 1.9313753177068624, + "grad_norm": 3.7986039775746328, + "learning_rate": 5.9489805767981845e-06, + "loss": 0.5167, "step": 13678 }, { - "epoch": 2.868316208848815, - "grad_norm": 3.9180992896039615, - "learning_rate": 1.0089420192731958e-07, - "loss": 0.1486, + "epoch": 1.9315165207568483, + "grad_norm": 3.428832680730377, + "learning_rate": 5.947586859950103e-06, + "loss": 0.4003, "step": 13679 }, { - "epoch": 2.8685258964143427, - "grad_norm": 4.356664955517061, - "learning_rate": 1.0057367790286321e-07, - "loss": 0.138, + "epoch": 1.9316577238068342, + "grad_norm": 3.972805162505941, + "learning_rate": 5.946193237278352e-06, + "loss": 0.6438, "step": 13680 }, { - "epoch": 2.86873558397987, - "grad_norm": 4.064940342487734, - "learning_rate": 1.0025366124117641e-07, - "loss": 0.1364, + "epoch": 1.93179892685682, + "grad_norm": 3.4645735752362086, + "learning_rate": 5.9447997088153205e-06, + "loss": 0.6047, "step": 13681 }, { - "epoch": 2.8689452715453974, - "grad_norm": 4.3236333787680525, - "learning_rate": 9.993415195865941e-08, - "loss": 0.1442, + "epoch": 1.931940129906806, + "grad_norm": 3.226147248193192, + "learning_rate": 5.9434062745933926e-06, + "loss": 0.5454, "step": 13682 }, { - "epoch": 2.869154959110925, - "grad_norm": 4.45191416256865, - "learning_rate": 9.961515007168687e-08, - "loss": 0.151, + "epoch": 1.9320813329567919, + "grad_norm": 2.8333540314768264, + "learning_rate": 5.942012934644949e-06, + "loss": 0.4576, "step": 13683 }, { - "epoch": 2.869364646676452, - "grad_norm": 3.294083362729306, - "learning_rate": 9.929665559661017e-08, - "loss": 0.0995, + "epoch": 1.9322225360067777, + "grad_norm": 3.8883253223838556, + "learning_rate": 5.9406196890023786e-06, + "loss": 0.5371, "step": 13684 }, { - "epoch": 2.8695743342419795, - "grad_norm": 4.2225052346037035, - "learning_rate": 9.89786685497518e-08, - "loss": 0.1357, + "epoch": 1.9323637390567636, + "grad_norm": 4.088583830091247, + "learning_rate": 5.9392265376980465e-06, + "loss": 0.6727, "step": 13685 }, { - "epoch": 2.869784021807507, - "grad_norm": 3.9305347718127757, - "learning_rate": 9.866118894740873e-08, - "loss": 0.1286, + "epoch": 1.9325049421067495, + "grad_norm": 3.815753034701038, + "learning_rate": 5.937833480764339e-06, + "loss": 0.5212, "step": 13686 }, { - "epoch": 2.869993709373034, - "grad_norm": 5.737158323653031, - "learning_rate": 9.834421680585349e-08, - "loss": 0.1399, + "epoch": 1.9326461451567354, + "grad_norm": 3.3486987726518684, + "learning_rate": 5.936440518233627e-06, + "loss": 0.5368, "step": 13687 }, { - "epoch": 2.8702033969385616, - "grad_norm": 4.756270010999447, - "learning_rate": 9.802775214132976e-08, - "loss": 0.1572, + "epoch": 1.9327873482067213, + "grad_norm": 3.126859204143963, + "learning_rate": 5.935047650138282e-06, + "loss": 0.4707, "step": 13688 }, { - "epoch": 2.870413084504089, - "grad_norm": 4.233421918282514, - "learning_rate": 9.77117949700579e-08, - "loss": 0.141, + "epoch": 1.9329285512567072, + "grad_norm": 3.327798063182012, + "learning_rate": 5.933654876510673e-06, + "loss": 0.4783, "step": 13689 }, { - "epoch": 2.8706227720696162, - "grad_norm": 4.952355570721876, - "learning_rate": 9.739634530823161e-08, - "loss": 0.1496, + "epoch": 1.933069754306693, + "grad_norm": 3.201636557914574, + "learning_rate": 5.93226219738317e-06, + "loss": 0.4515, "step": 13690 }, { - "epoch": 2.8708324596351438, - "grad_norm": 4.018547560313546, - "learning_rate": 9.708140317201798e-08, - "loss": 0.1279, + "epoch": 1.933210957356679, + "grad_norm": 3.754261697844495, + "learning_rate": 5.930869612788136e-06, + "loss": 0.5823, "step": 13691 }, { - "epoch": 2.871042147200671, - "grad_norm": 4.731739098056344, - "learning_rate": 9.676696857755741e-08, - "loss": 0.1572, + "epoch": 1.9333521604066648, + "grad_norm": 3.56281624782598, + "learning_rate": 5.929477122757938e-06, + "loss": 0.5222, "step": 13692 }, { - "epoch": 2.8712518347661984, - "grad_norm": 4.966048422968939, - "learning_rate": 9.645304154096702e-08, - "loss": 0.1618, + "epoch": 1.9334933634566507, + "grad_norm": 3.9402739103220457, + "learning_rate": 5.928084727324932e-06, + "loss": 0.6567, "step": 13693 }, { - "epoch": 2.8714615223317255, - "grad_norm": 5.2444004576972905, - "learning_rate": 9.613962207833394e-08, - "loss": 0.1775, + "epoch": 1.9336345665066366, + "grad_norm": 3.4920503233728337, + "learning_rate": 5.926692426521474e-06, + "loss": 0.4959, "step": 13694 }, { - "epoch": 2.871671209897253, - "grad_norm": 3.616721146755147, - "learning_rate": 9.582671020572421e-08, - "loss": 0.1033, + "epoch": 1.9337757695566224, + "grad_norm": 4.38278127963404, + "learning_rate": 5.92530022037993e-06, + "loss": 0.7585, "step": 13695 }, { - "epoch": 2.8718808974627805, - "grad_norm": 3.8408486694755655, - "learning_rate": 9.551430593917277e-08, - "loss": 0.104, + "epoch": 1.9339169726066083, + "grad_norm": 3.841475732843702, + "learning_rate": 5.923908108932648e-06, + "loss": 0.5602, "step": 13696 }, { - "epoch": 2.8720905850283076, - "grad_norm": 4.105991190431302, - "learning_rate": 9.520240929469127e-08, - "loss": 0.1328, + "epoch": 1.9340581756565942, + "grad_norm": 3.599058247707388, + "learning_rate": 5.9225160922119784e-06, + "loss": 0.5172, "step": 13697 }, { - "epoch": 2.872300272593835, - "grad_norm": 4.4469561334081, - "learning_rate": 9.489102028826469e-08, - "loss": 0.1413, + "epoch": 1.93419937870658, + "grad_norm": 3.77084930028437, + "learning_rate": 5.921124170250276e-06, + "loss": 0.5908, "step": 13698 }, { - "epoch": 2.8725099601593627, - "grad_norm": 4.618809256452602, - "learning_rate": 9.458013893585472e-08, - "loss": 0.1706, + "epoch": 1.934340581756566, + "grad_norm": 3.4877514279838095, + "learning_rate": 5.919732343079887e-06, + "loss": 0.5456, "step": 13699 }, { - "epoch": 2.8727196477248897, - "grad_norm": 3.7284777983080004, - "learning_rate": 9.426976525339082e-08, - "loss": 0.1176, + "epoch": 1.9344817848065519, + "grad_norm": 4.2002285181941685, + "learning_rate": 5.918340610733154e-06, + "loss": 0.6438, "step": 13700 }, { - "epoch": 2.8729293352904173, - "grad_norm": 3.3685397630278966, - "learning_rate": 9.39598992567825e-08, - "loss": 0.115, + "epoch": 1.9346229878565377, + "grad_norm": 4.108356153185096, + "learning_rate": 5.916948973242429e-06, + "loss": 0.6009, "step": 13701 }, { - "epoch": 2.873139022855945, - "grad_norm": 3.5640782762719314, - "learning_rate": 9.365054096191151e-08, - "loss": 0.1076, + "epoch": 1.9347641909065236, + "grad_norm": 4.16774114108925, + "learning_rate": 5.91555743064004e-06, + "loss": 0.6562, "step": 13702 }, { - "epoch": 2.873348710421472, - "grad_norm": 4.23863695515395, - "learning_rate": 9.33416903846307e-08, - "loss": 0.13, + "epoch": 1.9349053939565095, + "grad_norm": 3.645123953693673, + "learning_rate": 5.9141659829583345e-06, + "loss": 0.5664, "step": 13703 }, { - "epoch": 2.8735583979869994, - "grad_norm": 4.047976180621325, - "learning_rate": 9.303334754077075e-08, - "loss": 0.1148, + "epoch": 1.9350465970064954, + "grad_norm": 3.8031686917343843, + "learning_rate": 5.912774630229648e-06, + "loss": 0.6661, "step": 13704 }, { - "epoch": 2.873768085552527, - "grad_norm": 3.5901009762709357, - "learning_rate": 9.27255124461357e-08, - "loss": 0.1155, + "epoch": 1.935187800056481, + "grad_norm": 3.835673833354061, + "learning_rate": 5.911383372486312e-06, + "loss": 0.5729, "step": 13705 }, { - "epoch": 2.873977773118054, - "grad_norm": 4.966260848858032, - "learning_rate": 9.241818511649958e-08, - "loss": 0.1441, + "epoch": 1.935329003106467, + "grad_norm": 3.5932721949382684, + "learning_rate": 5.90999220976066e-06, + "loss": 0.5468, "step": 13706 }, { - "epoch": 2.8741874606835816, - "grad_norm": 3.886238328796598, - "learning_rate": 9.211136556761646e-08, - "loss": 0.1092, + "epoch": 1.9354702061564528, + "grad_norm": 4.118451596377352, + "learning_rate": 5.908601142085024e-06, + "loss": 0.6493, "step": 13707 }, { - "epoch": 2.874397148249109, - "grad_norm": 3.6683485106978684, - "learning_rate": 9.180505381521044e-08, - "loss": 0.1103, + "epoch": 1.9356114092064387, + "grad_norm": 3.5023250448517094, + "learning_rate": 5.9072101694917305e-06, + "loss": 0.5822, "step": 13708 }, { - "epoch": 2.874606835814636, - "grad_norm": 3.872272491107465, - "learning_rate": 9.149924987498004e-08, - "loss": 0.1304, + "epoch": 1.9357526122564246, + "grad_norm": 3.110987525553427, + "learning_rate": 5.905819292013107e-06, + "loss": 0.5146, "step": 13709 }, { - "epoch": 2.8748165233801637, - "grad_norm": 3.8082056448113555, - "learning_rate": 9.119395376259832e-08, - "loss": 0.1475, + "epoch": 1.9358938153064105, + "grad_norm": 3.7760083290904625, + "learning_rate": 5.904428509681473e-06, + "loss": 0.7114, "step": 13710 }, { - "epoch": 2.8750262109456908, - "grad_norm": 4.7427498974542095, - "learning_rate": 9.088916549371163e-08, - "loss": 0.1498, + "epoch": 1.9360350183563964, + "grad_norm": 2.9679014256788134, + "learning_rate": 5.903037822529151e-06, + "loss": 0.4703, "step": 13711 }, { - "epoch": 2.8752358985112183, - "grad_norm": 4.803805907711026, - "learning_rate": 9.058488508394304e-08, - "loss": 0.1642, + "epoch": 1.9361762214063822, + "grad_norm": 3.5198847816694525, + "learning_rate": 5.901647230588461e-06, + "loss": 0.591, "step": 13712 }, { - "epoch": 2.8754455860767454, - "grad_norm": 5.149907046021405, - "learning_rate": 9.02811125488856e-08, - "loss": 0.1628, + "epoch": 1.9363174244563681, + "grad_norm": 3.4707906704060014, + "learning_rate": 5.9002567338917204e-06, + "loss": 0.4225, "step": 13713 }, { - "epoch": 2.875655273642273, - "grad_norm": 4.874141858966118, - "learning_rate": 8.997784790410913e-08, - "loss": 0.1387, + "epoch": 1.936458627506354, + "grad_norm": 3.915006052111267, + "learning_rate": 5.898866332471241e-06, + "loss": 0.55, "step": 13714 }, { - "epoch": 2.8758649612078004, - "grad_norm": 4.594539811031173, - "learning_rate": 8.96750911651545e-08, - "loss": 0.1265, + "epoch": 1.9365998305563399, + "grad_norm": 3.7453986142283506, + "learning_rate": 5.897476026359339e-06, + "loss": 0.5473, "step": 13715 }, { - "epoch": 2.8760746487733275, - "grad_norm": 4.651032619188963, - "learning_rate": 8.937284234754151e-08, - "loss": 0.1228, + "epoch": 1.9367410336063258, + "grad_norm": 3.34776305764127, + "learning_rate": 5.896085815588321e-06, + "loss": 0.5279, "step": 13716 }, { - "epoch": 2.876284336338855, - "grad_norm": 5.052123448656687, - "learning_rate": 8.907110146676001e-08, - "loss": 0.1518, + "epoch": 1.9368822366563117, + "grad_norm": 3.311507511989576, + "learning_rate": 5.894695700190498e-06, + "loss": 0.5798, "step": 13717 }, { - "epoch": 2.8764940239043826, - "grad_norm": 4.707168220470122, - "learning_rate": 8.876986853827208e-08, - "loss": 0.1506, + "epoch": 1.9370234397062975, + "grad_norm": 3.3428506833632348, + "learning_rate": 5.893305680198175e-06, + "loss": 0.4197, "step": 13718 }, { - "epoch": 2.8767037114699097, - "grad_norm": 3.56967054969513, - "learning_rate": 8.84691435775198e-08, - "loss": 0.118, + "epoch": 1.9371646427562834, + "grad_norm": 4.760869814307227, + "learning_rate": 5.89191575564365e-06, + "loss": 0.609, "step": 13719 }, { - "epoch": 2.876913399035437, - "grad_norm": 4.519674345350076, - "learning_rate": 8.816892659991528e-08, - "loss": 0.1423, + "epoch": 1.9373058458062693, + "grad_norm": 3.0275866197835164, + "learning_rate": 5.8905259265592315e-06, + "loss": 0.5147, "step": 13720 }, { - "epoch": 2.8771230866009647, - "grad_norm": 4.455753854022332, - "learning_rate": 8.786921762084288e-08, - "loss": 0.1247, + "epoch": 1.9374470488562552, + "grad_norm": 3.8773540660910246, + "learning_rate": 5.889136192977216e-06, + "loss": 0.6017, "step": 13721 }, { - "epoch": 2.877332774166492, - "grad_norm": 3.527412898433023, - "learning_rate": 8.757001665566589e-08, - "loss": 0.12, + "epoch": 1.937588251906241, + "grad_norm": 3.4488683038071146, + "learning_rate": 5.887746554929899e-06, + "loss": 0.5665, "step": 13722 }, { - "epoch": 2.8775424617320193, - "grad_norm": 5.534074890372293, - "learning_rate": 8.727132371971758e-08, - "loss": 0.1695, + "epoch": 1.937729454956227, + "grad_norm": 3.520262265248116, + "learning_rate": 5.886357012449575e-06, + "loss": 0.5482, "step": 13723 }, { - "epoch": 2.877752149297547, - "grad_norm": 4.990068682814447, - "learning_rate": 8.697313882830683e-08, - "loss": 0.1522, + "epoch": 1.9378706580062128, + "grad_norm": 4.529110174513095, + "learning_rate": 5.884967565568539e-06, + "loss": 0.6764, "step": 13724 }, { - "epoch": 2.877961836863074, - "grad_norm": 4.334415369019435, - "learning_rate": 8.667546199671473e-08, - "loss": 0.1588, + "epoch": 1.9380118610561987, + "grad_norm": 3.723047054401143, + "learning_rate": 5.88357821431908e-06, + "loss": 0.5902, "step": 13725 }, { - "epoch": 2.8781715244286015, - "grad_norm": 2.98322187906449, - "learning_rate": 8.63782932402002e-08, - "loss": 0.102, + "epoch": 1.9381530641061846, + "grad_norm": 3.776142922145206, + "learning_rate": 5.882188958733488e-06, + "loss": 0.5692, "step": 13726 }, { - "epoch": 2.878381211994129, - "grad_norm": 4.484368980134519, - "learning_rate": 8.608163257399216e-08, - "loss": 0.1533, + "epoch": 1.9382942671561705, + "grad_norm": 2.8991646244531943, + "learning_rate": 5.880799798844044e-06, + "loss": 0.4128, "step": 13727 }, { - "epoch": 2.878590899559656, - "grad_norm": 4.7900535118592815, - "learning_rate": 8.578548001329622e-08, - "loss": 0.1409, + "epoch": 1.9384354702061564, + "grad_norm": 4.11725693549263, + "learning_rate": 5.87941073468303e-06, + "loss": 0.5433, "step": 13728 }, { - "epoch": 2.8788005871251836, - "grad_norm": 4.01954994588884, - "learning_rate": 8.548983557328805e-08, - "loss": 0.099, + "epoch": 1.9385766732561422, + "grad_norm": 3.2704463339613667, + "learning_rate": 5.878021766282734e-06, + "loss": 0.4857, "step": 13729 }, { - "epoch": 2.8790102746907107, - "grad_norm": 5.4917598870929325, - "learning_rate": 8.519469926912327e-08, - "loss": 0.1627, + "epoch": 1.9387178763061281, + "grad_norm": 3.488287231351765, + "learning_rate": 5.876632893675432e-06, + "loss": 0.5272, "step": 13730 }, { - "epoch": 2.879219962256238, - "grad_norm": 4.984366279029392, - "learning_rate": 8.490007111592536e-08, - "loss": 0.1383, + "epoch": 1.938859079356114, + "grad_norm": 3.8175470893915726, + "learning_rate": 5.875244116893399e-06, + "loss": 0.536, "step": 13731 }, { - "epoch": 2.8794296498217653, - "grad_norm": 3.6714831612389447, - "learning_rate": 8.460595112879555e-08, - "loss": 0.0931, + "epoch": 1.9390002824061, + "grad_norm": 3.865514025634565, + "learning_rate": 5.873855435968913e-06, + "loss": 0.6378, "step": 13732 }, { - "epoch": 2.879639337387293, - "grad_norm": 3.946642438472358, - "learning_rate": 8.431233932280958e-08, - "loss": 0.1052, + "epoch": 1.9391414854560858, + "grad_norm": 3.2647132980381084, + "learning_rate": 5.872466850934247e-06, + "loss": 0.5069, "step": 13733 }, { - "epoch": 2.8798490249528204, - "grad_norm": 4.045919873937914, - "learning_rate": 8.401923571301207e-08, - "loss": 0.1302, + "epoch": 1.9392826885060717, + "grad_norm": 4.357754156770756, + "learning_rate": 5.871078361821667e-06, + "loss": 0.6475, "step": 13734 }, { - "epoch": 2.8800587125183474, - "grad_norm": 3.8497475079643655, - "learning_rate": 8.372664031442768e-08, - "loss": 0.0944, + "epoch": 1.9394238915560575, + "grad_norm": 3.8057649193976437, + "learning_rate": 5.869689968663444e-06, + "loss": 0.5281, "step": 13735 }, { - "epoch": 2.880268400083875, - "grad_norm": 3.6240896143379615, - "learning_rate": 8.343455314205329e-08, - "loss": 0.1475, + "epoch": 1.9395650946060434, + "grad_norm": 3.153409297977442, + "learning_rate": 5.8683016714918416e-06, + "loss": 0.4916, "step": 13736 }, { - "epoch": 2.8804780876494025, - "grad_norm": 3.891431118500464, - "learning_rate": 8.314297421085693e-08, - "loss": 0.125, + "epoch": 1.9397062976560293, + "grad_norm": 3.392707086978055, + "learning_rate": 5.866913470339122e-06, + "loss": 0.4773, "step": 13737 }, { - "epoch": 2.8806877752149296, - "grad_norm": 4.9170641642873845, - "learning_rate": 8.28519035357822e-08, - "loss": 0.1436, + "epoch": 1.9398475007060152, + "grad_norm": 4.129633386137401, + "learning_rate": 5.86552536523755e-06, + "loss": 0.6654, "step": 13738 }, { - "epoch": 2.880897462780457, - "grad_norm": 4.474136144980559, - "learning_rate": 8.25613411317483e-08, - "loss": 0.1383, + "epoch": 1.939988703756001, + "grad_norm": 3.117092952212741, + "learning_rate": 5.864137356219381e-06, + "loss": 0.4832, "step": 13739 }, { - "epoch": 2.8811071503459846, - "grad_norm": 5.154780107564171, - "learning_rate": 8.227128701364662e-08, - "loss": 0.1351, + "epoch": 1.940129906805987, + "grad_norm": 3.851070025677055, + "learning_rate": 5.8627494433168756e-06, + "loss": 0.6835, "step": 13740 }, { - "epoch": 2.8813168379115117, - "grad_norm": 5.117974511806987, - "learning_rate": 8.198174119634306e-08, - "loss": 0.1823, + "epoch": 1.9402711098559728, + "grad_norm": 4.383728421216885, + "learning_rate": 5.861361626562285e-06, + "loss": 0.7755, "step": 13741 }, { - "epoch": 2.8815265254770392, - "grad_norm": 3.690876287181319, - "learning_rate": 8.169270369467685e-08, - "loss": 0.1077, + "epoch": 1.9404123129059587, + "grad_norm": 3.4332066454145416, + "learning_rate": 5.859973905987866e-06, + "loss": 0.4209, "step": 13742 }, { - "epoch": 2.8817362130425668, - "grad_norm": 4.823497337386282, - "learning_rate": 8.140417452346283e-08, - "loss": 0.1821, + "epoch": 1.9405535159559446, + "grad_norm": 3.0376027226100155, + "learning_rate": 5.858586281625864e-06, + "loss": 0.4269, "step": 13743 }, { - "epoch": 2.881945900608094, - "grad_norm": 6.941536931131761, - "learning_rate": 8.111615369748693e-08, - "loss": 0.1342, + "epoch": 1.9406947190059305, + "grad_norm": 3.683301580150677, + "learning_rate": 5.8571987535085286e-06, + "loss": 0.5402, "step": 13744 }, { - "epoch": 2.8821555881736214, - "grad_norm": 3.437278536143222, - "learning_rate": 8.08286412315118e-08, - "loss": 0.1154, + "epoch": 1.9408359220559164, + "grad_norm": 3.011082985749689, + "learning_rate": 5.855811321668104e-06, + "loss": 0.4849, "step": 13745 }, { - "epoch": 2.882365275739149, - "grad_norm": 4.598256447624964, - "learning_rate": 8.05416371402723e-08, - "loss": 0.1275, + "epoch": 1.9409771251059023, + "grad_norm": 3.959578105879021, + "learning_rate": 5.8544239861368346e-06, + "loss": 0.72, "step": 13746 }, { - "epoch": 2.882574963304676, - "grad_norm": 3.8816519812415597, - "learning_rate": 8.02551414384789e-08, - "loss": 0.1171, + "epoch": 1.9411183281558881, + "grad_norm": 3.193291572622729, + "learning_rate": 5.853036746946961e-06, + "loss": 0.4906, "step": 13747 }, { - "epoch": 2.8827846508702035, - "grad_norm": 3.6155420120416086, - "learning_rate": 7.99691541408143e-08, - "loss": 0.1164, + "epoch": 1.941259531205874, + "grad_norm": 4.355281954004002, + "learning_rate": 5.851649604130723e-06, + "loss": 0.5843, "step": 13748 }, { - "epoch": 2.8829943384357306, - "grad_norm": 4.289405757122124, - "learning_rate": 7.968367526193676e-08, - "loss": 0.1515, + "epoch": 1.94140073425586, + "grad_norm": 4.615059538508084, + "learning_rate": 5.850262557720355e-06, + "loss": 0.6055, "step": 13749 }, { - "epoch": 2.883204026001258, - "grad_norm": 3.8146134816238515, - "learning_rate": 7.939870481647571e-08, - "loss": 0.1102, + "epoch": 1.9415419373058458, + "grad_norm": 3.8032804362228143, + "learning_rate": 5.848875607748095e-06, + "loss": 0.5844, "step": 13750 }, { - "epoch": 2.883413713566785, - "grad_norm": 3.686923433923327, - "learning_rate": 7.911424281903723e-08, - "loss": 0.1196, + "epoch": 1.9416831403558317, + "grad_norm": 4.671596297743548, + "learning_rate": 5.847488754246173e-06, + "loss": 0.7691, "step": 13751 }, { - "epoch": 2.8836234011323127, - "grad_norm": 3.4201529018802317, - "learning_rate": 7.883028928420188e-08, - "loss": 0.1105, + "epoch": 1.9418243434058176, + "grad_norm": 3.178304624653826, + "learning_rate": 5.846101997246822e-06, + "loss": 0.5129, "step": 13752 }, { - "epoch": 2.8838330886978403, - "grad_norm": 4.684602793377559, - "learning_rate": 7.854684422652026e-08, - "loss": 0.135, + "epoch": 1.9419655464558034, + "grad_norm": 3.1632836317268738, + "learning_rate": 5.844715336782263e-06, + "loss": 0.492, "step": 13753 }, { - "epoch": 2.8840427762633674, - "grad_norm": 3.3743559909457534, - "learning_rate": 7.826390766052184e-08, - "loss": 0.1093, + "epoch": 1.9421067495057893, + "grad_norm": 3.2310499502562737, + "learning_rate": 5.8433287728847244e-06, + "loss": 0.4564, "step": 13754 }, { - "epoch": 2.884252463828895, - "grad_norm": 5.07677136829841, - "learning_rate": 7.798147960070612e-08, - "loss": 0.1603, + "epoch": 1.9422479525557752, + "grad_norm": 2.466316171533824, + "learning_rate": 5.841942305586432e-06, + "loss": 0.3552, "step": 13755 }, { - "epoch": 2.8844621513944224, - "grad_norm": 4.319795751580264, - "learning_rate": 7.769956006154821e-08, - "loss": 0.1548, + "epoch": 1.942389155605761, + "grad_norm": 2.8165633566021757, + "learning_rate": 5.840555934919604e-06, + "loss": 0.3617, "step": 13756 }, { - "epoch": 2.8846718389599495, - "grad_norm": 4.139001292683012, - "learning_rate": 7.741814905749767e-08, - "loss": 0.1317, + "epoch": 1.942530358655747, + "grad_norm": 3.4886370767727666, + "learning_rate": 5.839169660916457e-06, + "loss": 0.5568, "step": 13757 }, { - "epoch": 2.884881526525477, - "grad_norm": 3.6525670994330754, - "learning_rate": 7.713724660297628e-08, - "loss": 0.12, + "epoch": 1.9426715617057329, + "grad_norm": 3.317207974281502, + "learning_rate": 5.837783483609214e-06, + "loss": 0.5092, "step": 13758 }, { - "epoch": 2.8850912140910046, - "grad_norm": 5.127382731285871, - "learning_rate": 7.685685271238141e-08, - "loss": 0.1588, + "epoch": 1.9428127647557187, + "grad_norm": 4.399203870737688, + "learning_rate": 5.836397403030084e-06, + "loss": 0.6128, "step": 13759 }, { - "epoch": 2.8853009016565316, - "grad_norm": 3.766361395381949, - "learning_rate": 7.65769674000849e-08, - "loss": 0.1171, + "epoch": 1.9429539678057046, + "grad_norm": 3.2075078474275722, + "learning_rate": 5.835011419211285e-06, + "loss": 0.5159, "step": 13760 }, { - "epoch": 2.885510589222059, - "grad_norm": 4.290458624274155, - "learning_rate": 7.629759068042863e-08, - "loss": 0.1138, + "epoch": 1.9430951708556905, + "grad_norm": 3.022133997885696, + "learning_rate": 5.833625532185013e-06, + "loss": 0.4589, "step": 13761 }, { - "epoch": 2.8857202767875867, - "grad_norm": 3.971951778673911, - "learning_rate": 7.601872256773335e-08, - "loss": 0.1314, + "epoch": 1.9432363739056764, + "grad_norm": 3.1820024107020703, + "learning_rate": 5.83223974198349e-06, + "loss": 0.4969, "step": 13762 }, { - "epoch": 2.8859299643531138, - "grad_norm": 4.6176523666054425, - "learning_rate": 7.574036307629096e-08, - "loss": 0.146, + "epoch": 1.9433775769556623, + "grad_norm": 4.08775149623513, + "learning_rate": 5.830854048638913e-06, + "loss": 0.5815, "step": 13763 }, { - "epoch": 2.8861396519186413, - "grad_norm": 3.9580045566408972, - "learning_rate": 7.546251222036671e-08, - "loss": 0.1262, + "epoch": 1.9435187800056481, + "grad_norm": 3.4101267454326067, + "learning_rate": 5.829468452183489e-06, + "loss": 0.4807, "step": 13764 }, { - "epoch": 2.886349339484169, - "grad_norm": 4.600645487202965, - "learning_rate": 7.518517001420256e-08, - "loss": 0.1318, + "epoch": 1.943659983055634, + "grad_norm": 3.153640654124991, + "learning_rate": 5.828082952649416e-06, + "loss": 0.49, "step": 13765 }, { - "epoch": 2.886559027049696, - "grad_norm": 4.560487759266229, - "learning_rate": 7.490833647201268e-08, - "loss": 0.1682, + "epoch": 1.94380118610562, + "grad_norm": 3.747866943841111, + "learning_rate": 5.8266975500688945e-06, + "loss": 0.5642, "step": 13766 }, { - "epoch": 2.8867687146152234, - "grad_norm": 4.061911558898941, - "learning_rate": 7.463201160798461e-08, - "loss": 0.112, + "epoch": 1.9439423891556058, + "grad_norm": 3.771895744198372, + "learning_rate": 5.825312244474115e-06, + "loss": 0.5716, "step": 13767 }, { - "epoch": 2.8869784021807505, - "grad_norm": 4.184090925820003, - "learning_rate": 7.435619543627925e-08, - "loss": 0.1418, + "epoch": 1.9440835922055917, + "grad_norm": 3.8694386174022037, + "learning_rate": 5.823927035897285e-06, + "loss": 0.5732, "step": 13768 }, { - "epoch": 2.887188089746278, - "grad_norm": 5.73613067416364, - "learning_rate": 7.40808879710353e-08, - "loss": 0.1338, + "epoch": 1.9442247952555776, + "grad_norm": 3.40566607824833, + "learning_rate": 5.822541924370583e-06, + "loss": 0.5514, "step": 13769 }, { - "epoch": 2.887397777311805, - "grad_norm": 3.5627548911905484, - "learning_rate": 7.380608922636145e-08, - "loss": 0.1053, + "epoch": 1.9443659983055634, + "grad_norm": 4.102690030683655, + "learning_rate": 5.821156909926202e-06, + "loss": 0.7516, "step": 13770 }, { - "epoch": 2.8876074648773327, - "grad_norm": 4.208702908196676, - "learning_rate": 7.353179921634091e-08, - "loss": 0.1197, + "epoch": 1.9445072013555493, + "grad_norm": 3.1714121307007863, + "learning_rate": 5.8197719925963305e-06, + "loss": 0.4997, "step": 13771 }, { - "epoch": 2.88781715244286, - "grad_norm": 5.590035603354481, - "learning_rate": 7.325801795503351e-08, - "loss": 0.1418, + "epoch": 1.9446484044055352, + "grad_norm": 2.9174763733733333, + "learning_rate": 5.818387172413151e-06, + "loss": 0.4195, "step": 13772 }, { - "epoch": 2.8880268400083873, - "grad_norm": 4.084587563667208, - "learning_rate": 7.298474545646917e-08, - "loss": 0.116, + "epoch": 1.944789607455521, + "grad_norm": 3.99556137492448, + "learning_rate": 5.817002449408844e-06, + "loss": 0.6678, "step": 13773 }, { - "epoch": 2.888236527573915, - "grad_norm": 4.686438539645786, - "learning_rate": 7.271198173465332e-08, - "loss": 0.0872, + "epoch": 1.944930810505507, + "grad_norm": 2.7582950023349646, + "learning_rate": 5.815617823615599e-06, + "loss": 0.3958, "step": 13774 }, { - "epoch": 2.8884462151394423, - "grad_norm": 3.822956589497696, - "learning_rate": 7.243972680356814e-08, - "loss": 0.1371, + "epoch": 1.9450720135554929, + "grad_norm": 4.246422025136108, + "learning_rate": 5.814233295065588e-06, + "loss": 0.6632, "step": 13775 }, { - "epoch": 2.8886559027049694, - "grad_norm": 5.197993450817628, - "learning_rate": 7.216798067716357e-08, - "loss": 0.1737, + "epoch": 1.9452132166054787, + "grad_norm": 3.178236633369428, + "learning_rate": 5.812848863790988e-06, + "loss": 0.5369, "step": 13776 }, { - "epoch": 2.888865590270497, - "grad_norm": 4.259208436369405, - "learning_rate": 7.189674336936958e-08, - "loss": 0.1197, + "epoch": 1.9453544196554646, + "grad_norm": 3.483258584170309, + "learning_rate": 5.811464529823975e-06, + "loss": 0.494, "step": 13777 }, { - "epoch": 2.8890752778360245, - "grad_norm": 4.649931417656924, - "learning_rate": 7.162601489408728e-08, - "loss": 0.1443, + "epoch": 1.9454956227054505, + "grad_norm": 3.5350551065851006, + "learning_rate": 5.810080293196709e-06, + "loss": 0.6006, "step": 13778 }, { - "epoch": 2.8892849654015516, - "grad_norm": 3.85257100949017, - "learning_rate": 7.135579526519109e-08, - "loss": 0.1632, + "epoch": 1.9456368257554364, + "grad_norm": 3.5691012710099614, + "learning_rate": 5.808696153941374e-06, + "loss": 0.5441, "step": 13779 }, { - "epoch": 2.889494652967079, - "grad_norm": 3.811742704204405, - "learning_rate": 7.108608449653109e-08, - "loss": 0.1138, + "epoch": 1.9457780288054223, + "grad_norm": 3.374503810040913, + "learning_rate": 5.807312112090129e-06, + "loss": 0.496, "step": 13780 }, { - "epoch": 2.8897043405326066, - "grad_norm": 3.778131224519266, - "learning_rate": 7.081688260193065e-08, - "loss": 0.1406, + "epoch": 1.9459192318554082, + "grad_norm": 3.6227972384810387, + "learning_rate": 5.805928167675139e-06, + "loss": 0.5632, "step": 13781 }, { - "epoch": 2.8899140280981337, - "grad_norm": 3.79256460308423, - "learning_rate": 7.054818959518538e-08, - "loss": 0.1309, + "epoch": 1.946060434905394, + "grad_norm": 3.652366831131586, + "learning_rate": 5.804544320728567e-06, + "loss": 0.5508, "step": 13782 }, { - "epoch": 2.890123715663661, - "grad_norm": 4.793540105986759, - "learning_rate": 7.028000549006764e-08, - "loss": 0.1595, + "epoch": 1.94620163795538, + "grad_norm": 3.5063688863046467, + "learning_rate": 5.8031605712825736e-06, + "loss": 0.4985, "step": 13783 }, { - "epoch": 2.8903334032291887, - "grad_norm": 3.659232194122678, - "learning_rate": 7.001233030032195e-08, - "loss": 0.1216, + "epoch": 1.9463428410053658, + "grad_norm": 3.7240665469447016, + "learning_rate": 5.801776919369317e-06, + "loss": 0.4996, "step": 13784 }, { - "epoch": 2.890543090794716, - "grad_norm": 5.305010152163205, - "learning_rate": 6.974516403966624e-08, - "loss": 0.1375, + "epoch": 1.9464840440553517, + "grad_norm": 3.8861110331045547, + "learning_rate": 5.800393365020951e-06, + "loss": 0.5642, "step": 13785 }, { - "epoch": 2.8907527783602434, - "grad_norm": 4.532380999215023, - "learning_rate": 6.947850672179512e-08, - "loss": 0.117, + "epoch": 1.9466252471053376, + "grad_norm": 3.5806370757476405, + "learning_rate": 5.799009908269629e-06, + "loss": 0.578, "step": 13786 }, { - "epoch": 2.8909624659257704, - "grad_norm": 5.073945022028327, - "learning_rate": 6.921235836037321e-08, - "loss": 0.1645, + "epoch": 1.9467664501553235, + "grad_norm": 3.6624356225320756, + "learning_rate": 5.797626549147504e-06, + "loss": 0.5771, "step": 13787 }, { - "epoch": 2.891172153491298, - "grad_norm": 5.683442423631372, - "learning_rate": 6.894671896904292e-08, - "loss": 0.1768, + "epoch": 1.9469076532053093, + "grad_norm": 3.3105885611206203, + "learning_rate": 5.7962432876867215e-06, + "loss": 0.4901, "step": 13788 }, { - "epoch": 2.891381841056825, - "grad_norm": 4.370577394239539, - "learning_rate": 6.868158856141671e-08, - "loss": 0.1251, + "epoch": 1.9470488562552952, + "grad_norm": 3.206785698449427, + "learning_rate": 5.79486012391943e-06, + "loss": 0.4367, "step": 13789 }, { - "epoch": 2.8915915286223526, - "grad_norm": 3.681890435938822, - "learning_rate": 6.841696715108481e-08, - "loss": 0.1079, + "epoch": 1.947190059305281, + "grad_norm": 3.4711654354660717, + "learning_rate": 5.79347705787777e-06, + "loss": 0.5143, "step": 13790 }, { - "epoch": 2.89180121618788, - "grad_norm": 4.356078192171075, - "learning_rate": 6.81528547516086e-08, - "loss": 0.184, + "epoch": 1.947331262355267, + "grad_norm": 3.4816488543827124, + "learning_rate": 5.7920940895938896e-06, + "loss": 0.5275, "step": 13791 }, { - "epoch": 2.892010903753407, - "grad_norm": 5.938306152051643, - "learning_rate": 6.788925137652391e-08, - "loss": 0.1676, + "epoch": 1.9474724654052529, + "grad_norm": 3.916240785387009, + "learning_rate": 5.790711219099925e-06, + "loss": 0.4985, "step": 13792 }, { - "epoch": 2.8922205913189347, - "grad_norm": 4.354426777975696, - "learning_rate": 6.762615703934217e-08, - "loss": 0.1464, + "epoch": 1.9476136684552388, + "grad_norm": 3.52374684398223, + "learning_rate": 5.789328446428014e-06, + "loss": 0.6075, "step": 13793 }, { - "epoch": 2.8924302788844622, - "grad_norm": 4.491786503723854, - "learning_rate": 6.736357175354702e-08, - "loss": 0.1375, + "epoch": 1.9477548715052246, + "grad_norm": 3.709756373628427, + "learning_rate": 5.787945771610296e-06, + "loss": 0.5579, "step": 13794 }, { - "epoch": 2.8926399664499893, - "grad_norm": 4.523607806926432, - "learning_rate": 6.710149553259548e-08, - "loss": 0.131, + "epoch": 1.9478960745552105, + "grad_norm": 3.819090608473388, + "learning_rate": 5.786563194678892e-06, + "loss": 0.5557, "step": 13795 }, { - "epoch": 2.892849654015517, - "grad_norm": 4.52541757002651, - "learning_rate": 6.683992838992015e-08, - "loss": 0.1218, + "epoch": 1.9480372776051964, + "grad_norm": 4.114388115171828, + "learning_rate": 5.785180715665941e-06, + "loss": 0.6161, "step": 13796 }, { - "epoch": 2.8930593415810444, - "grad_norm": 4.748950922398838, - "learning_rate": 6.657887033892695e-08, - "loss": 0.1455, + "epoch": 1.9481784806551823, + "grad_norm": 4.558704542463181, + "learning_rate": 5.783798334603572e-06, + "loss": 0.6786, "step": 13797 }, { - "epoch": 2.8932690291465715, - "grad_norm": 4.290681213145596, - "learning_rate": 6.631832139299521e-08, - "loss": 0.1551, + "epoch": 1.9483196837051682, + "grad_norm": 3.1923698140343726, + "learning_rate": 5.782416051523909e-06, + "loss": 0.4566, "step": 13798 }, { - "epoch": 2.893478716712099, - "grad_norm": 5.030716401845468, - "learning_rate": 6.605828156547866e-08, - "loss": 0.1692, + "epoch": 1.948460886755154, + "grad_norm": 3.5577115019385945, + "learning_rate": 5.781033866459076e-06, + "loss": 0.5477, "step": 13799 }, { - "epoch": 2.8936884042776265, - "grad_norm": 4.357220335680682, - "learning_rate": 6.579875086970444e-08, - "loss": 0.1397, + "epoch": 1.94860208980514, + "grad_norm": 5.025136758062498, + "learning_rate": 5.779651779441192e-06, + "loss": 0.6211, "step": 13800 }, { - "epoch": 2.8938980918431536, - "grad_norm": 3.429329552577863, - "learning_rate": 6.553972931897412e-08, - "loss": 0.1189, + "epoch": 1.9487432928551258, + "grad_norm": 3.935652991753846, + "learning_rate": 5.778269790502379e-06, + "loss": 0.6761, "step": 13801 }, { - "epoch": 2.894107779408681, - "grad_norm": 3.297131085647814, - "learning_rate": 6.528121692656375e-08, - "loss": 0.0964, + "epoch": 1.9488844959051117, + "grad_norm": 4.10907752744415, + "learning_rate": 5.776887899674752e-06, + "loss": 0.5951, "step": 13802 }, { - "epoch": 2.8943174669742087, - "grad_norm": 4.453298450216547, - "learning_rate": 6.502321370572162e-08, - "loss": 0.1459, + "epoch": 1.9490256989550976, + "grad_norm": 3.3688068119751344, + "learning_rate": 5.775506106990427e-06, + "loss": 0.5124, "step": 13803 }, { - "epoch": 2.8945271545397357, - "grad_norm": 4.725714378085602, - "learning_rate": 6.47657196696705e-08, - "loss": 0.142, + "epoch": 1.9491669020050835, + "grad_norm": 4.0128604043697855, + "learning_rate": 5.774124412481517e-06, + "loss": 0.563, "step": 13804 }, { - "epoch": 2.8947368421052633, - "grad_norm": 4.80071577620342, - "learning_rate": 6.450873483160869e-08, - "loss": 0.1317, + "epoch": 1.9493081050550694, + "grad_norm": 3.082276612774261, + "learning_rate": 5.77274281618013e-06, + "loss": 0.4383, "step": 13805 }, { - "epoch": 2.8949465296707904, - "grad_norm": 4.1107128031699895, - "learning_rate": 6.425225920470679e-08, - "loss": 0.0985, + "epoch": 1.9494493081050552, + "grad_norm": 3.1943219807835965, + "learning_rate": 5.771361318118374e-06, + "loss": 0.4556, "step": 13806 }, { - "epoch": 2.895156217236318, - "grad_norm": 3.6194322138437522, - "learning_rate": 6.399629280210873e-08, - "loss": 0.1375, + "epoch": 1.949590511155041, + "grad_norm": 3.3401408724581962, + "learning_rate": 5.76997991832835e-06, + "loss": 0.4892, "step": 13807 }, { - "epoch": 2.8953659048018454, - "grad_norm": 4.882275750413403, - "learning_rate": 6.374083563693401e-08, - "loss": 0.1952, + "epoch": 1.9497317142050268, + "grad_norm": 3.6066079437612433, + "learning_rate": 5.76859861684217e-06, + "loss": 0.4805, "step": 13808 }, { - "epoch": 2.8955755923673725, - "grad_norm": 5.317718561990381, - "learning_rate": 6.348588772227548e-08, - "loss": 0.1493, + "epoch": 1.9498729172550127, + "grad_norm": 3.484788107325088, + "learning_rate": 5.767217413691933e-06, + "loss": 0.5242, "step": 13809 }, { - "epoch": 2.8957852799329, - "grad_norm": 3.6934150767753158, - "learning_rate": 6.323144907119827e-08, - "loss": 0.1394, + "epoch": 1.9500141203049985, + "grad_norm": 3.5791471640248163, + "learning_rate": 5.765836308909731e-06, + "loss": 0.5442, "step": 13810 }, { - "epoch": 2.895994967498427, - "grad_norm": 3.5514128854193987, - "learning_rate": 6.297751969674415e-08, - "loss": 0.128, + "epoch": 1.9501553233549844, + "grad_norm": 3.7059679000377486, + "learning_rate": 5.764455302527671e-06, + "loss": 0.5531, "step": 13811 }, { - "epoch": 2.8962046550639546, - "grad_norm": 4.516790239362552, - "learning_rate": 6.272409961192716e-08, - "loss": 0.1338, + "epoch": 1.9502965264049703, + "grad_norm": 3.248891575953556, + "learning_rate": 5.763074394577835e-06, + "loss": 0.5501, "step": 13812 }, { - "epoch": 2.896414342629482, - "grad_norm": 3.5341204487815614, - "learning_rate": 6.247118882973468e-08, - "loss": 0.1209, + "epoch": 1.9504377294549562, + "grad_norm": 3.7001767864955935, + "learning_rate": 5.761693585092321e-06, + "loss": 0.5228, "step": 13813 }, { - "epoch": 2.8966240301950092, - "grad_norm": 3.9490917623555966, - "learning_rate": 6.22187873631297e-08, - "loss": 0.1036, + "epoch": 1.950578932504942, + "grad_norm": 2.877351702339207, + "learning_rate": 5.760312874103219e-06, + "loss": 0.3711, "step": 13814 }, { - "epoch": 2.8968337177605368, - "grad_norm": 4.233294170353973, - "learning_rate": 6.196689522504962e-08, - "loss": 0.1395, + "epoch": 1.950720135554928, + "grad_norm": 3.964246159195276, + "learning_rate": 5.7589322616426135e-06, + "loss": 0.6033, "step": 13815 }, { - "epoch": 2.8970434053260643, - "grad_norm": 4.446203336134346, - "learning_rate": 6.17155124284019e-08, - "loss": 0.151, + "epoch": 1.9508613386049138, + "grad_norm": 3.930087773436957, + "learning_rate": 5.757551747742592e-06, + "loss": 0.5532, "step": 13816 }, { - "epoch": 2.8972530928915914, - "grad_norm": 3.5325822238506888, - "learning_rate": 6.146463898607069e-08, - "loss": 0.1092, + "epoch": 1.9510025416548997, + "grad_norm": 3.2780870321423166, + "learning_rate": 5.756171332435237e-06, + "loss": 0.4787, "step": 13817 }, { - "epoch": 2.897462780457119, - "grad_norm": 4.846831844204268, - "learning_rate": 6.121427491091458e-08, - "loss": 0.1551, + "epoch": 1.9511437447048856, + "grad_norm": 3.7816514407170203, + "learning_rate": 5.754791015752624e-06, + "loss": 0.4823, "step": 13818 }, { - "epoch": 2.8976724680226464, - "grad_norm": 4.000238338136815, - "learning_rate": 6.096442021576443e-08, - "loss": 0.1474, + "epoch": 1.9512849477548715, + "grad_norm": 3.8796314979021242, + "learning_rate": 5.753410797726837e-06, + "loss": 0.4861, "step": 13819 }, { - "epoch": 2.8978821555881735, - "grad_norm": 4.955699476281586, - "learning_rate": 6.071507491342665e-08, - "loss": 0.1372, + "epoch": 1.9514261508048574, + "grad_norm": 3.20094998785402, + "learning_rate": 5.752030678389948e-06, + "loss": 0.4464, "step": 13820 }, { - "epoch": 2.898091843153701, - "grad_norm": 3.542579338749896, - "learning_rate": 6.046623901667992e-08, - "loss": 0.1156, + "epoch": 1.9515673538548433, + "grad_norm": 3.702219167992484, + "learning_rate": 5.750650657774032e-06, + "loss": 0.5074, "step": 13821 }, { - "epoch": 2.8983015307192286, - "grad_norm": 3.7799674950957827, - "learning_rate": 6.021791253827736e-08, - "loss": 0.1193, + "epoch": 1.9517085569048291, + "grad_norm": 3.176676291575915, + "learning_rate": 5.749270735911159e-06, + "loss": 0.4893, "step": 13822 }, { - "epoch": 2.8985112182847557, - "grad_norm": 4.202857556972337, - "learning_rate": 5.997009549094768e-08, - "loss": 0.1248, + "epoch": 1.951849759954815, + "grad_norm": 3.8205795923620065, + "learning_rate": 5.7478909128334e-06, + "loss": 0.5648, "step": 13823 }, { - "epoch": 2.898720905850283, - "grad_norm": 4.364355450884744, - "learning_rate": 5.972278788738961e-08, - "loss": 0.1512, + "epoch": 1.951990963004801, + "grad_norm": 3.5363117043692824, + "learning_rate": 5.746511188572814e-06, + "loss": 0.4924, "step": 13824 }, { - "epoch": 2.8989305934158107, - "grad_norm": 2.853207299829324, - "learning_rate": 5.947598974027968e-08, - "loss": 0.0813, + "epoch": 1.9521321660547868, + "grad_norm": 3.4600751105115455, + "learning_rate": 5.745131563161475e-06, + "loss": 0.4855, "step": 13825 }, { - "epoch": 2.899140280981338, - "grad_norm": 4.566298793849188, - "learning_rate": 5.922970106226555e-08, - "loss": 0.1471, + "epoch": 1.9522733691047727, + "grad_norm": 3.9018142772415043, + "learning_rate": 5.743752036631443e-06, + "loss": 0.6898, "step": 13826 }, { - "epoch": 2.8993499685468653, - "grad_norm": 5.040366639444228, - "learning_rate": 5.898392186597157e-08, - "loss": 0.1305, + "epoch": 1.9524145721547586, + "grad_norm": 3.714293682673438, + "learning_rate": 5.742372609014773e-06, + "loss": 0.5784, "step": 13827 }, { - "epoch": 2.8995596561123924, - "grad_norm": 4.525005780057943, - "learning_rate": 5.873865216399322e-08, - "loss": 0.1175, + "epoch": 1.9525557752047444, + "grad_norm": 4.485189287932414, + "learning_rate": 5.740993280343529e-06, + "loss": 0.669, "step": 13828 }, { - "epoch": 2.89976934367792, - "grad_norm": 4.590534552067123, - "learning_rate": 5.849389196890043e-08, - "loss": 0.1329, + "epoch": 1.9526969782547303, + "grad_norm": 3.155621531524472, + "learning_rate": 5.739614050649756e-06, + "loss": 0.5118, "step": 13829 }, { - "epoch": 2.899979031243447, - "grad_norm": 4.094531859106882, - "learning_rate": 5.8249641293238733e-08, - "loss": 0.1175, + "epoch": 1.9528381813047162, + "grad_norm": 4.1372806227464265, + "learning_rate": 5.738234919965516e-06, + "loss": 0.5238, "step": 13830 }, { - "epoch": 2.9001887188089746, - "grad_norm": 3.823303721530756, - "learning_rate": 5.80059001495259e-08, - "loss": 0.1231, + "epoch": 1.952979384354702, + "grad_norm": 3.4569778488545895, + "learning_rate": 5.736855888322854e-06, + "loss": 0.5069, "step": 13831 }, { - "epoch": 2.900398406374502, - "grad_norm": 4.307276652204052, - "learning_rate": 5.776266855025525e-08, - "loss": 0.1367, + "epoch": 1.953120587404688, + "grad_norm": 3.029349127087165, + "learning_rate": 5.735476955753821e-06, + "loss": 0.4925, "step": 13832 }, { - "epoch": 2.900608093940029, - "grad_norm": 3.6817124414143034, - "learning_rate": 5.751994650789017e-08, - "loss": 0.1153, + "epoch": 1.9532617904546739, + "grad_norm": 3.1786968758401417, + "learning_rate": 5.734098122290463e-06, + "loss": 0.5633, "step": 13833 }, { - "epoch": 2.9008177815055567, - "grad_norm": 4.0263924498439785, - "learning_rate": 5.727773403487291e-08, - "loss": 0.1505, + "epoch": 1.9534029935046597, + "grad_norm": 4.170378422052908, + "learning_rate": 5.732719387964822e-06, + "loss": 0.5949, "step": 13834 }, { - "epoch": 2.901027469071084, - "grad_norm": 4.945908552617482, - "learning_rate": 5.7036031143616887e-08, - "loss": 0.1921, + "epoch": 1.9535441965546456, + "grad_norm": 4.898545234731712, + "learning_rate": 5.731340752808939e-06, + "loss": 0.6024, "step": 13835 }, { - "epoch": 2.9012371566366113, - "grad_norm": 4.108744952475568, - "learning_rate": 5.679483784650996e-08, - "loss": 0.1076, + "epoch": 1.9536853996046315, + "grad_norm": 3.0028863105505628, + "learning_rate": 5.729962216854853e-06, + "loss": 0.4309, "step": 13836 }, { - "epoch": 2.901446844202139, - "grad_norm": 4.4400271410893835, - "learning_rate": 5.655415415591226e-08, - "loss": 0.1371, + "epoch": 1.9538266026546174, + "grad_norm": 4.375925170640629, + "learning_rate": 5.728583780134601e-06, + "loss": 0.5303, "step": 13837 }, { - "epoch": 2.9016565317676664, - "grad_norm": 4.797076049437198, - "learning_rate": 5.631398008415945e-08, - "loss": 0.1434, + "epoch": 1.9539678057046033, + "grad_norm": 2.8615918615571716, + "learning_rate": 5.727205442680218e-06, + "loss": 0.4777, "step": 13838 }, { - "epoch": 2.9018662193331934, - "grad_norm": 3.7751024552391326, - "learning_rate": 5.60743156435628e-08, - "loss": 0.1315, + "epoch": 1.9541090087545891, + "grad_norm": 3.5659509572053074, + "learning_rate": 5.725827204523735e-06, + "loss": 0.5963, "step": 13839 }, { - "epoch": 2.902075906898721, - "grad_norm": 4.186904641720187, - "learning_rate": 5.583516084640472e-08, - "loss": 0.1533, + "epoch": 1.954250211804575, + "grad_norm": 3.2799098644343894, + "learning_rate": 5.724449065697182e-06, + "loss": 0.4677, "step": 13840 }, { - "epoch": 2.9022855944642485, - "grad_norm": 5.739363903527498, - "learning_rate": 5.559651570494096e-08, - "loss": 0.1479, + "epoch": 1.9543914148545607, + "grad_norm": 3.5694294594028744, + "learning_rate": 5.723071026232582e-06, + "loss": 0.5882, "step": 13841 }, { - "epoch": 2.9024952820297756, - "grad_norm": 4.2788848719969135, - "learning_rate": 5.535838023140394e-08, - "loss": 0.1403, + "epoch": 1.9545326179045466, + "grad_norm": 4.425207179441219, + "learning_rate": 5.721693086161968e-06, + "loss": 0.5561, "step": 13842 }, { - "epoch": 2.902704969595303, - "grad_norm": 3.430011620513265, - "learning_rate": 5.5120754437997245e-08, - "loss": 0.092, + "epoch": 1.9546738209545325, + "grad_norm": 3.583081729841661, + "learning_rate": 5.7203152455173595e-06, + "loss": 0.555, "step": 13843 }, { - "epoch": 2.9029146571608306, - "grad_norm": 4.518674634722797, - "learning_rate": 5.4883638336901135e-08, - "loss": 0.1527, + "epoch": 1.9548150240045183, + "grad_norm": 3.4216192149915376, + "learning_rate": 5.7189375043307775e-06, + "loss": 0.4768, "step": 13844 }, { - "epoch": 2.9031243447263577, - "grad_norm": 4.162951439508755, - "learning_rate": 5.46470319402681e-08, - "loss": 0.1324, + "epoch": 1.9549562270545042, + "grad_norm": 4.079995758515541, + "learning_rate": 5.717559862634241e-06, + "loss": 0.6431, "step": 13845 }, { - "epoch": 2.9033340322918852, - "grad_norm": 4.386123205141205, - "learning_rate": 5.4410935260222894e-08, - "loss": 0.1002, + "epoch": 1.95509743010449, + "grad_norm": 3.119579100780536, + "learning_rate": 5.71618232045976e-06, + "loss": 0.5184, "step": 13846 }, { - "epoch": 2.9035437198574123, - "grad_norm": 4.0480535192231715, - "learning_rate": 5.417534830886695e-08, - "loss": 0.14, + "epoch": 1.955238633154476, + "grad_norm": 3.495508010562949, + "learning_rate": 5.714804877839348e-06, + "loss": 0.5472, "step": 13847 }, { - "epoch": 2.90375340742294, - "grad_norm": 3.7826651089442933, - "learning_rate": 5.394027109827504e-08, - "loss": 0.1208, + "epoch": 1.9553798362044619, + "grad_norm": 3.966578011661628, + "learning_rate": 5.713427534805025e-06, + "loss": 0.6499, "step": 13848 }, { - "epoch": 2.903963094988467, - "grad_norm": 3.678151201637555, - "learning_rate": 5.3705703640495324e-08, - "loss": 0.1318, + "epoch": 1.9555210392544478, + "grad_norm": 3.9938511691205254, + "learning_rate": 5.712050291388795e-06, + "loss": 0.575, "step": 13849 }, { - "epoch": 2.9041727825539945, - "grad_norm": 3.5182865330739075, - "learning_rate": 5.347164594754817e-08, - "loss": 0.1136, + "epoch": 1.9556622423044336, + "grad_norm": 3.9389384865325394, + "learning_rate": 5.710673147622664e-06, + "loss": 0.6028, "step": 13850 }, { - "epoch": 2.904382470119522, - "grad_norm": 4.064808322572299, - "learning_rate": 5.3238098031430655e-08, - "loss": 0.1105, + "epoch": 1.9558034453544195, + "grad_norm": 3.112825666880522, + "learning_rate": 5.709296103538636e-06, + "loss": 0.4854, "step": 13851 }, { - "epoch": 2.904592157685049, - "grad_norm": 5.281592986083384, - "learning_rate": 5.30050599041132e-08, - "loss": 0.1616, + "epoch": 1.9559446484044054, + "grad_norm": 3.551370600151852, + "learning_rate": 5.707919159168714e-06, + "loss": 0.5187, "step": 13852 }, { - "epoch": 2.9048018452505766, - "grad_norm": 5.943235721674502, - "learning_rate": 5.2772531577538475e-08, - "loss": 0.1454, + "epoch": 1.9560858514543913, + "grad_norm": 3.1280639918963087, + "learning_rate": 5.7065423145448974e-06, + "loss": 0.4599, "step": 13853 }, { - "epoch": 2.905011532816104, - "grad_norm": 3.5064856709648153, - "learning_rate": 5.254051306362362e-08, - "loss": 0.1006, + "epoch": 1.9562270545043772, + "grad_norm": 2.9386347629922898, + "learning_rate": 5.7051655696991825e-06, + "loss": 0.4716, "step": 13854 }, { - "epoch": 2.905221220381631, - "grad_norm": 5.404028522927519, - "learning_rate": 5.230900437426134e-08, - "loss": 0.18, + "epoch": 1.956368257554363, + "grad_norm": 4.787573359976234, + "learning_rate": 5.703788924663566e-06, + "loss": 0.7514, "step": 13855 }, { - "epoch": 2.9054309079471587, - "grad_norm": 6.1948843858583835, - "learning_rate": 5.2078005521315477e-08, - "loss": 0.1746, + "epoch": 1.956509460604349, + "grad_norm": 3.6040572966167015, + "learning_rate": 5.702412379470039e-06, + "loss": 0.5617, "step": 13856 }, { - "epoch": 2.9056405955126863, - "grad_norm": 3.4122949095594595, - "learning_rate": 5.184751651662545e-08, - "loss": 0.0912, + "epoch": 1.9566506636543348, + "grad_norm": 3.961236249244886, + "learning_rate": 5.701035934150592e-06, + "loss": 0.6041, "step": 13857 }, { - "epoch": 2.9058502830782134, - "grad_norm": 5.30846854473473, - "learning_rate": 5.161753737200403e-08, - "loss": 0.1461, + "epoch": 1.9567918667043207, + "grad_norm": 4.296631759491418, + "learning_rate": 5.6996595887372145e-06, + "loss": 0.7383, "step": 13858 }, { - "epoch": 2.906059970643741, - "grad_norm": 3.9785285506134485, - "learning_rate": 5.1388068099238465e-08, - "loss": 0.1301, + "epoch": 1.9569330697543066, + "grad_norm": 3.292921823211725, + "learning_rate": 5.698283343261885e-06, + "loss": 0.4993, "step": 13859 }, { - "epoch": 2.9062696582092684, - "grad_norm": 4.20944891432764, - "learning_rate": 5.115910871009045e-08, - "loss": 0.1384, + "epoch": 1.9570742728042925, + "grad_norm": 4.45270915797289, + "learning_rate": 5.696907197756598e-06, + "loss": 0.7326, "step": 13860 }, { - "epoch": 2.9064793457747955, - "grad_norm": 4.480922277547946, - "learning_rate": 5.093065921629281e-08, - "loss": 0.1462, + "epoch": 1.9572154758542784, + "grad_norm": 4.577343328969044, + "learning_rate": 5.695531152253328e-06, + "loss": 0.5969, "step": 13861 }, { - "epoch": 2.906689033340323, - "grad_norm": 4.278201349934148, - "learning_rate": 5.070271962955509e-08, - "loss": 0.1201, + "epoch": 1.9573566789042642, + "grad_norm": 3.682938802122358, + "learning_rate": 5.694155206784061e-06, + "loss": 0.5649, "step": 13862 }, { - "epoch": 2.9068987209058506, - "grad_norm": 4.651361254412583, - "learning_rate": 5.0475289961557926e-08, - "loss": 0.141, + "epoch": 1.9574978819542501, + "grad_norm": 3.8205525043946285, + "learning_rate": 5.692779361380762e-06, + "loss": 0.4869, "step": 13863 }, { - "epoch": 2.9071084084713776, - "grad_norm": 4.22654048225129, - "learning_rate": 5.0248370223959786e-08, - "loss": 0.1356, + "epoch": 1.957639085004236, + "grad_norm": 3.8363379839323826, + "learning_rate": 5.691403616075409e-06, + "loss": 0.5763, "step": 13864 }, { - "epoch": 2.907318096036905, - "grad_norm": 3.24158329116449, - "learning_rate": 5.0021960428390245e-08, - "loss": 0.1053, + "epoch": 1.9577802880542219, + "grad_norm": 2.8956428080460306, + "learning_rate": 5.690027970899976e-06, + "loss": 0.4346, "step": 13865 }, { - "epoch": 2.9075277836024322, - "grad_norm": 4.4276829740959185, - "learning_rate": 4.9796060586451146e-08, - "loss": 0.147, + "epoch": 1.9579214911042078, + "grad_norm": 3.619008266533831, + "learning_rate": 5.688652425886434e-06, + "loss": 0.5115, "step": 13866 }, { - "epoch": 2.9077374711679598, - "grad_norm": 4.670308421373421, - "learning_rate": 4.95706707097221e-08, - "loss": 0.1478, + "epoch": 1.9580626941541937, + "grad_norm": 3.8200475597530823, + "learning_rate": 5.687276981066746e-06, + "loss": 0.5654, "step": 13867 }, { - "epoch": 2.907947158733487, - "grad_norm": 4.05677502817684, - "learning_rate": 4.934579080975499e-08, - "loss": 0.1283, + "epoch": 1.9582038972041795, + "grad_norm": 3.179062620492597, + "learning_rate": 5.6859016364728795e-06, + "loss": 0.6054, "step": 13868 }, { - "epoch": 2.9081568462990144, - "grad_norm": 4.501895783570169, - "learning_rate": 4.9121420898073924e-08, - "loss": 0.1794, + "epoch": 1.9583451002541654, + "grad_norm": 3.40841421157824, + "learning_rate": 5.684526392136795e-06, + "loss": 0.5713, "step": 13869 }, { - "epoch": 2.908366533864542, - "grad_norm": 4.106850386948959, - "learning_rate": 4.8897560986179706e-08, - "loss": 0.1453, + "epoch": 1.9584863033041513, + "grad_norm": 3.525822698580449, + "learning_rate": 5.683151248090455e-06, + "loss": 0.484, "step": 13870 }, { - "epoch": 2.908576221430069, - "grad_norm": 4.2627163870116815, - "learning_rate": 4.867421108554427e-08, - "loss": 0.1363, + "epoch": 1.9586275063541372, + "grad_norm": 4.3823293831094166, + "learning_rate": 5.681776204365814e-06, + "loss": 0.7148, "step": 13871 }, { - "epoch": 2.9087859089955965, - "grad_norm": 3.5400293298784837, - "learning_rate": 4.845137120761512e-08, - "loss": 0.0897, + "epoch": 1.958768709404123, + "grad_norm": 4.239649864259085, + "learning_rate": 5.68040126099483e-06, + "loss": 0.8033, "step": 13872 }, { - "epoch": 2.908995596561124, - "grad_norm": 5.8029248200901025, - "learning_rate": 4.822904136381312e-08, - "loss": 0.2023, + "epoch": 1.958909912454109, + "grad_norm": 3.7789079900543023, + "learning_rate": 5.679026418009455e-06, + "loss": 0.6037, "step": 13873 }, { - "epoch": 2.909205284126651, - "grad_norm": 3.275673715138521, - "learning_rate": 4.800722156553361e-08, - "loss": 0.0994, + "epoch": 1.9590511155040948, + "grad_norm": 3.4733570263843503, + "learning_rate": 5.67765167544164e-06, + "loss": 0.5751, "step": 13874 }, { - "epoch": 2.9094149716921787, - "grad_norm": 4.386439585549887, - "learning_rate": 4.778591182414527e-08, - "loss": 0.1242, + "epoch": 1.9591923185540807, + "grad_norm": 3.2631223086709347, + "learning_rate": 5.676277033323333e-06, + "loss": 0.462, "step": 13875 }, { - "epoch": 2.909624659257706, - "grad_norm": 4.7506408851180035, - "learning_rate": 4.756511215098902e-08, - "loss": 0.163, + "epoch": 1.9593335216040666, + "grad_norm": 4.229301642082372, + "learning_rate": 5.674902491686477e-06, + "loss": 0.5815, "step": 13876 }, { - "epoch": 2.9098343468232333, - "grad_norm": 4.985320959921978, - "learning_rate": 4.734482255738359e-08, - "loss": 0.1488, + "epoch": 1.9594747246540525, + "grad_norm": 4.868895194444744, + "learning_rate": 5.673528050563023e-06, + "loss": 0.5287, "step": 13877 }, { - "epoch": 2.910044034388761, - "grad_norm": 5.124547729074882, - "learning_rate": 4.7125043054617734e-08, - "loss": 0.1471, + "epoch": 1.9596159277040384, + "grad_norm": 4.160999733429085, + "learning_rate": 5.672153709984909e-06, + "loss": 0.5596, "step": 13878 }, { - "epoch": 2.9102537219542883, - "grad_norm": 5.114608704899934, - "learning_rate": 4.6905773653955764e-08, - "loss": 0.136, + "epoch": 1.9597571307540242, + "grad_norm": 3.0742995196513743, + "learning_rate": 5.670779469984077e-06, + "loss": 0.4386, "step": 13879 }, { - "epoch": 2.9104634095198154, - "grad_norm": 4.506309649445351, - "learning_rate": 4.6687014366635363e-08, - "loss": 0.1436, + "epoch": 1.9598983338040101, + "grad_norm": 3.754947539688105, + "learning_rate": 5.669405330592457e-06, + "loss": 0.5171, "step": 13880 }, { - "epoch": 2.910673097085343, - "grad_norm": 4.053470722084693, - "learning_rate": 4.646876520386867e-08, - "loss": 0.1426, + "epoch": 1.960039536853996, + "grad_norm": 3.681520220121038, + "learning_rate": 5.668031291841982e-06, + "loss": 0.5486, "step": 13881 }, { - "epoch": 2.9108827846508705, - "grad_norm": 4.134292669077508, - "learning_rate": 4.6251026176841186e-08, - "loss": 0.1392, + "epoch": 1.960180739903982, + "grad_norm": 4.254995752281185, + "learning_rate": 5.666657353764594e-06, + "loss": 0.7035, "step": 13882 }, { - "epoch": 2.9110924722163976, - "grad_norm": 3.9967663684574695, - "learning_rate": 4.603379729671287e-08, - "loss": 0.1261, + "epoch": 1.9603219429539678, + "grad_norm": 3.3751069009941186, + "learning_rate": 5.6652835163922175e-06, + "loss": 0.5481, "step": 13883 }, { - "epoch": 2.911302159781925, - "grad_norm": 3.758979553684162, - "learning_rate": 4.5817078574615926e-08, - "loss": 0.1299, + "epoch": 1.9604631460039537, + "grad_norm": 3.0393066593083886, + "learning_rate": 5.66390977975678e-06, + "loss": 0.4687, "step": 13884 }, { - "epoch": 2.911511847347452, - "grad_norm": 3.982747293125947, - "learning_rate": 4.560087002165925e-08, - "loss": 0.1208, + "epoch": 1.9606043490539395, + "grad_norm": 3.4159779750162085, + "learning_rate": 5.662536143890207e-06, + "loss": 0.4796, "step": 13885 }, { - "epoch": 2.9117215349129797, - "grad_norm": 4.438562871389678, - "learning_rate": 4.5385171648922864e-08, - "loss": 0.1231, + "epoch": 1.9607455521039254, + "grad_norm": 4.033926258600158, + "learning_rate": 5.66116260882442e-06, + "loss": 0.5418, "step": 13886 }, { - "epoch": 2.9119312224785068, - "grad_norm": 3.768071921602696, - "learning_rate": 4.516998346746126e-08, - "loss": 0.1314, + "epoch": 1.9608867551539113, + "grad_norm": 3.2682517018447785, + "learning_rate": 5.65978917459134e-06, + "loss": 0.503, "step": 13887 }, { - "epoch": 2.9121409100440343, - "grad_norm": 5.303100985996429, - "learning_rate": 4.495530548830451e-08, - "loss": 0.1632, + "epoch": 1.9610279582038972, + "grad_norm": 4.835351631156877, + "learning_rate": 5.658415841222887e-06, + "loss": 0.7347, "step": 13888 }, { - "epoch": 2.912350597609562, - "grad_norm": 3.6550054292674794, - "learning_rate": 4.474113772245381e-08, - "loss": 0.1212, + "epoch": 1.961169161253883, + "grad_norm": 3.618412470826112, + "learning_rate": 5.657042608750973e-06, + "loss": 0.5863, "step": 13889 }, { - "epoch": 2.912560285175089, - "grad_norm": 3.974327290387412, - "learning_rate": 4.452748018088704e-08, - "loss": 0.1219, + "epoch": 1.961310364303869, + "grad_norm": 3.7088054160165806, + "learning_rate": 5.655669477207513e-06, + "loss": 0.4978, "step": 13890 }, { - "epoch": 2.9127699727406164, - "grad_norm": 5.213483999082169, - "learning_rate": 4.431433287455322e-08, - "loss": 0.1525, + "epoch": 1.9614515673538548, + "grad_norm": 3.1364006258765063, + "learning_rate": 5.6542964466244185e-06, + "loss": 0.4523, "step": 13891 }, { - "epoch": 2.912979660306144, - "grad_norm": 3.425745334716992, - "learning_rate": 4.4101695814378066e-08, - "loss": 0.1329, + "epoch": 1.9615927704038407, + "grad_norm": 3.764609404713001, + "learning_rate": 5.6529235170335995e-06, + "loss": 0.639, "step": 13892 }, { - "epoch": 2.913189347871671, - "grad_norm": 3.5104345243880095, - "learning_rate": 4.3889569011257296e-08, - "loss": 0.0966, + "epoch": 1.9617339734538266, + "grad_norm": 3.1898886127301855, + "learning_rate": 5.651550688466956e-06, + "loss": 0.5032, "step": 13893 }, { - "epoch": 2.9133990354371986, - "grad_norm": 4.969545049795125, - "learning_rate": 4.367795247606443e-08, - "loss": 0.1629, + "epoch": 1.9618751765038125, + "grad_norm": 4.015065969944727, + "learning_rate": 5.650177960956399e-06, + "loss": 0.5573, "step": 13894 }, { - "epoch": 2.913608723002726, - "grad_norm": 3.4012278642741545, - "learning_rate": 4.346684621964636e-08, - "loss": 0.1196, + "epoch": 1.9620163795537984, + "grad_norm": 2.809500306937714, + "learning_rate": 5.648805334533829e-06, + "loss": 0.3814, "step": 13895 }, { - "epoch": 2.913818410568253, - "grad_norm": 4.59595447939931, - "learning_rate": 4.3256250252818874e-08, - "loss": 0.1269, + "epoch": 1.9621575826037843, + "grad_norm": 2.877753316208816, + "learning_rate": 5.647432809231147e-06, + "loss": 0.4398, "step": 13896 }, { - "epoch": 2.9140280981337807, - "grad_norm": 4.255845890378312, - "learning_rate": 4.304616458637889e-08, - "loss": 0.1475, + "epoch": 1.9622987856537701, + "grad_norm": 3.7623274377365536, + "learning_rate": 5.6460603850802435e-06, + "loss": 0.5072, "step": 13897 }, { - "epoch": 2.9142377856993082, - "grad_norm": 4.566365290259029, - "learning_rate": 4.283658923109335e-08, - "loss": 0.1582, + "epoch": 1.962439988703756, + "grad_norm": 3.3750408930654276, + "learning_rate": 5.644688062113012e-06, + "loss": 0.4831, "step": 13898 }, { - "epoch": 2.9144474732648353, - "grad_norm": 4.368190747496577, - "learning_rate": 4.262752419770033e-08, - "loss": 0.1482, + "epoch": 1.962581191753742, + "grad_norm": 3.9120817849490495, + "learning_rate": 5.6433158403613516e-06, + "loss": 0.6725, "step": 13899 }, { - "epoch": 2.914657160830363, - "grad_norm": 4.859932985690303, - "learning_rate": 4.241896949691793e-08, - "loss": 0.1771, + "epoch": 1.9627223948037278, + "grad_norm": 3.421279744670933, + "learning_rate": 5.6419437198571525e-06, + "loss": 0.5124, "step": 13900 }, { - "epoch": 2.9148668483958904, - "grad_norm": 3.5907133471378576, - "learning_rate": 4.2210925139434255e-08, - "loss": 0.1185, + "epoch": 1.9628635978537137, + "grad_norm": 3.1077835342019053, + "learning_rate": 5.640571700632298e-06, + "loss": 0.389, "step": 13901 }, { - "epoch": 2.9150765359614175, - "grad_norm": 7.081136643831514, - "learning_rate": 4.200339113591079e-08, - "loss": 0.1642, + "epoch": 1.9630048009036996, + "grad_norm": 3.5840775895832673, + "learning_rate": 5.639199782718674e-06, + "loss": 0.5772, "step": 13902 }, { - "epoch": 2.915286223526945, - "grad_norm": 4.187403115532427, - "learning_rate": 4.1796367496985677e-08, - "loss": 0.1366, + "epoch": 1.9631460039536854, + "grad_norm": 4.651180117211612, + "learning_rate": 5.6378279661481635e-06, + "loss": 0.7408, "step": 13903 }, { - "epoch": 2.915495911092472, - "grad_norm": 5.160077260832875, - "learning_rate": 4.1589854233268225e-08, - "loss": 0.1804, + "epoch": 1.9632872070036713, + "grad_norm": 4.408671616319314, + "learning_rate": 5.6364562509526474e-06, + "loss": 0.7948, "step": 13904 }, { - "epoch": 2.9157055986579996, - "grad_norm": 4.262358913060092, - "learning_rate": 4.138385135534218e-08, - "loss": 0.1318, + "epoch": 1.9634284100536572, + "grad_norm": 4.1238726425020324, + "learning_rate": 5.635084637164005e-06, + "loss": 0.4954, "step": 13905 }, { - "epoch": 2.9159152862235267, - "grad_norm": 3.5770074583303066, - "learning_rate": 4.117835887376576e-08, - "loss": 0.1054, + "epoch": 1.963569613103643, + "grad_norm": 2.8992526370140292, + "learning_rate": 5.6337131248141085e-06, + "loss": 0.4686, "step": 13906 }, { - "epoch": 2.916124973789054, - "grad_norm": 3.050250517123977, - "learning_rate": 4.0973376799072764e-08, - "loss": 0.0912, + "epoch": 1.963710816153629, + "grad_norm": 3.407495166592999, + "learning_rate": 5.632341713934835e-06, + "loss": 0.4823, "step": 13907 }, { - "epoch": 2.9163346613545817, - "grad_norm": 3.860480659267379, - "learning_rate": 4.0768905141765905e-08, - "loss": 0.1206, + "epoch": 1.9638520192036149, + "grad_norm": 3.595777180267852, + "learning_rate": 5.630970404558053e-06, + "loss": 0.503, "step": 13908 }, { - "epoch": 2.916544348920109, - "grad_norm": 4.584263218329184, - "learning_rate": 4.0564943912325686e-08, - "loss": 0.1423, + "epoch": 1.9639932222536007, + "grad_norm": 4.500689128465977, + "learning_rate": 5.629599196715631e-06, + "loss": 0.7194, "step": 13909 }, { - "epoch": 2.9167540364856364, - "grad_norm": 3.5705741797122394, - "learning_rate": 4.0361493121207074e-08, - "loss": 0.1154, + "epoch": 1.9641344253035866, + "grad_norm": 3.8618813448864673, + "learning_rate": 5.628228090439434e-06, + "loss": 0.5356, "step": 13910 }, { - "epoch": 2.916963724051164, - "grad_norm": 5.100120065401551, - "learning_rate": 4.015855277883507e-08, - "loss": 0.2026, + "epoch": 1.9642756283535725, + "grad_norm": 2.9372484931710106, + "learning_rate": 5.626857085761331e-06, + "loss": 0.4678, "step": 13911 }, { - "epoch": 2.917173411616691, - "grad_norm": 4.196606669747442, - "learning_rate": 3.995612289561246e-08, - "loss": 0.126, + "epoch": 1.9644168314035584, + "grad_norm": 3.9847835672732965, + "learning_rate": 5.625486182713181e-06, + "loss": 0.6024, "step": 13912 }, { - "epoch": 2.9173830991822185, - "grad_norm": 3.589288420317563, - "learning_rate": 3.975420348191317e-08, - "loss": 0.131, + "epoch": 1.9645580344535443, + "grad_norm": 3.7275966429078693, + "learning_rate": 5.624115381326847e-06, + "loss": 0.5785, "step": 13913 }, { - "epoch": 2.917592786747746, - "grad_norm": 3.7852268865655376, - "learning_rate": 3.955279454808558e-08, - "loss": 0.1126, + "epoch": 1.9646992375035301, + "grad_norm": 4.195000586687316, + "learning_rate": 5.622744681634178e-06, + "loss": 0.6171, "step": 13914 }, { - "epoch": 2.917802474313273, - "grad_norm": 4.663914447100712, - "learning_rate": 3.935189610445367e-08, - "loss": 0.1307, + "epoch": 1.964840440553516, + "grad_norm": 3.992996347644937, + "learning_rate": 5.6213740836670285e-06, + "loss": 0.6246, "step": 13915 }, { - "epoch": 2.9180121618788006, - "grad_norm": 4.7435244379552755, - "learning_rate": 3.915150816131252e-08, - "loss": 0.1392, + "epoch": 1.964981643603502, + "grad_norm": 3.8691525332238594, + "learning_rate": 5.620003587457258e-06, + "loss": 0.604, "step": 13916 }, { - "epoch": 2.918221849444328, - "grad_norm": 3.76151980783133, - "learning_rate": 3.895163072893282e-08, - "loss": 0.0946, + "epoch": 1.9651228466534878, + "grad_norm": 3.208862281212403, + "learning_rate": 5.618633193036712e-06, + "loss": 0.4634, "step": 13917 }, { - "epoch": 2.9184315370098552, - "grad_norm": 4.2346506394165715, - "learning_rate": 3.8752263817558586e-08, - "loss": 0.1546, + "epoch": 1.9652640497034737, + "grad_norm": 3.3222301186778687, + "learning_rate": 5.617262900437239e-06, + "loss": 0.4698, "step": 13918 }, { - "epoch": 2.9186412245753828, - "grad_norm": 3.7144545055107625, - "learning_rate": 3.855340743740832e-08, - "loss": 0.1204, + "epoch": 1.9654052527534596, + "grad_norm": 3.166429665404633, + "learning_rate": 5.615892709690682e-06, + "loss": 0.4683, "step": 13919 }, { - "epoch": 2.9188509121409103, - "grad_norm": 3.5046941695673888, - "learning_rate": 3.835506159867164e-08, - "loss": 0.1198, + "epoch": 1.9655464558034454, + "grad_norm": 2.863714359211657, + "learning_rate": 5.6145226208288875e-06, + "loss": 0.4232, "step": 13920 }, { - "epoch": 2.9190605997064374, - "grad_norm": 4.6708525694880585, - "learning_rate": 3.815722631151708e-08, - "loss": 0.1518, + "epoch": 1.9656876588534313, + "grad_norm": 4.100573021444812, + "learning_rate": 5.613152633883692e-06, + "loss": 0.6357, "step": 13921 }, { - "epoch": 2.919270287271965, - "grad_norm": 4.127380332846929, - "learning_rate": 3.7959901586082096e-08, - "loss": 0.1363, + "epoch": 1.9658288619034172, + "grad_norm": 3.5722495907989646, + "learning_rate": 5.611782748886935e-06, + "loss": 0.502, "step": 13922 }, { - "epoch": 2.919479974837492, - "grad_norm": 4.66279199582513, - "learning_rate": 3.7763087432480805e-08, - "loss": 0.1692, + "epoch": 1.965970064953403, + "grad_norm": 3.3816632883810063, + "learning_rate": 5.610412965870451e-06, + "loss": 0.4894, "step": 13923 }, { - "epoch": 2.9196896624030195, - "grad_norm": 4.947903623887908, - "learning_rate": 3.7566783860798485e-08, - "loss": 0.1626, + "epoch": 1.966111268003389, + "grad_norm": 2.8731571946390018, + "learning_rate": 5.609043284866076e-06, + "loss": 0.3761, "step": 13924 }, { - "epoch": 2.9198993499685466, - "grad_norm": 4.0221018937736, - "learning_rate": 3.73709908810993e-08, - "loss": 0.137, + "epoch": 1.9662524710533749, + "grad_norm": 4.693626033876007, + "learning_rate": 5.607673705905636e-06, + "loss": 0.7438, "step": 13925 }, { - "epoch": 2.920109037534074, - "grad_norm": 3.910044626830158, - "learning_rate": 3.717570850341523e-08, - "loss": 0.1048, + "epoch": 1.9663936741033607, + "grad_norm": 3.3760792116896168, + "learning_rate": 5.606304229020965e-06, + "loss": 0.5802, "step": 13926 }, { - "epoch": 2.9203187250996017, - "grad_norm": 4.458567824605001, - "learning_rate": 3.698093673775605e-08, - "loss": 0.1181, + "epoch": 1.9665348771533466, + "grad_norm": 2.86667764898897, + "learning_rate": 5.604934854243879e-06, + "loss": 0.4117, "step": 13927 }, { - "epoch": 2.9205284126651287, - "grad_norm": 5.213427102620994, - "learning_rate": 3.678667559410376e-08, - "loss": 0.1819, + "epoch": 1.9666760802033325, + "grad_norm": 4.072060239343097, + "learning_rate": 5.603565581606216e-06, + "loss": 0.5244, "step": 13928 }, { - "epoch": 2.9207381002306563, - "grad_norm": 5.659916239836797, - "learning_rate": 3.659292508241485e-08, - "loss": 0.1268, + "epoch": 1.9668172832533184, + "grad_norm": 3.449454347487301, + "learning_rate": 5.602196411139793e-06, + "loss": 0.4985, "step": 13929 }, { - "epoch": 2.920947787796184, - "grad_norm": 4.477291676708218, - "learning_rate": 3.639968521261805e-08, - "loss": 0.1506, + "epoch": 1.9669584863033043, + "grad_norm": 3.415834885954082, + "learning_rate": 5.600827342876422e-06, + "loss": 0.5998, "step": 13930 }, { - "epoch": 2.921157475361711, - "grad_norm": 5.449949705498544, - "learning_rate": 3.620695599461987e-08, - "loss": 0.1412, + "epoch": 1.9670996893532902, + "grad_norm": 3.528506396413141, + "learning_rate": 5.599458376847924e-06, + "loss": 0.4795, "step": 13931 }, { - "epoch": 2.9213671629272384, - "grad_norm": 4.394465920280594, - "learning_rate": 3.601473743829576e-08, - "loss": 0.1508, + "epoch": 1.967240892403276, + "grad_norm": 4.002089303746843, + "learning_rate": 5.598089513086108e-06, + "loss": 0.6146, "step": 13932 }, { - "epoch": 2.921576850492766, - "grad_norm": 2.816249277826936, - "learning_rate": 3.582302955349781e-08, - "loss": 0.1038, + "epoch": 1.967382095453262, + "grad_norm": 3.0284453771532127, + "learning_rate": 5.596720751622795e-06, + "loss": 0.5652, "step": 13933 }, { - "epoch": 2.921786538058293, - "grad_norm": 4.175153740127488, - "learning_rate": 3.563183235005263e-08, - "loss": 0.1137, + "epoch": 1.9675232985032478, + "grad_norm": 3.3330797818966245, + "learning_rate": 5.595352092489791e-06, + "loss": 0.473, "step": 13934 }, { - "epoch": 2.9219962256238206, - "grad_norm": 3.33408377772412, - "learning_rate": 3.544114583775682e-08, - "loss": 0.1333, + "epoch": 1.9676645015532337, + "grad_norm": 4.421924599067661, + "learning_rate": 5.593983535718901e-06, + "loss": 0.6596, "step": 13935 }, { - "epoch": 2.922205913189348, - "grad_norm": 4.340920796641978, - "learning_rate": 3.525097002638589e-08, - "loss": 0.1344, + "epoch": 1.9678057046032196, + "grad_norm": 2.9881172561662677, + "learning_rate": 5.592615081341932e-06, + "loss": 0.4221, "step": 13936 }, { - "epoch": 2.922415600754875, - "grad_norm": 3.6627366153789804, - "learning_rate": 3.506130492568649e-08, - "loss": 0.1045, + "epoch": 1.9679469076532055, + "grad_norm": 3.2234892751422155, + "learning_rate": 5.591246729390685e-06, + "loss": 0.5177, "step": 13937 }, { - "epoch": 2.9226252883204027, - "grad_norm": 2.814032885182091, - "learning_rate": 3.48721505453764e-08, - "loss": 0.0935, + "epoch": 1.9680881107031913, + "grad_norm": 3.3884810705597057, + "learning_rate": 5.589878479896959e-06, + "loss": 0.5358, "step": 13938 }, { - "epoch": 2.92283497588593, - "grad_norm": 6.412692403706989, - "learning_rate": 3.468350689515343e-08, - "loss": 0.1779, + "epoch": 1.9682293137531772, + "grad_norm": 3.0319844307912023, + "learning_rate": 5.588510332892553e-06, + "loss": 0.4164, "step": 13939 }, { - "epoch": 2.9230446634514573, - "grad_norm": 4.920794573151626, - "learning_rate": 3.4495373984685385e-08, - "loss": 0.1331, + "epoch": 1.968370516803163, + "grad_norm": 4.580975329435757, + "learning_rate": 5.587142288409262e-06, + "loss": 0.739, "step": 13940 }, { - "epoch": 2.923254351016985, - "grad_norm": 4.904411245461035, - "learning_rate": 3.4307751823613454e-08, - "loss": 0.1597, + "epoch": 1.968511719853149, + "grad_norm": 3.5589939560250414, + "learning_rate": 5.585774346478878e-06, + "loss": 0.4759, "step": 13941 }, { - "epoch": 2.923464038582512, - "grad_norm": 4.414442687097795, - "learning_rate": 3.412064042155327e-08, - "loss": 0.1225, + "epoch": 1.9686529229031349, + "grad_norm": 3.354972960077759, + "learning_rate": 5.584406507133191e-06, + "loss": 0.4603, "step": 13942 }, { - "epoch": 2.9236737261480394, - "grad_norm": 5.052710193565847, - "learning_rate": 3.393403978809495e-08, - "loss": 0.1619, + "epoch": 1.9687941259531205, + "grad_norm": 4.237736647935482, + "learning_rate": 5.583038770403991e-06, + "loss": 0.6349, "step": 13943 }, { - "epoch": 2.9238834137135665, - "grad_norm": 3.4007471547207544, - "learning_rate": 3.374794993280306e-08, - "loss": 0.1331, + "epoch": 1.9689353290031064, + "grad_norm": 3.055197444779421, + "learning_rate": 5.581671136323059e-06, + "loss": 0.4982, "step": 13944 }, { - "epoch": 2.924093101279094, - "grad_norm": 3.748072517271522, - "learning_rate": 3.356237086521441e-08, - "loss": 0.1373, + "epoch": 1.9690765320530923, + "grad_norm": 4.055883558537785, + "learning_rate": 5.580303604922185e-06, + "loss": 0.603, "step": 13945 }, { - "epoch": 2.9243027888446216, - "grad_norm": 3.7353635029820103, - "learning_rate": 3.3377302594839176e-08, - "loss": 0.1249, + "epoch": 1.9692177351030782, + "grad_norm": 3.8350020412000654, + "learning_rate": 5.578936176233149e-06, + "loss": 0.5332, "step": 13946 }, { - "epoch": 2.9245124764101487, - "grad_norm": 3.416482306283054, - "learning_rate": 3.3192745131164216e-08, - "loss": 0.1291, + "epoch": 1.969358938153064, + "grad_norm": 4.16051544313356, + "learning_rate": 5.577568850287725e-06, + "loss": 0.5598, "step": 13947 }, { - "epoch": 2.924722163975676, - "grad_norm": 6.730602859460566, - "learning_rate": 3.300869848364752e-08, - "loss": 0.1552, + "epoch": 1.96950014120305, + "grad_norm": 2.9042158298248864, + "learning_rate": 5.576201627117691e-06, + "loss": 0.416, "step": 13948 }, { - "epoch": 2.9249318515412037, - "grad_norm": 4.316560544410731, - "learning_rate": 3.282516266172264e-08, - "loss": 0.1118, + "epoch": 1.9696413442530358, + "grad_norm": 4.018647805313028, + "learning_rate": 5.574834506754821e-06, + "loss": 0.7465, "step": 13949 }, { - "epoch": 2.925141539106731, - "grad_norm": 3.5773092827211177, - "learning_rate": 3.264213767479429e-08, - "loss": 0.1204, + "epoch": 1.9697825473030217, + "grad_norm": 4.392224314694918, + "learning_rate": 5.573467489230879e-06, + "loss": 0.6769, "step": 13950 }, { - "epoch": 2.9253512266722583, - "grad_norm": 2.937889099626319, - "learning_rate": 3.2459623532243856e-08, - "loss": 0.0917, + "epoch": 1.9699237503530076, + "grad_norm": 3.3874195985228392, + "learning_rate": 5.572100574577648e-06, + "loss": 0.5016, "step": 13951 }, { - "epoch": 2.925560914237786, - "grad_norm": 3.825910843634259, - "learning_rate": 3.227762024342607e-08, - "loss": 0.1049, + "epoch": 1.9700649534029935, + "grad_norm": 3.3222848503476783, + "learning_rate": 5.5707337628268864e-06, + "loss": 0.431, "step": 13952 }, { - "epoch": 2.925770601803313, - "grad_norm": 4.3420650901909905, - "learning_rate": 3.209612781766791e-08, - "loss": 0.142, + "epoch": 1.9702061564529794, + "grad_norm": 3.174616566109569, + "learning_rate": 5.56936705401036e-06, + "loss": 0.4875, "step": 13953 }, { - "epoch": 2.9259802893688405, - "grad_norm": 3.7042870581956504, - "learning_rate": 3.191514626427195e-08, - "loss": 0.0924, + "epoch": 1.9703473595029652, + "grad_norm": 3.0603360462735836, + "learning_rate": 5.568000448159828e-06, + "loss": 0.4486, "step": 13954 }, { - "epoch": 2.926189976934368, - "grad_norm": 4.123512173707741, - "learning_rate": 3.173467559251298e-08, - "loss": 0.113, + "epoch": 1.9704885625529511, + "grad_norm": 3.299887407522103, + "learning_rate": 5.566633945307053e-06, + "loss": 0.5137, "step": 13955 }, { - "epoch": 2.926399664499895, - "grad_norm": 3.389649384580066, - "learning_rate": 3.155471581164138e-08, - "loss": 0.1012, + "epoch": 1.970629765602937, + "grad_norm": 3.450401027187521, + "learning_rate": 5.56526754548379e-06, + "loss": 0.419, "step": 13956 }, { - "epoch": 2.9266093520654226, - "grad_norm": 5.268738340406482, - "learning_rate": 3.1375266930880884e-08, - "loss": 0.1404, + "epoch": 1.970770968652923, + "grad_norm": 3.2015975919602893, + "learning_rate": 5.5639012487217945e-06, + "loss": 0.4849, "step": 13957 }, { - "epoch": 2.92681903963095, - "grad_norm": 3.2101388296053615, - "learning_rate": 3.1196328959426367e-08, - "loss": 0.1056, + "epoch": 1.9709121717029088, + "grad_norm": 3.527122845361631, + "learning_rate": 5.562535055052818e-06, + "loss": 0.4604, "step": 13958 }, { - "epoch": 2.927028727196477, - "grad_norm": 4.122237853021114, - "learning_rate": 3.101790190645049e-08, - "loss": 0.1306, + "epoch": 1.9710533747528947, + "grad_norm": 3.054213717330431, + "learning_rate": 5.56116896450861e-06, + "loss": 0.3933, "step": 13959 }, { - "epoch": 2.9272384147620047, - "grad_norm": 3.9863838285827797, - "learning_rate": 3.0839985781098144e-08, - "loss": 0.1207, + "epoch": 1.9711945778028805, + "grad_norm": 3.7400792040283166, + "learning_rate": 5.559802977120918e-06, + "loss": 0.535, "step": 13960 }, { - "epoch": 2.927448102327532, - "grad_norm": 4.4089049057234, - "learning_rate": 3.0662580592486503e-08, - "loss": 0.148, + "epoch": 1.9713357808528664, + "grad_norm": 4.0698205520909925, + "learning_rate": 5.5584370929214874e-06, + "loss": 0.6218, "step": 13961 }, { - "epoch": 2.9276577898930594, - "grad_norm": 3.9135444398438355, - "learning_rate": 3.048568634970828e-08, - "loss": 0.1343, + "epoch": 1.9714769839028523, + "grad_norm": 3.94278740280085, + "learning_rate": 5.557071311942057e-06, + "loss": 0.5543, "step": 13962 }, { - "epoch": 2.9278674774585864, - "grad_norm": 5.186688028911975, - "learning_rate": 3.030930306182955e-08, - "loss": 0.1549, + "epoch": 1.9716181869528382, + "grad_norm": 3.9508432938343194, + "learning_rate": 5.555705634214379e-06, + "loss": 0.5875, "step": 13963 }, { - "epoch": 2.928077165024114, - "grad_norm": 6.640647039937862, - "learning_rate": 3.013343073789088e-08, - "loss": 0.1689, + "epoch": 1.971759390002824, + "grad_norm": 3.2862039895587145, + "learning_rate": 5.554340059770178e-06, + "loss": 0.492, "step": 13964 }, { - "epoch": 2.9282868525896415, - "grad_norm": 3.464229211126237, - "learning_rate": 2.995806938690615e-08, - "loss": 0.1127, + "epoch": 1.97190059305281, + "grad_norm": 3.2279175421057773, + "learning_rate": 5.552974588641194e-06, + "loss": 0.5059, "step": 13965 }, { - "epoch": 2.9284965401551686, - "grad_norm": 3.4243503063960214, - "learning_rate": 2.978321901786263e-08, - "loss": 0.0996, + "epoch": 1.9720417961027958, + "grad_norm": 4.09232669890172, + "learning_rate": 5.55160922085916e-06, + "loss": 0.6931, "step": 13966 }, { - "epoch": 2.928706227720696, - "grad_norm": 4.351702447743387, - "learning_rate": 2.9608879639722032e-08, - "loss": 0.1344, + "epoch": 1.9721829991527817, + "grad_norm": 3.1460761943533098, + "learning_rate": 5.550243956455803e-06, + "loss": 0.5708, "step": 13967 }, { - "epoch": 2.9289159152862236, - "grad_norm": 3.3542825491752444, - "learning_rate": 2.9435051261417213e-08, - "loss": 0.1062, + "epoch": 1.9723242022027676, + "grad_norm": 4.265408895571942, + "learning_rate": 5.54887879546286e-06, + "loss": 0.6534, "step": 13968 }, { - "epoch": 2.9291256028517507, - "grad_norm": 4.08233877682267, - "learning_rate": 2.9261733891861043e-08, - "loss": 0.1421, + "epoch": 1.9724654052527535, + "grad_norm": 4.556159982190725, + "learning_rate": 5.5475137379120495e-06, + "loss": 0.6344, "step": 13969 }, { - "epoch": 2.9293352904172782, - "grad_norm": 3.474413169464223, - "learning_rate": 2.9088927539934197e-08, - "loss": 0.0908, + "epoch": 1.9726066083027394, + "grad_norm": 4.348446896672211, + "learning_rate": 5.546148783835097e-06, + "loss": 0.5987, "step": 13970 }, { - "epoch": 2.9295449779828058, - "grad_norm": 3.3475353472019425, - "learning_rate": 2.8916632214491813e-08, - "loss": 0.1253, + "epoch": 1.9727478113527253, + "grad_norm": 4.257840678802924, + "learning_rate": 5.5447839332637244e-06, + "loss": 0.6468, "step": 13971 }, { - "epoch": 2.929754665548333, - "grad_norm": 4.933767218081028, - "learning_rate": 2.8744847924366825e-08, - "loss": 0.1778, + "epoch": 1.9728890144027111, + "grad_norm": 3.9390308186014926, + "learning_rate": 5.5434191862296485e-06, + "loss": 0.5694, "step": 13972 }, { - "epoch": 2.9299643531138604, - "grad_norm": 4.478609169159007, - "learning_rate": 2.8573574678363304e-08, - "loss": 0.1525, + "epoch": 1.973030217452697, + "grad_norm": 3.3903924051176504, + "learning_rate": 5.542054542764586e-06, + "loss": 0.5587, "step": 13973 }, { - "epoch": 2.930174040679388, - "grad_norm": 3.3138384669875895, - "learning_rate": 2.840281248525867e-08, - "loss": 0.099, + "epoch": 1.973171420502683, + "grad_norm": 3.7751479427479837, + "learning_rate": 5.5406900029002504e-06, + "loss": 0.6604, "step": 13974 }, { - "epoch": 2.930383728244915, - "grad_norm": 3.3888198157895295, - "learning_rate": 2.82325613538037e-08, - "loss": 0.1241, + "epoch": 1.9733126235526688, + "grad_norm": 3.6552286972476113, + "learning_rate": 5.539325566668354e-06, + "loss": 0.5074, "step": 13975 }, { - "epoch": 2.9305934158104425, - "grad_norm": 3.7459067474753205, - "learning_rate": 2.8062821292725862e-08, - "loss": 0.1008, + "epoch": 1.9734538266026547, + "grad_norm": 3.1378279759055383, + "learning_rate": 5.5379612341006054e-06, + "loss": 0.4482, "step": 13976 }, { - "epoch": 2.93080310337597, - "grad_norm": 5.009107959812513, - "learning_rate": 2.789359231072486e-08, - "loss": 0.1735, + "epoch": 1.9735950296526403, + "grad_norm": 3.345437192555646, + "learning_rate": 5.53659700522871e-06, + "loss": 0.4989, "step": 13977 }, { - "epoch": 2.931012790941497, - "grad_norm": 3.656686547140454, - "learning_rate": 2.7724874416471536e-08, - "loss": 0.1044, + "epoch": 1.9737362327026262, + "grad_norm": 2.8573092317516964, + "learning_rate": 5.535232880084373e-06, + "loss": 0.3755, "step": 13978 }, { - "epoch": 2.9312224785070247, - "grad_norm": 4.434217021526881, - "learning_rate": 2.755666761861453e-08, - "loss": 0.1709, + "epoch": 1.973877435752612, + "grad_norm": 3.9713853927219116, + "learning_rate": 5.533868858699291e-06, + "loss": 0.5883, "step": 13979 }, { - "epoch": 2.9314321660725517, - "grad_norm": 4.5129939733528825, - "learning_rate": 2.738897192577472e-08, - "loss": 0.1376, + "epoch": 1.974018638802598, + "grad_norm": 3.3196485438358536, + "learning_rate": 5.532504941105176e-06, + "loss": 0.5752, "step": 13980 }, { - "epoch": 2.9316418536380793, - "grad_norm": 5.075825057230401, - "learning_rate": 2.7221787346547455e-08, - "loss": 0.1238, + "epoch": 1.9741598418525839, + "grad_norm": 3.1296260513888785, + "learning_rate": 5.5311411273337124e-06, + "loss": 0.4651, "step": 13981 }, { - "epoch": 2.9318515412036064, - "grad_norm": 3.3560917976551896, - "learning_rate": 2.7055113889499218e-08, - "loss": 0.0918, + "epoch": 1.9743010449025697, + "grad_norm": 3.323207368866015, + "learning_rate": 5.529777417416599e-06, + "loss": 0.5259, "step": 13982 }, { - "epoch": 2.932061228769134, - "grad_norm": 4.188881502857614, - "learning_rate": 2.6888951563175393e-08, - "loss": 0.1188, + "epoch": 1.9744422479525556, + "grad_norm": 4.118726109045666, + "learning_rate": 5.528413811385529e-06, + "loss": 0.6308, "step": 13983 }, { - "epoch": 2.9322709163346614, - "grad_norm": 4.896688883086082, - "learning_rate": 2.6723300376088058e-08, - "loss": 0.1726, + "epoch": 1.9745834510025415, + "grad_norm": 3.926211046539317, + "learning_rate": 5.527050309272187e-06, + "loss": 0.696, "step": 13984 }, { - "epoch": 2.9324806039001885, - "grad_norm": 3.5600686145757066, - "learning_rate": 2.6558160336731533e-08, - "loss": 0.1198, + "epoch": 1.9747246540525274, + "grad_norm": 3.35776541472489, + "learning_rate": 5.525686911108268e-06, + "loss": 0.5293, "step": 13985 }, { - "epoch": 2.932690291465716, - "grad_norm": 3.838743330537526, - "learning_rate": 2.6393531453565713e-08, - "loss": 0.128, + "epoch": 1.9748658571025133, + "grad_norm": 4.160218210045014, + "learning_rate": 5.524323616925452e-06, + "loss": 0.7608, "step": 13986 }, { - "epoch": 2.9328999790312436, - "grad_norm": 4.078330470646543, - "learning_rate": 2.6229413735030517e-08, - "loss": 0.1148, + "epoch": 1.9750070601524992, + "grad_norm": 4.087195533204512, + "learning_rate": 5.5229604267554235e-06, + "loss": 0.5117, "step": 13987 }, { - "epoch": 2.9331096665967706, - "grad_norm": 3.642321848227449, - "learning_rate": 2.6065807189535885e-08, - "loss": 0.1263, + "epoch": 1.975148263202485, + "grad_norm": 3.1495730488383806, + "learning_rate": 5.52159734062986e-06, + "loss": 0.4888, "step": 13988 }, { - "epoch": 2.933319354162298, - "grad_norm": 3.1172119729884744, - "learning_rate": 2.5902711825468442e-08, - "loss": 0.1291, + "epoch": 1.975289466252471, + "grad_norm": 3.1699354246854483, + "learning_rate": 5.520234358580441e-06, + "loss": 0.4805, "step": 13989 }, { - "epoch": 2.9335290417278257, - "grad_norm": 4.684465519876334, - "learning_rate": 2.574012765118594e-08, - "loss": 0.1429, + "epoch": 1.9754306693024568, + "grad_norm": 3.295478958188065, + "learning_rate": 5.5188714806388395e-06, + "loss": 0.4913, "step": 13990 }, { - "epoch": 2.9337387292933528, - "grad_norm": 3.01121543863825, - "learning_rate": 2.5578054675020613e-08, - "loss": 0.0866, + "epoch": 1.9755718723524427, + "grad_norm": 3.9948356257364153, + "learning_rate": 5.5175087068367315e-06, + "loss": 0.5636, "step": 13991 }, { - "epoch": 2.9339484168588803, - "grad_norm": 5.473591564101314, - "learning_rate": 2.5416492905280255e-08, - "loss": 0.1556, + "epoch": 1.9757130754024286, + "grad_norm": 3.379309028112032, + "learning_rate": 5.516146037205784e-06, + "loss": 0.4732, "step": 13992 }, { - "epoch": 2.934158104424408, - "grad_norm": 3.653811067465383, - "learning_rate": 2.5255442350244906e-08, - "loss": 0.1204, + "epoch": 1.9758542784524145, + "grad_norm": 3.6819397293516825, + "learning_rate": 5.514783471777667e-06, + "loss": 0.5421, "step": 13993 }, { - "epoch": 2.934367791989935, - "grad_norm": 3.698297934846077, - "learning_rate": 2.509490301816908e-08, - "loss": 0.1095, + "epoch": 1.9759954815024003, + "grad_norm": 4.014609068069663, + "learning_rate": 5.513421010584044e-06, + "loss": 0.6035, "step": 13994 }, { - "epoch": 2.9345774795554624, - "grad_norm": 3.5545899276913646, - "learning_rate": 2.4934874917279528e-08, - "loss": 0.1111, + "epoch": 1.9761366845523862, + "grad_norm": 4.125623895962094, + "learning_rate": 5.51205865365658e-06, + "loss": 0.6262, "step": 13995 }, { - "epoch": 2.93478716712099, - "grad_norm": 4.037017916389461, - "learning_rate": 2.4775358055777464e-08, - "loss": 0.1256, + "epoch": 1.976277887602372, + "grad_norm": 3.001239070149158, + "learning_rate": 5.51069640102693e-06, + "loss": 0.4428, "step": 13996 }, { - "epoch": 2.934996854686517, - "grad_norm": 5.076727774124706, - "learning_rate": 2.4616352441839685e-08, - "loss": 0.1411, + "epoch": 1.976419090652358, + "grad_norm": 4.119641359494949, + "learning_rate": 5.509334252726765e-06, + "loss": 0.5928, "step": 13997 }, { - "epoch": 2.9352065422520446, - "grad_norm": 3.7402751409933503, - "learning_rate": 2.4457858083616337e-08, - "loss": 0.1196, + "epoch": 1.9765602937023439, + "grad_norm": 3.021756214654428, + "learning_rate": 5.507972208787728e-06, + "loss": 0.4938, "step": 13998 }, { - "epoch": 2.9354162298175717, - "grad_norm": 4.331800431877805, - "learning_rate": 2.42998749892287e-08, - "loss": 0.1427, + "epoch": 1.9767014967523298, + "grad_norm": 3.3080244667737237, + "learning_rate": 5.506610269241479e-06, + "loss": 0.4522, "step": 13999 }, { - "epoch": 2.935625917383099, - "grad_norm": 3.1987137567685977, - "learning_rate": 2.414240316677474e-08, - "loss": 0.1106, + "epoch": 1.9768426998023156, + "grad_norm": 4.165893007373089, + "learning_rate": 5.505248434119666e-06, + "loss": 0.6541, "step": 14000 }, { - "epoch": 2.9358356049486263, - "grad_norm": 5.136016044489025, - "learning_rate": 2.3985442624325782e-08, - "loss": 0.121, + "epoch": 1.9769839028523015, + "grad_norm": 4.242147836666587, + "learning_rate": 5.503886703453933e-06, + "loss": 0.6373, "step": 14001 }, { - "epoch": 2.936045292514154, - "grad_norm": 5.107095871544669, - "learning_rate": 2.382899336992428e-08, - "loss": 0.1619, + "epoch": 1.9771251059022874, + "grad_norm": 3.7347190776120067, + "learning_rate": 5.502525077275937e-06, + "loss": 0.4888, "step": 14002 }, { - "epoch": 2.9362549800796813, - "grad_norm": 6.264521096526768, - "learning_rate": 2.3673055411589374e-08, - "loss": 0.1813, + "epoch": 1.9772663089522733, + "grad_norm": 3.6385314055490747, + "learning_rate": 5.501163555617315e-06, + "loss": 0.5523, "step": 14003 }, { - "epoch": 2.9364646676452084, - "grad_norm": 4.228474019707086, - "learning_rate": 2.351762875731467e-08, - "loss": 0.1397, + "epoch": 1.9774075120022592, + "grad_norm": 3.533821249691839, + "learning_rate": 5.499802138509709e-06, + "loss": 0.5368, "step": 14004 }, { - "epoch": 2.936674355210736, - "grad_norm": 4.310940557719353, - "learning_rate": 2.33627134150638e-08, - "loss": 0.1753, + "epoch": 1.977548715052245, + "grad_norm": 3.2414158760505045, + "learning_rate": 5.498440825984758e-06, + "loss": 0.4289, "step": 14005 }, { - "epoch": 2.9368840427762635, - "grad_norm": 4.057385504818411, - "learning_rate": 2.3208309392777073e-08, - "loss": 0.1197, + "epoch": 1.977689918102231, + "grad_norm": 3.3532772461544567, + "learning_rate": 5.497079618074097e-06, + "loss": 0.5046, "step": 14006 }, { - "epoch": 2.9370937303417906, - "grad_norm": 4.243241990474552, - "learning_rate": 2.3054416698369274e-08, - "loss": 0.1796, + "epoch": 1.9778311211522168, + "grad_norm": 3.738283293758161, + "learning_rate": 5.495718514809363e-06, + "loss": 0.532, "step": 14007 }, { - "epoch": 2.937303417907318, - "grad_norm": 4.631136099005787, - "learning_rate": 2.290103533972632e-08, - "loss": 0.1454, + "epoch": 1.9779723242022027, + "grad_norm": 3.3230503998844543, + "learning_rate": 5.494357516222184e-06, + "loss": 0.5178, "step": 14008 }, { - "epoch": 2.9375131054728456, - "grad_norm": 4.2891667512738, - "learning_rate": 2.2748165324709692e-08, - "loss": 0.1203, + "epoch": 1.9781135272521886, + "grad_norm": 3.81292161830942, + "learning_rate": 5.492996622344191e-06, + "loss": 0.5485, "step": 14009 }, { - "epoch": 2.9377227930383727, - "grad_norm": 4.203436378862493, - "learning_rate": 2.259580666115535e-08, - "loss": 0.1324, + "epoch": 1.9782547303021745, + "grad_norm": 3.3562904374958493, + "learning_rate": 5.49163583320701e-06, + "loss": 0.5002, "step": 14010 }, { - "epoch": 2.9379324806039, - "grad_norm": 4.369069255835128, - "learning_rate": 2.244395935686927e-08, - "loss": 0.1396, + "epoch": 1.9783959333521604, + "grad_norm": 3.4987435860341902, + "learning_rate": 5.490275148842266e-06, + "loss": 0.542, "step": 14011 }, { - "epoch": 2.9381421681694277, - "grad_norm": 4.0064812653629565, - "learning_rate": 2.229262341963634e-08, - "loss": 0.1403, + "epoch": 1.9785371364021462, + "grad_norm": 3.6270815904742557, + "learning_rate": 5.488914569281578e-06, + "loss": 0.4715, "step": 14012 }, { - "epoch": 2.938351855734955, - "grad_norm": 5.066171107022914, - "learning_rate": 2.214179885721146e-08, - "loss": 0.1717, + "epoch": 1.9786783394521321, + "grad_norm": 4.730952537891852, + "learning_rate": 5.487554094556565e-06, + "loss": 0.6648, "step": 14013 }, { - "epoch": 2.9385615433004824, - "grad_norm": 3.3948684547943353, - "learning_rate": 2.199148567732512e-08, - "loss": 0.11, + "epoch": 1.978819542502118, + "grad_norm": 3.7968177603514417, + "learning_rate": 5.486193724698854e-06, + "loss": 0.5803, "step": 14014 }, { - "epoch": 2.93877123086601, - "grad_norm": 3.706344215985088, - "learning_rate": 2.1841683887682263e-08, - "loss": 0.0959, + "epoch": 1.9789607455521039, + "grad_norm": 3.6202468321052903, + "learning_rate": 5.484833459740047e-06, + "loss": 0.6018, "step": 14015 }, { - "epoch": 2.938980918431537, - "grad_norm": 4.378779376299231, - "learning_rate": 2.1692393495957864e-08, - "loss": 0.1173, + "epoch": 1.9791019486020898, + "grad_norm": 3.914613135146988, + "learning_rate": 5.48347329971176e-06, + "loss": 0.5756, "step": 14016 }, { - "epoch": 2.9391906059970645, - "grad_norm": 4.003900627969656, - "learning_rate": 2.1543614509805798e-08, - "loss": 0.1193, + "epoch": 1.9792431516520756, + "grad_norm": 3.9688751436293495, + "learning_rate": 5.482113244645604e-06, + "loss": 0.5549, "step": 14017 }, { - "epoch": 2.9394002935625916, - "grad_norm": 3.541209324869028, - "learning_rate": 2.139534693684886e-08, - "loss": 0.0994, + "epoch": 1.9793843547020615, + "grad_norm": 3.547649666202111, + "learning_rate": 5.48075329457318e-06, + "loss": 0.507, "step": 14018 }, { - "epoch": 2.939609981128119, - "grad_norm": 4.313493241728477, - "learning_rate": 2.1247590784688743e-08, - "loss": 0.1296, + "epoch": 1.9795255577520474, + "grad_norm": 4.173737951982291, + "learning_rate": 5.479393449526102e-06, + "loss": 0.6814, "step": 14019 }, { - "epoch": 2.939819668693646, - "grad_norm": 3.5703947216391145, - "learning_rate": 2.110034606089717e-08, - "loss": 0.1053, + "epoch": 1.9796667608020333, + "grad_norm": 3.5105366007060823, + "learning_rate": 5.478033709535968e-06, + "loss": 0.4048, "step": 14020 }, { - "epoch": 2.9400293562591737, - "grad_norm": 3.8103718959416963, - "learning_rate": 2.0953612773019217e-08, - "loss": 0.1283, + "epoch": 1.9798079638520192, + "grad_norm": 3.278474580171805, + "learning_rate": 5.476674074634378e-06, + "loss": 0.5636, "step": 14021 }, { - "epoch": 2.9402390438247012, - "grad_norm": 4.448374640631488, - "learning_rate": 2.0807390928576645e-08, - "loss": 0.151, + "epoch": 1.979949166902005, + "grad_norm": 3.66360812446567, + "learning_rate": 5.4753145448529284e-06, + "loss": 0.5968, "step": 14022 }, { - "epoch": 2.9404487313902283, - "grad_norm": 4.0347241627781205, - "learning_rate": 2.0661680535063454e-08, - "loss": 0.1411, + "epoch": 1.980090369951991, + "grad_norm": 3.1068984240121607, + "learning_rate": 5.4739551202232155e-06, + "loss": 0.4718, "step": 14023 }, { - "epoch": 2.940658418955756, - "grad_norm": 3.6302195825880235, - "learning_rate": 2.0516481599948124e-08, - "loss": 0.0954, + "epoch": 1.9802315730019768, + "grad_norm": 3.529896546528489, + "learning_rate": 5.472595800776831e-06, + "loss": 0.5054, "step": 14024 }, { - "epoch": 2.9408681065212834, - "grad_norm": 5.134454722476279, - "learning_rate": 2.0371794130671362e-08, - "loss": 0.1561, + "epoch": 1.9803727760519627, + "grad_norm": 3.0773333615397527, + "learning_rate": 5.4712365865453645e-06, + "loss": 0.4459, "step": 14025 }, { - "epoch": 2.9410777940868105, - "grad_norm": 3.8021449726776955, - "learning_rate": 2.0227618134648352e-08, - "loss": 0.1075, + "epoch": 1.9805139791019486, + "grad_norm": 3.7622421946209057, + "learning_rate": 5.469877477560403e-06, + "loss": 0.5472, "step": 14026 }, { - "epoch": 2.941287481652338, - "grad_norm": 4.238122889201493, - "learning_rate": 2.0083953619268737e-08, - "loss": 0.1622, + "epoch": 1.9806551821519345, + "grad_norm": 3.5612871264039563, + "learning_rate": 5.468518473853535e-06, + "loss": 0.5836, "step": 14027 }, { - "epoch": 2.9414971692178655, - "grad_norm": 5.570521877234014, - "learning_rate": 1.9940800591896627e-08, - "loss": 0.1787, + "epoch": 1.9807963852019204, + "grad_norm": 3.8982104912496576, + "learning_rate": 5.467159575456338e-06, + "loss": 0.5565, "step": 14028 }, { - "epoch": 2.9417068567833926, - "grad_norm": 4.8336469262760104, - "learning_rate": 1.979815905986726e-08, - "loss": 0.1358, + "epoch": 1.9809375882519062, + "grad_norm": 3.4771847286658883, + "learning_rate": 5.465800782400397e-06, + "loss": 0.5007, "step": 14029 }, { - "epoch": 2.94191654434892, - "grad_norm": 3.48769105268091, - "learning_rate": 1.9656029030492574e-08, - "loss": 0.102, + "epoch": 1.9810787913018921, + "grad_norm": 3.8599443277703513, + "learning_rate": 5.464442094717281e-06, + "loss": 0.6166, "step": 14030 }, { - "epoch": 2.9421262319144477, - "grad_norm": 2.743218031227074, - "learning_rate": 1.9514410511056737e-08, - "loss": 0.1179, + "epoch": 1.981219994351878, + "grad_norm": 3.55249159001819, + "learning_rate": 5.463083512438582e-06, + "loss": 0.5029, "step": 14031 }, { - "epoch": 2.9423359194799747, - "grad_norm": 3.9727427171713825, - "learning_rate": 1.9373303508817275e-08, - "loss": 0.1436, + "epoch": 1.981361197401864, + "grad_norm": 3.7410048001243377, + "learning_rate": 5.46172503559586e-06, + "loss": 0.5911, "step": 14032 }, { - "epoch": 2.9425456070455023, - "grad_norm": 4.040769724649306, - "learning_rate": 1.9232708031006187e-08, - "loss": 0.0859, + "epoch": 1.9815024004518498, + "grad_norm": 3.329967938711971, + "learning_rate": 5.460366664220686e-06, + "loss": 0.4355, "step": 14033 }, { - "epoch": 2.94275529461103, - "grad_norm": 4.462170073294156, - "learning_rate": 1.9092624084828815e-08, - "loss": 0.1528, + "epoch": 1.9816436035018357, + "grad_norm": 3.2978784073699976, + "learning_rate": 5.45900839834463e-06, + "loss": 0.463, "step": 14034 }, { - "epoch": 2.942964982176557, - "grad_norm": 6.2021401053450615, - "learning_rate": 1.8953051677467192e-08, - "loss": 0.1729, + "epoch": 1.9817848065518215, + "grad_norm": 2.7781668243490087, + "learning_rate": 5.457650237999255e-06, + "loss": 0.4184, "step": 14035 }, { - "epoch": 2.9431746697420844, - "grad_norm": 4.065509747590375, - "learning_rate": 1.881399081607227e-08, - "loss": 0.112, + "epoch": 1.9819260096018074, + "grad_norm": 3.1362133022042977, + "learning_rate": 5.45629218321613e-06, + "loss": 0.4867, "step": 14036 }, { - "epoch": 2.9433843573076115, - "grad_norm": 3.945715062688238, - "learning_rate": 1.8675441507771673e-08, - "loss": 0.1386, + "epoch": 1.9820672126517933, + "grad_norm": 3.874773400876753, + "learning_rate": 5.454934234026813e-06, + "loss": 0.5584, "step": 14037 }, { - "epoch": 2.943594044873139, - "grad_norm": 4.69755821704635, - "learning_rate": 1.8537403759666394e-08, - "loss": 0.1485, + "epoch": 1.9822084157017792, + "grad_norm": 3.305887450305212, + "learning_rate": 5.453576390462861e-06, + "loss": 0.531, "step": 14038 }, { - "epoch": 2.943803732438666, - "grad_norm": 4.869885747025418, - "learning_rate": 1.839987757883077e-08, - "loss": 0.1437, + "epoch": 1.982349618751765, + "grad_norm": 3.8341483263123646, + "learning_rate": 5.452218652555831e-06, + "loss": 0.5236, "step": 14039 }, { - "epoch": 2.9440134200041936, - "grad_norm": 4.028640331619228, - "learning_rate": 1.826286297231472e-08, - "loss": 0.1249, + "epoch": 1.982490821801751, + "grad_norm": 4.18518586779701, + "learning_rate": 5.4508610203372794e-06, + "loss": 0.5643, "step": 14040 }, { - "epoch": 2.944223107569721, - "grad_norm": 3.782761214663777, - "learning_rate": 1.8126359947138184e-08, - "loss": 0.1034, + "epoch": 1.9826320248517368, + "grad_norm": 3.666619966205456, + "learning_rate": 5.449503493838744e-06, + "loss": 0.5491, "step": 14041 }, { - "epoch": 2.9444327951352482, - "grad_norm": 3.5617392100415697, - "learning_rate": 1.7990368510298896e-08, - "loss": 0.1192, + "epoch": 1.9827732279017227, + "grad_norm": 3.3240560366357976, + "learning_rate": 5.448146073091787e-06, + "loss": 0.4848, "step": 14042 }, { - "epoch": 2.9446424827007758, - "grad_norm": 4.643180751184854, - "learning_rate": 1.7854888668766834e-08, - "loss": 0.127, + "epoch": 1.9829144309517086, + "grad_norm": 3.1035495248420335, + "learning_rate": 5.44678875812795e-06, + "loss": 0.5086, "step": 14043 }, { - "epoch": 2.9448521702663033, - "grad_norm": 3.8499578217733665, - "learning_rate": 1.7719920429484228e-08, - "loss": 0.1208, + "epoch": 1.9830556340016945, + "grad_norm": 3.534598092378652, + "learning_rate": 5.445431548978775e-06, + "loss": 0.466, "step": 14044 }, { - "epoch": 2.9450618578318304, - "grad_norm": 4.272185846791371, - "learning_rate": 1.7585463799368875e-08, - "loss": 0.1415, + "epoch": 1.9831968370516804, + "grad_norm": 3.3719351643908397, + "learning_rate": 5.444074445675803e-06, + "loss": 0.4456, "step": 14045 }, { - "epoch": 2.945271545397358, - "grad_norm": 4.159644292393944, - "learning_rate": 1.7451518785311928e-08, - "loss": 0.1657, + "epoch": 1.9833380401016663, + "grad_norm": 3.4647707238229177, + "learning_rate": 5.442717448250574e-06, + "loss": 0.4944, "step": 14046 }, { - "epoch": 2.9454812329628854, - "grad_norm": 4.192730621289511, - "learning_rate": 1.7318085394179008e-08, - "loss": 0.1391, + "epoch": 1.9834792431516521, + "grad_norm": 3.1201164964440826, + "learning_rate": 5.44136055673462e-06, + "loss": 0.4482, "step": 14047 }, { - "epoch": 2.9456909205284125, - "grad_norm": 3.7863515557501097, - "learning_rate": 1.7185163632809087e-08, - "loss": 0.1228, + "epoch": 1.983620446201638, + "grad_norm": 3.766005685384043, + "learning_rate": 5.440003771159485e-06, + "loss": 0.5591, "step": 14048 }, { - "epoch": 2.94590060809394, - "grad_norm": 3.9476606396926823, - "learning_rate": 1.7052753508012276e-08, - "loss": 0.1083, + "epoch": 1.983761649251624, + "grad_norm": 3.762544476774224, + "learning_rate": 5.43864709155669e-06, + "loss": 0.5945, "step": 14049 }, { - "epoch": 2.9461102956594676, - "grad_norm": 3.785811694255531, - "learning_rate": 1.6920855026577588e-08, - "loss": 0.1083, + "epoch": 1.9839028523016098, + "grad_norm": 4.008481457946485, + "learning_rate": 5.437290517957767e-06, + "loss": 0.5293, "step": 14050 }, { - "epoch": 2.9463199832249947, - "grad_norm": 4.2238728495838584, - "learning_rate": 1.6789468195262948e-08, - "loss": 0.142, + "epoch": 1.9840440553515957, + "grad_norm": 3.8424270293349254, + "learning_rate": 5.435934050394242e-06, + "loss": 0.596, "step": 14051 }, { - "epoch": 2.946529670790522, - "grad_norm": 4.182371135740404, - "learning_rate": 1.665859302080297e-08, - "loss": 0.1362, + "epoch": 1.9841852584015816, + "grad_norm": 4.043495423807216, + "learning_rate": 5.434577688897637e-06, + "loss": 0.6161, "step": 14052 }, { - "epoch": 2.9467393583560497, - "grad_norm": 3.6252832551261953, - "learning_rate": 1.6528229509906735e-08, - "loss": 0.128, + "epoch": 1.9843264614515674, + "grad_norm": 3.40787160400942, + "learning_rate": 5.433221433499472e-06, + "loss": 0.4502, "step": 14053 }, { - "epoch": 2.946949045921577, - "grad_norm": 4.5244449688114985, - "learning_rate": 1.639837766925334e-08, - "loss": 0.1448, + "epoch": 1.9844676645015533, + "grad_norm": 3.720288836568041, + "learning_rate": 5.4318652842312725e-06, + "loss": 0.5279, "step": 14054 }, { - "epoch": 2.9471587334871043, - "grad_norm": 4.734922864621065, - "learning_rate": 1.6269037505497464e-08, - "loss": 0.1081, + "epoch": 1.9846088675515392, + "grad_norm": 3.9433751948255775, + "learning_rate": 5.430509241124551e-06, + "loss": 0.5263, "step": 14055 }, { - "epoch": 2.9473684210526314, - "grad_norm": 4.06027416831395, - "learning_rate": 1.614020902527158e-08, - "loss": 0.1326, + "epoch": 1.984750070601525, + "grad_norm": 3.0761589086409225, + "learning_rate": 5.429153304210821e-06, + "loss": 0.4472, "step": 14056 }, { - "epoch": 2.947578108618159, - "grad_norm": 3.8486219024600357, - "learning_rate": 1.6011892235174853e-08, - "loss": 0.1228, + "epoch": 1.984891273651511, + "grad_norm": 4.264568296097902, + "learning_rate": 5.427797473521599e-06, + "loss": 0.5856, "step": 14057 }, { - "epoch": 2.9477877961836865, - "grad_norm": 4.277160497554599, - "learning_rate": 1.5884087141785354e-08, - "loss": 0.1505, + "epoch": 1.9850324767014969, + "grad_norm": 3.2570349594000865, + "learning_rate": 5.42644174908838e-06, + "loss": 0.5996, "step": 14058 }, { - "epoch": 2.9479974837492136, - "grad_norm": 4.148364684822753, - "learning_rate": 1.575679375165229e-08, - "loss": 0.1122, + "epoch": 1.9851736797514827, + "grad_norm": 4.745779234966659, + "learning_rate": 5.4250861309426835e-06, + "loss": 0.5911, "step": 14059 }, { - "epoch": 2.948207171314741, - "grad_norm": 3.5426302058610206, - "learning_rate": 1.5630012071301548e-08, - "loss": 0.1369, + "epoch": 1.9853148828014686, + "grad_norm": 4.143206267300422, + "learning_rate": 5.42373061911601e-06, + "loss": 0.6037, "step": 14060 }, { - "epoch": 2.948416858880268, - "grad_norm": 3.528340764967035, - "learning_rate": 1.550374210722905e-08, - "loss": 0.1037, + "epoch": 1.9854560858514545, + "grad_norm": 3.5195158044242754, + "learning_rate": 5.422375213639861e-06, + "loss": 0.4757, "step": 14061 }, { - "epoch": 2.9486265464457957, - "grad_norm": 4.41991677865507, - "learning_rate": 1.5377983865906276e-08, - "loss": 0.1427, + "epoch": 1.9855972889014404, + "grad_norm": 4.538327683816909, + "learning_rate": 5.421019914545735e-06, + "loss": 0.6427, "step": 14062 }, { - "epoch": 2.948836234011323, - "grad_norm": 4.275846745295044, - "learning_rate": 1.52527373537803e-08, - "loss": 0.1592, + "epoch": 1.9857384919514263, + "grad_norm": 3.672037315512907, + "learning_rate": 5.419664721865129e-06, + "loss": 0.4594, "step": 14063 }, { - "epoch": 2.9490459215768503, - "grad_norm": 5.542345568275729, - "learning_rate": 1.5128002577269317e-08, - "loss": 0.1605, + "epoch": 1.9858796950014121, + "grad_norm": 4.043021930528464, + "learning_rate": 5.418309635629536e-06, + "loss": 0.5672, "step": 14064 }, { - "epoch": 2.949255609142378, - "grad_norm": 4.757814127431632, - "learning_rate": 1.5003779542764884e-08, - "loss": 0.1538, + "epoch": 1.986020898051398, + "grad_norm": 3.6387995293735744, + "learning_rate": 5.416954655870449e-06, + "loss": 0.5245, "step": 14065 }, { - "epoch": 2.9494652967079054, - "grad_norm": 4.428855808668618, - "learning_rate": 1.4880068256636349e-08, - "loss": 0.1107, + "epoch": 1.986162101101384, + "grad_norm": 4.0935576725852085, + "learning_rate": 5.415599782619356e-06, + "loss": 0.5474, "step": 14066 }, { - "epoch": 2.9496749842734324, - "grad_norm": 5.027763117273203, - "learning_rate": 1.4756868725220863e-08, - "loss": 0.1609, + "epoch": 1.9863033041513698, + "grad_norm": 4.945419850549816, + "learning_rate": 5.414245015907744e-06, + "loss": 0.6251, "step": 14067 }, { - "epoch": 2.94988467183896, - "grad_norm": 4.988007408196683, - "learning_rate": 1.4634180954835598e-08, - "loss": 0.1483, + "epoch": 1.9864445072013557, + "grad_norm": 4.378137502899994, + "learning_rate": 5.412890355767098e-06, + "loss": 0.7481, "step": 14068 }, { - "epoch": 2.9500943594044875, - "grad_norm": 4.980686909348065, - "learning_rate": 1.4512004951767745e-08, - "loss": 0.1595, + "epoch": 1.9865857102513416, + "grad_norm": 4.114005344599044, + "learning_rate": 5.411535802228899e-06, + "loss": 0.6113, "step": 14069 }, { - "epoch": 2.9503040469700146, - "grad_norm": 4.178873859518184, - "learning_rate": 1.4390340722277851e-08, - "loss": 0.1319, + "epoch": 1.9867269133013274, + "grad_norm": 3.799682313176289, + "learning_rate": 5.410181355324622e-06, + "loss": 0.5621, "step": 14070 }, { - "epoch": 2.950513734535542, - "grad_norm": 4.488116823658961, - "learning_rate": 1.4269188272602042e-08, - "loss": 0.1454, + "epoch": 1.9868681163513133, + "grad_norm": 3.305948947377797, + "learning_rate": 5.408827015085753e-06, + "loss": 0.4899, "step": 14071 }, { - "epoch": 2.9507234221010696, - "grad_norm": 4.355603178584133, - "learning_rate": 1.4148547608949793e-08, - "loss": 0.1493, + "epoch": 1.9870093194012992, + "grad_norm": 3.178203680661457, + "learning_rate": 5.407472781543761e-06, + "loss": 0.4897, "step": 14072 }, { - "epoch": 2.9509331096665967, - "grad_norm": 3.304392985046987, - "learning_rate": 1.4028418737505045e-08, - "loss": 0.0967, + "epoch": 1.987150522451285, + "grad_norm": 3.469910386822805, + "learning_rate": 5.406118654730117e-06, + "loss": 0.4612, "step": 14073 }, { - "epoch": 2.9511427972321242, - "grad_norm": 5.439950625921383, - "learning_rate": 1.3908801664422878e-08, - "loss": 0.1273, + "epoch": 1.987291725501271, + "grad_norm": 3.5526577549284584, + "learning_rate": 5.404764634676297e-06, + "loss": 0.5904, "step": 14074 }, { - "epoch": 2.9513524847976518, - "grad_norm": 5.422996009175124, - "learning_rate": 1.378969639583394e-08, - "loss": 0.1905, + "epoch": 1.9874329285512569, + "grad_norm": 3.636965306656946, + "learning_rate": 5.403410721413754e-06, + "loss": 0.5814, "step": 14075 }, { - "epoch": 2.951562172363179, - "grad_norm": 3.6524470416038537, - "learning_rate": 1.367110293784335e-08, - "loss": 0.1201, + "epoch": 1.9875741316012427, + "grad_norm": 3.9798626218358555, + "learning_rate": 5.402056914973965e-06, + "loss": 0.5199, "step": 14076 }, { - "epoch": 2.9517718599287064, - "grad_norm": 3.8917884818864064, - "learning_rate": 1.3553021296530688e-08, - "loss": 0.1563, + "epoch": 1.9877153346512286, + "grad_norm": 3.1541254382885207, + "learning_rate": 5.400703215388387e-06, + "loss": 0.4532, "step": 14077 }, { - "epoch": 2.9519815474942335, - "grad_norm": 6.399820970154888, - "learning_rate": 1.343545147794445e-08, - "loss": 0.191, + "epoch": 1.9878565377012145, + "grad_norm": 4.103584707812915, + "learning_rate": 5.399349622688479e-06, + "loss": 0.6473, "step": 14078 }, { - "epoch": 2.952191235059761, - "grad_norm": 4.182213950976563, - "learning_rate": 1.3318393488113147e-08, - "loss": 0.1704, + "epoch": 1.9879977407512002, + "grad_norm": 3.6446340827844828, + "learning_rate": 5.397996136905701e-06, + "loss": 0.4868, "step": 14079 }, { - "epoch": 2.952400922625288, - "grad_norm": 4.504965990511345, - "learning_rate": 1.3201847333035311e-08, - "loss": 0.1658, + "epoch": 1.988138943801186, + "grad_norm": 3.6568943418663924, + "learning_rate": 5.3966427580715044e-06, + "loss": 0.5624, "step": 14080 }, { - "epoch": 2.9526106101908156, - "grad_norm": 4.25142980376893, - "learning_rate": 1.3085813018683946e-08, - "loss": 0.1263, + "epoch": 1.988280146851172, + "grad_norm": 3.0657729728592136, + "learning_rate": 5.395289486217341e-06, + "loss": 0.4499, "step": 14081 }, { - "epoch": 2.952820297756343, - "grad_norm": 3.7597530610010845, - "learning_rate": 1.2970290551005404e-08, - "loss": 0.1263, + "epoch": 1.9884213499011578, + "grad_norm": 3.6040588953093002, + "learning_rate": 5.393936321374663e-06, + "loss": 0.5029, "step": 14082 }, { - "epoch": 2.95302998532187, - "grad_norm": 4.213781412407541, - "learning_rate": 1.2855279935921617e-08, - "loss": 0.1641, + "epoch": 1.9885625529511437, + "grad_norm": 3.176063999664221, + "learning_rate": 5.392583263574915e-06, + "loss": 0.439, "step": 14083 }, { - "epoch": 2.9532396728873977, - "grad_norm": 3.51960655122851, - "learning_rate": 1.2740781179325646e-08, - "loss": 0.0979, + "epoch": 1.9887037560011296, + "grad_norm": 3.7458423890007975, + "learning_rate": 5.391230312849543e-06, + "loss": 0.5877, "step": 14084 }, { - "epoch": 2.9534493604529253, - "grad_norm": 4.7330294194293225, - "learning_rate": 1.2626794287088351e-08, - "loss": 0.1282, + "epoch": 1.9888449590511155, + "grad_norm": 4.4646967106741755, + "learning_rate": 5.389877469229987e-06, + "loss": 0.6864, "step": 14085 }, { - "epoch": 2.9536590480184524, - "grad_norm": 4.515445262019607, - "learning_rate": 1.2513319265049507e-08, - "loss": 0.1276, + "epoch": 1.9889861621011014, + "grad_norm": 4.578808672263429, + "learning_rate": 5.388524732747688e-06, + "loss": 0.5813, "step": 14086 }, { - "epoch": 2.95386873558398, - "grad_norm": 3.5936529415956, - "learning_rate": 1.2400356119024459e-08, - "loss": 0.1234, + "epoch": 1.9891273651510872, + "grad_norm": 3.243696386494515, + "learning_rate": 5.387172103434078e-06, + "loss": 0.5088, "step": 14087 }, { - "epoch": 2.9540784231495074, - "grad_norm": 6.248758685403473, - "learning_rate": 1.2287904854804134e-08, - "loss": 0.1204, + "epoch": 1.9892685682010731, + "grad_norm": 4.345284359394688, + "learning_rate": 5.385819581320602e-06, + "loss": 0.6071, "step": 14088 }, { - "epoch": 2.9542881107150345, - "grad_norm": 3.4180275791418784, - "learning_rate": 1.2175965478152807e-08, - "loss": 0.1245, + "epoch": 1.989409771251059, + "grad_norm": 4.463065808901525, + "learning_rate": 5.384467166438685e-06, + "loss": 0.553, "step": 14089 }, { - "epoch": 2.954497798280562, - "grad_norm": 4.7399059175177864, - "learning_rate": 1.2064537994804782e-08, - "loss": 0.1285, + "epoch": 1.9895509743010449, + "grad_norm": 4.663979135640216, + "learning_rate": 5.3831148588197615e-06, + "loss": 0.827, "step": 14090 }, { - "epoch": 2.9547074858460896, - "grad_norm": 3.0976946437219386, - "learning_rate": 1.1953622410473264e-08, - "loss": 0.0932, + "epoch": 1.9896921773510308, + "grad_norm": 3.430253573371457, + "learning_rate": 5.3817626584952506e-06, + "loss": 0.532, "step": 14091 }, { - "epoch": 2.9549171734116166, - "grad_norm": 3.702908563299613, - "learning_rate": 1.1843218730841488e-08, - "loss": 0.1505, + "epoch": 1.9898333804010166, + "grad_norm": 3.8808844403018106, + "learning_rate": 5.3804105654965784e-06, + "loss": 0.5927, "step": 14092 }, { - "epoch": 2.955126860977144, - "grad_norm": 3.87931292743932, - "learning_rate": 1.1733326961568259e-08, - "loss": 0.1378, + "epoch": 1.9899745834510025, + "grad_norm": 3.4537343764441726, + "learning_rate": 5.379058579855172e-06, + "loss": 0.473, "step": 14093 }, { - "epoch": 2.9553365485426717, - "grad_norm": 3.215499501005404, - "learning_rate": 1.1623947108285738e-08, - "loss": 0.1046, + "epoch": 1.9901157865009884, + "grad_norm": 3.734558252415471, + "learning_rate": 5.3777067016024495e-06, + "loss": 0.5917, "step": 14094 }, { - "epoch": 2.9555462361081988, - "grad_norm": 3.3500192644438074, - "learning_rate": 1.1515079176600552e-08, - "loss": 0.0771, + "epoch": 1.9902569895509743, + "grad_norm": 3.51712448839059, + "learning_rate": 5.376354930769827e-06, + "loss": 0.612, "step": 14095 }, { - "epoch": 2.9557559236737263, - "grad_norm": 2.8545588453916455, - "learning_rate": 1.1406723172091572e-08, - "loss": 0.0918, + "epoch": 1.9903981926009602, + "grad_norm": 3.6222833097605363, + "learning_rate": 5.375003267388719e-06, + "loss": 0.5816, "step": 14096 }, { - "epoch": 2.9559656112392534, - "grad_norm": 4.2568969985816425, - "learning_rate": 1.1298879100311022e-08, - "loss": 0.1514, + "epoch": 1.990539395650946, + "grad_norm": 3.331599129932487, + "learning_rate": 5.373651711490538e-06, + "loss": 0.5331, "step": 14097 }, { - "epoch": 2.956175298804781, - "grad_norm": 4.316764277393882, - "learning_rate": 1.1191546966788925e-08, - "loss": 0.1323, + "epoch": 1.990680598700932, + "grad_norm": 3.5198798999637924, + "learning_rate": 5.372300263106693e-06, + "loss": 0.545, "step": 14098 }, { - "epoch": 2.956384986370308, - "grad_norm": 4.420689405349377, - "learning_rate": 1.1084726777024213e-08, - "loss": 0.1353, + "epoch": 1.9908218017509178, + "grad_norm": 4.05490579461452, + "learning_rate": 5.370948922268591e-06, + "loss": 0.5371, "step": 14099 }, { - "epoch": 2.9565946739358355, - "grad_norm": 4.3683750946763595, - "learning_rate": 1.0978418536492507e-08, - "loss": 0.1445, + "epoch": 1.9909630048009037, + "grad_norm": 4.003866362314488, + "learning_rate": 5.3695976890076375e-06, + "loss": 0.6583, "step": 14100 }, { - "epoch": 2.956804361501363, - "grad_norm": 4.703686206228155, - "learning_rate": 1.0872622250641674e-08, - "loss": 0.1331, + "epoch": 1.9911042078508896, + "grad_norm": 5.208400190194892, + "learning_rate": 5.368246563355234e-06, + "loss": 0.5748, "step": 14101 }, { - "epoch": 2.95701404906689, - "grad_norm": 3.125036009342627, - "learning_rate": 1.0767337924894038e-08, - "loss": 0.0897, + "epoch": 1.9912454109008755, + "grad_norm": 3.0637301416234424, + "learning_rate": 5.3668955453427795e-06, + "loss": 0.3986, "step": 14102 }, { - "epoch": 2.9572237366324177, - "grad_norm": 3.130697772858216, - "learning_rate": 1.0662565564646398e-08, - "loss": 0.1158, + "epoch": 1.9913866139508614, + "grad_norm": 3.2876650918687678, + "learning_rate": 5.365544635001671e-06, + "loss": 0.5307, "step": 14103 }, { - "epoch": 2.957433424197945, - "grad_norm": 3.875383946649768, - "learning_rate": 1.0558305175266682e-08, - "loss": 0.1131, + "epoch": 1.9915278170008472, + "grad_norm": 3.574000436075264, + "learning_rate": 5.3641938323633025e-06, + "loss": 0.4753, "step": 14104 }, { - "epoch": 2.9576431117634723, - "grad_norm": 4.237132588760174, - "learning_rate": 1.045455676210061e-08, - "loss": 0.1401, + "epoch": 1.9916690200508331, + "grad_norm": 4.096727148201548, + "learning_rate": 5.362843137459068e-06, + "loss": 0.6974, "step": 14105 }, { - "epoch": 2.957852799329, - "grad_norm": 5.153738027330773, - "learning_rate": 1.0351320330465043e-08, - "loss": 0.15, + "epoch": 1.991810223100819, + "grad_norm": 3.209796608290135, + "learning_rate": 5.3614925503203586e-06, + "loss": 0.4361, "step": 14106 }, { - "epoch": 2.9580624868945273, - "grad_norm": 3.967829597323568, - "learning_rate": 1.0248595885649081e-08, - "loss": 0.1511, + "epoch": 1.991951426150805, + "grad_norm": 3.230951473200884, + "learning_rate": 5.360142070978562e-06, + "loss": 0.4297, "step": 14107 }, { - "epoch": 2.9582721744600544, - "grad_norm": 4.6359349701507755, - "learning_rate": 1.0146383432919626e-08, - "loss": 0.1511, + "epoch": 1.9920926292007908, + "grad_norm": 3.5667161632738047, + "learning_rate": 5.358791699465056e-06, + "loss": 0.4929, "step": 14108 }, { - "epoch": 2.958481862025582, - "grad_norm": 3.881871876700195, - "learning_rate": 1.0044682977514708e-08, - "loss": 0.1357, + "epoch": 1.9922338322507767, + "grad_norm": 3.838898420857128, + "learning_rate": 5.357441435811224e-06, + "loss": 0.5739, "step": 14109 }, { - "epoch": 2.9586915495911095, - "grad_norm": 4.798127728287746, - "learning_rate": 9.943494524646825e-09, - "loss": 0.1509, + "epoch": 1.9923750353007625, + "grad_norm": 3.5813676547244966, + "learning_rate": 5.35609128004845e-06, + "loss": 0.5014, "step": 14110 }, { - "epoch": 2.9589012371566366, - "grad_norm": 4.273614643947953, - "learning_rate": 9.842818079500715e-09, - "loss": 0.1326, + "epoch": 1.9925162383507484, + "grad_norm": 3.815652857806033, + "learning_rate": 5.3547412322081095e-06, + "loss": 0.5552, "step": 14111 }, { - "epoch": 2.959110924722164, - "grad_norm": 5.6311653223125155, - "learning_rate": 9.74265364723781e-09, - "loss": 0.1109, + "epoch": 1.9926574414007343, + "grad_norm": 3.974073335817395, + "learning_rate": 5.353391292321577e-06, + "loss": 0.52, "step": 14112 }, { - "epoch": 2.9593206122876916, - "grad_norm": 5.534786551727605, - "learning_rate": 9.643001232990667e-09, - "loss": 0.1524, + "epoch": 1.99279864445072, + "grad_norm": 3.1315195713257484, + "learning_rate": 5.352041460420224e-06, + "loss": 0.4843, "step": 14113 }, { - "epoch": 2.9595302998532187, - "grad_norm": 4.833773908175576, - "learning_rate": 9.543860841867425e-09, - "loss": 0.128, + "epoch": 1.9929398475007059, + "grad_norm": 3.3439127415465673, + "learning_rate": 5.3506917365354205e-06, + "loss": 0.4724, "step": 14114 }, { - "epoch": 2.959739987418746, - "grad_norm": 4.801457682181429, - "learning_rate": 9.445232478949573e-09, - "loss": 0.1644, + "epoch": 1.9930810505506917, + "grad_norm": 3.0946264095713163, + "learning_rate": 5.349342120698533e-06, + "loss": 0.4855, "step": 14115 }, { - "epoch": 2.9599496749842733, - "grad_norm": 4.4077670662743245, - "learning_rate": 9.347116149289737e-09, - "loss": 0.1331, + "epoch": 1.9932222536006776, + "grad_norm": 3.4416027525340116, + "learning_rate": 5.347992612940927e-06, + "loss": 0.4581, "step": 14116 }, { - "epoch": 2.960159362549801, - "grad_norm": 3.9503032056599396, - "learning_rate": 9.249511857919446e-09, - "loss": 0.1171, + "epoch": 1.9933634566506635, + "grad_norm": 2.780677412453508, + "learning_rate": 5.346643213293962e-06, + "loss": 0.3918, "step": 14117 }, { - "epoch": 2.960369050115328, - "grad_norm": 4.159254517792313, - "learning_rate": 9.152419609839148e-09, - "loss": 0.1507, + "epoch": 1.9935046597006494, + "grad_norm": 3.1745876731408673, + "learning_rate": 5.345293921789e-06, + "loss": 0.4443, "step": 14118 }, { - "epoch": 2.9605787376808554, - "grad_norm": 3.2806793475210876, - "learning_rate": 9.055839410025969e-09, - "loss": 0.1176, + "epoch": 1.9936458627506353, + "grad_norm": 3.7227731812250924, + "learning_rate": 5.343944738457396e-06, + "loss": 0.6905, "step": 14119 }, { - "epoch": 2.960788425246383, - "grad_norm": 3.478096097793844, - "learning_rate": 8.959771263429285e-09, - "loss": 0.0945, + "epoch": 1.9937870658006211, + "grad_norm": 3.821771934729362, + "learning_rate": 5.3425956633305075e-06, + "loss": 0.5668, "step": 14120 }, { - "epoch": 2.96099811281191, - "grad_norm": 3.7308225943232443, - "learning_rate": 8.864215174972934e-09, - "loss": 0.1114, + "epoch": 1.993928268850607, + "grad_norm": 3.3362924667742666, + "learning_rate": 5.341246696439679e-06, + "loss": 0.429, "step": 14121 }, { - "epoch": 2.9612078003774376, - "grad_norm": 4.610521197152587, - "learning_rate": 8.769171149554111e-09, - "loss": 0.1355, + "epoch": 1.994069471900593, + "grad_norm": 3.7720870163213176, + "learning_rate": 5.3398978378162696e-06, + "loss": 0.5339, "step": 14122 }, { - "epoch": 2.961417487942965, - "grad_norm": 4.0872662001732865, - "learning_rate": 8.674639192044475e-09, - "loss": 0.1507, + "epoch": 1.9942106749505788, + "grad_norm": 4.021627448380206, + "learning_rate": 5.338549087491623e-06, + "loss": 0.6265, "step": 14123 }, { - "epoch": 2.961627175508492, - "grad_norm": 4.38588082377484, - "learning_rate": 8.58061930728793e-09, - "loss": 0.1438, + "epoch": 1.9943518780005647, + "grad_norm": 3.269514706156732, + "learning_rate": 5.337200445497084e-06, + "loss": 0.4721, "step": 14124 }, { - "epoch": 2.9618368630740197, - "grad_norm": 4.133623764095485, - "learning_rate": 8.487111500103951e-09, - "loss": 0.1331, + "epoch": 1.9944930810505506, + "grad_norm": 3.663667577988536, + "learning_rate": 5.3358519118639895e-06, + "loss": 0.5575, "step": 14125 }, { - "epoch": 2.9620465506395472, - "grad_norm": 4.2474664480122994, - "learning_rate": 8.394115775286481e-09, - "loss": 0.1216, + "epoch": 1.9946342841005364, + "grad_norm": 4.493244696682536, + "learning_rate": 5.33450348662368e-06, + "loss": 0.655, "step": 14126 }, { - "epoch": 2.9622562382050743, - "grad_norm": 4.422076686776864, - "learning_rate": 8.30163213759838e-09, - "loss": 0.1319, + "epoch": 1.9947754871505223, + "grad_norm": 3.494900174633416, + "learning_rate": 5.333155169807498e-06, + "loss": 0.5459, "step": 14127 }, { - "epoch": 2.962465925770602, - "grad_norm": 5.634129090279814, - "learning_rate": 8.20966059178252e-09, - "loss": 0.1585, + "epoch": 1.9949166902005082, + "grad_norm": 3.3629943872708643, + "learning_rate": 5.331806961446774e-06, + "loss": 0.4569, "step": 14128 }, { - "epoch": 2.9626756133361294, - "grad_norm": 3.956225010042866, - "learning_rate": 8.118201142550685e-09, - "loss": 0.13, + "epoch": 1.995057893250494, + "grad_norm": 3.776577727677247, + "learning_rate": 5.3304588615728405e-06, + "loss": 0.4909, "step": 14129 }, { - "epoch": 2.9628853009016565, - "grad_norm": 3.469172799246103, - "learning_rate": 8.02725379459135e-09, - "loss": 0.0961, + "epoch": 1.99519909630048, + "grad_norm": 3.2977141035818014, + "learning_rate": 5.3291108702170244e-06, + "loss": 0.535, "step": 14130 }, { - "epoch": 2.963094988467184, - "grad_norm": 5.339445172687056, - "learning_rate": 7.936818552565229e-09, - "loss": 0.118, + "epoch": 1.9953402993504659, + "grad_norm": 4.761668998421619, + "learning_rate": 5.327762987410657e-06, + "loss": 0.6586, "step": 14131 }, { - "epoch": 2.9633046760327115, - "grad_norm": 3.402977064657105, - "learning_rate": 7.846895421107504e-09, - "loss": 0.1091, + "epoch": 1.9954815024004517, + "grad_norm": 3.772342215839018, + "learning_rate": 5.3264152131850585e-06, + "loss": 0.5715, "step": 14132 }, { - "epoch": 2.9635143635982386, - "grad_norm": 5.4545633818913615, - "learning_rate": 7.75748440482671e-09, - "loss": 0.1335, + "epoch": 1.9956227054504376, + "grad_norm": 4.049886911514595, + "learning_rate": 5.325067547571554e-06, + "loss": 0.6804, "step": 14133 }, { - "epoch": 2.963724051163766, - "grad_norm": 4.749579349435152, - "learning_rate": 7.668585508305849e-09, - "loss": 0.136, + "epoch": 1.9957639085004235, + "grad_norm": 3.6322310435181837, + "learning_rate": 5.323719990601459e-06, + "loss": 0.4623, "step": 14134 }, { - "epoch": 2.963933738729293, - "grad_norm": 6.624755674368858, - "learning_rate": 7.580198736101273e-09, - "loss": 0.2214, + "epoch": 1.9959051115504094, + "grad_norm": 3.7530447264745255, + "learning_rate": 5.322372542306094e-06, + "loss": 0.674, "step": 14135 }, { - "epoch": 2.9641434262948207, - "grad_norm": 4.889040306502076, - "learning_rate": 7.492324092742697e-09, - "loss": 0.1428, + "epoch": 1.9960463146003953, + "grad_norm": 4.069142408141527, + "learning_rate": 5.321025202716769e-06, + "loss": 0.6105, "step": 14136 }, { - "epoch": 2.964353113860348, - "grad_norm": 4.254509176834944, - "learning_rate": 7.404961582732073e-09, - "loss": 0.1112, + "epoch": 1.9961875176503812, + "grad_norm": 3.4630871699004815, + "learning_rate": 5.319677971864798e-06, + "loss": 0.5535, "step": 14137 }, { - "epoch": 2.9645628014258754, - "grad_norm": 4.240742374038316, - "learning_rate": 7.318111210550261e-09, - "loss": 0.1358, + "epoch": 1.996328720700367, + "grad_norm": 4.394876662720301, + "learning_rate": 5.318330849781485e-06, + "loss": 0.6236, "step": 14138 }, { - "epoch": 2.964772488991403, - "grad_norm": 4.075283158601231, - "learning_rate": 7.2317729806448135e-09, - "loss": 0.0999, + "epoch": 1.996469923750353, + "grad_norm": 3.8526647510159124, + "learning_rate": 5.316983836498146e-06, + "loss": 0.5277, "step": 14139 }, { - "epoch": 2.96498217655693, - "grad_norm": 5.028783092365597, - "learning_rate": 7.145946897444411e-09, - "loss": 0.1376, + "epoch": 1.9966111268003388, + "grad_norm": 2.8911609682361368, + "learning_rate": 5.3156369320460796e-06, + "loss": 0.3831, "step": 14140 }, { - "epoch": 2.9651918641224575, - "grad_norm": 4.71427835216695, - "learning_rate": 7.060632965344427e-09, - "loss": 0.1208, + "epoch": 1.9967523298503247, + "grad_norm": 2.9663938944191317, + "learning_rate": 5.314290136456592e-06, + "loss": 0.3636, "step": 14141 }, { - "epoch": 2.965401551687985, - "grad_norm": 5.4726197349070755, - "learning_rate": 6.97583118872025e-09, - "loss": 0.1841, + "epoch": 1.9968935329003106, + "grad_norm": 4.7647975140723675, + "learning_rate": 5.312943449760975e-06, + "loss": 0.6218, "step": 14142 }, { - "epoch": 2.965611239253512, - "grad_norm": 4.362963347051351, - "learning_rate": 6.8915415719150726e-09, - "loss": 0.1277, + "epoch": 1.9970347359502965, + "grad_norm": 3.236661396881344, + "learning_rate": 5.311596871990527e-06, + "loss": 0.4487, "step": 14143 }, { - "epoch": 2.9658209268190396, - "grad_norm": 3.810859246137735, - "learning_rate": 6.807764119252102e-09, - "loss": 0.1462, + "epoch": 1.9971759390002823, + "grad_norm": 3.826382081715904, + "learning_rate": 5.31025040317654e-06, + "loss": 0.5552, "step": 14144 }, { - "epoch": 2.966030614384567, - "grad_norm": 4.489727302955854, - "learning_rate": 6.724498835022353e-09, - "loss": 0.138, + "epoch": 1.9973171420502682, + "grad_norm": 3.0129649704183246, + "learning_rate": 5.308904043350311e-06, + "loss": 0.3987, "step": 14145 }, { - "epoch": 2.9662403019500942, - "grad_norm": 3.7570083717167995, - "learning_rate": 6.64174572349574e-09, - "loss": 0.0769, + "epoch": 1.997458345100254, + "grad_norm": 4.04305886263828, + "learning_rate": 5.307557792543128e-06, + "loss": 0.584, "step": 14146 }, { - "epoch": 2.9664499895156218, - "grad_norm": 5.596001719452043, - "learning_rate": 6.559504788911098e-09, - "loss": 0.1526, + "epoch": 1.99759954815024, + "grad_norm": 3.882241816920631, + "learning_rate": 5.306211650786273e-06, + "loss": 0.5408, "step": 14147 }, { - "epoch": 2.9666596770811493, - "grad_norm": 3.282781314020852, - "learning_rate": 6.477776035485051e-09, - "loss": 0.1118, + "epoch": 1.9977407512002259, + "grad_norm": 3.7156242297894924, + "learning_rate": 5.304865618111034e-06, + "loss": 0.5256, "step": 14148 }, { - "epoch": 2.9668693646466764, - "grad_norm": 3.8763042191558386, - "learning_rate": 6.396559467406471e-09, - "loss": 0.1519, + "epoch": 1.9978819542502118, + "grad_norm": 3.347998948374895, + "learning_rate": 5.30351969454869e-06, + "loss": 0.4801, "step": 14149 }, { - "epoch": 2.967079052212204, - "grad_norm": 3.966179237688947, - "learning_rate": 6.315855088836476e-09, - "loss": 0.1213, + "epoch": 1.9980231573001976, + "grad_norm": 4.0121535600349425, + "learning_rate": 5.302173880130519e-06, + "loss": 0.5453, "step": 14150 }, { - "epoch": 2.9672887397777314, - "grad_norm": 4.5402958041005625, - "learning_rate": 6.235662903911754e-09, - "loss": 0.1317, + "epoch": 1.9981643603501835, + "grad_norm": 3.525963863183588, + "learning_rate": 5.3008281748878e-06, + "loss": 0.4893, "step": 14151 }, { - "epoch": 2.9674984273432585, - "grad_norm": 4.6955379480158355, - "learning_rate": 6.155982916743463e-09, - "loss": 0.1793, + "epoch": 1.9983055634001694, + "grad_norm": 3.614797379567509, + "learning_rate": 5.299482578851803e-06, + "loss": 0.5181, "step": 14152 }, { - "epoch": 2.967708114908786, - "grad_norm": 3.0117341915061315, - "learning_rate": 6.076815131415004e-09, - "loss": 0.0911, + "epoch": 1.9984467664501553, + "grad_norm": 4.034396814009825, + "learning_rate": 5.2981370920538014e-06, + "loss": 0.6122, "step": 14153 }, { - "epoch": 2.967917802474313, - "grad_norm": 3.3552523549860522, - "learning_rate": 5.998159551982019e-09, - "loss": 0.11, + "epoch": 1.9985879695001412, + "grad_norm": 3.9394532807123497, + "learning_rate": 5.296791714525064e-06, + "loss": 0.5679, "step": 14154 }, { - "epoch": 2.9681274900398407, - "grad_norm": 4.620456743351178, - "learning_rate": 5.92001618247684e-09, - "loss": 0.1245, + "epoch": 1.998729172550127, + "grad_norm": 3.3692250589801445, + "learning_rate": 5.295446446296855e-06, + "loss": 0.5045, "step": 14155 }, { - "epoch": 2.9683371776053677, - "grad_norm": 4.004282909447476, - "learning_rate": 5.842385026906261e-09, - "loss": 0.1238, + "epoch": 1.998870375600113, + "grad_norm": 3.7991754933944493, + "learning_rate": 5.294101287400435e-06, + "loss": 0.59, "step": 14156 }, { - "epoch": 2.9685468651708953, - "grad_norm": 4.604404931440074, - "learning_rate": 5.765266089245991e-09, - "loss": 0.1603, + "epoch": 1.9990115786500988, + "grad_norm": 3.362914024480601, + "learning_rate": 5.292756237867074e-06, + "loss": 0.4627, "step": 14157 }, { - "epoch": 2.968756552736423, - "grad_norm": 4.744417334818594, - "learning_rate": 5.688659373450644e-09, - "loss": 0.1156, + "epoch": 1.9991527817000847, + "grad_norm": 3.1203553867958935, + "learning_rate": 5.291411297728027e-06, + "loss": 0.4926, "step": 14158 }, { - "epoch": 2.96896624030195, - "grad_norm": 3.9765035255092926, - "learning_rate": 5.61256488344597e-09, - "loss": 0.1157, + "epoch": 1.9992939847500706, + "grad_norm": 3.6160683926808073, + "learning_rate": 5.290066467014545e-06, + "loss": 0.5583, "step": 14159 }, { - "epoch": 2.9691759278674774, - "grad_norm": 3.626478872507043, - "learning_rate": 5.536982623131071e-09, - "loss": 0.1292, + "epoch": 1.9994351878000565, + "grad_norm": 3.450972404441004, + "learning_rate": 5.2887217457578856e-06, + "loss": 0.4696, "step": 14160 }, { - "epoch": 2.969385615433005, - "grad_norm": 3.034996859619405, - "learning_rate": 5.4619125963806255e-09, - "loss": 0.0909, + "epoch": 1.9995763908500424, + "grad_norm": 2.8505279836101414, + "learning_rate": 5.2873771339892925e-06, + "loss": 0.4525, "step": 14161 }, { - "epoch": 2.969595302998532, - "grad_norm": 3.5489429510148462, - "learning_rate": 5.3873548070426665e-09, - "loss": 0.087, + "epoch": 1.9997175939000282, + "grad_norm": 3.721099729355444, + "learning_rate": 5.286032631740023e-06, + "loss": 0.533, "step": 14162 }, { - "epoch": 2.9698049905640596, - "grad_norm": 4.456468645690029, - "learning_rate": 5.313309258936361e-09, - "loss": 0.1391, + "epoch": 1.9998587969500141, + "grad_norm": 2.946940992759772, + "learning_rate": 5.2846882390413214e-06, + "loss": 0.4195, "step": 14163 }, { - "epoch": 2.970014678129587, - "grad_norm": 4.3669368723374715, - "learning_rate": 5.239775955858673e-09, - "loss": 0.1327, + "epoch": 2.0, + "grad_norm": 2.925316863283814, + "learning_rate": 5.283343955924427e-06, + "loss": 0.3603, "step": 14164 }, { - "epoch": 2.970224365695114, - "grad_norm": 3.453636289223555, - "learning_rate": 5.166754901576587e-09, - "loss": 0.1083, + "epoch": 2.000141203049986, + "grad_norm": 2.764313582256329, + "learning_rate": 5.281999782420584e-06, + "loss": 0.2957, "step": 14165 }, { - "epoch": 2.9704340532606417, - "grad_norm": 3.774700351581176, - "learning_rate": 5.094246099833777e-09, - "loss": 0.1323, + "epoch": 2.0002824060999718, + "grad_norm": 2.0027093765135153, + "learning_rate": 5.280655718561026e-06, + "loss": 0.2162, "step": 14166 }, { - "epoch": 2.970643740826169, - "grad_norm": 4.406581763555014, - "learning_rate": 5.0222495543461595e-09, - "loss": 0.1447, + "epoch": 2.0004236091499576, + "grad_norm": 2.718105164338127, + "learning_rate": 5.2793117643769935e-06, + "loss": 0.2292, "step": 14167 }, { - "epoch": 2.9708534283916963, - "grad_norm": 4.036319666846888, - "learning_rate": 4.9507652688041145e-09, - "loss": 0.1455, + "epoch": 2.0005648121999435, + "grad_norm": 2.3104670680762873, + "learning_rate": 5.2779679198997145e-06, + "loss": 0.1983, "step": 14168 }, { - "epoch": 2.971063115957224, - "grad_norm": 3.9097460877956274, - "learning_rate": 4.879793246870268e-09, - "loss": 0.1304, + "epoch": 2.0007060152499294, + "grad_norm": 2.426336070834441, + "learning_rate": 5.2766241851604225e-06, + "loss": 0.2435, "step": 14169 }, { - "epoch": 2.9712728035227514, - "grad_norm": 3.4211235322165545, - "learning_rate": 4.809333492183932e-09, - "loss": 0.1398, + "epoch": 2.0008472182999153, + "grad_norm": 2.547201164183543, + "learning_rate": 5.275280560190346e-06, + "loss": 0.2368, "step": 14170 }, { - "epoch": 2.9714824910882784, - "grad_norm": 3.707036201767182, - "learning_rate": 4.739386008353331e-09, - "loss": 0.0915, + "epoch": 2.000988421349901, + "grad_norm": 2.7535339666019767, + "learning_rate": 5.2739370450207075e-06, + "loss": 0.2264, "step": 14171 }, { - "epoch": 2.971692178653806, - "grad_norm": 3.4758162801310193, - "learning_rate": 4.669950798965595e-09, - "loss": 0.119, + "epoch": 2.001129624399887, + "grad_norm": 1.9715003169700214, + "learning_rate": 5.27259363968273e-06, + "loss": 0.201, "step": 14172 }, { - "epoch": 2.971901866219333, - "grad_norm": 3.667608740590732, - "learning_rate": 4.601027867580099e-09, - "loss": 0.1155, + "epoch": 2.001270827449873, + "grad_norm": 2.1153406530200263, + "learning_rate": 5.2712503442076325e-06, + "loss": 0.1899, "step": 14173 }, { - "epoch": 2.9721115537848606, - "grad_norm": 4.607440536294672, - "learning_rate": 4.532617217726243e-09, - "loss": 0.1543, + "epoch": 2.001412030499859, + "grad_norm": 1.8926984858194746, + "learning_rate": 5.269907158626639e-06, + "loss": 0.2006, "step": 14174 }, { - "epoch": 2.9723212413503877, - "grad_norm": 3.3147481178439575, - "learning_rate": 4.464718852913441e-09, - "loss": 0.1157, + "epoch": 2.0015532335498447, + "grad_norm": 2.5361594730093224, + "learning_rate": 5.268564082970964e-06, + "loss": 0.2619, "step": 14175 }, { - "epoch": 2.972530928915915, - "grad_norm": 3.61358622771724, - "learning_rate": 4.397332776620023e-09, - "loss": 0.1133, + "epoch": 2.0016944365998306, + "grad_norm": 2.4405677275249693, + "learning_rate": 5.267221117271812e-06, + "loss": 0.1937, "step": 14176 }, { - "epoch": 2.9727406164814427, - "grad_norm": 5.71801667384248, - "learning_rate": 4.33045899229878e-09, - "loss": 0.1635, + "epoch": 2.0018356396498165, + "grad_norm": 2.0369341148734055, + "learning_rate": 5.265878261560397e-06, + "loss": 0.1733, "step": 14177 }, { - "epoch": 2.97295030404697, - "grad_norm": 4.847679761357739, - "learning_rate": 4.264097503379194e-09, - "loss": 0.1331, + "epoch": 2.0019768426998024, + "grad_norm": 2.4771319069289834, + "learning_rate": 5.264535515867924e-06, + "loss": 0.1934, "step": 14178 }, { - "epoch": 2.9731599916124973, - "grad_norm": 3.3976399018744523, - "learning_rate": 4.198248313260767e-09, - "loss": 0.0992, + "epoch": 2.0021180457497882, + "grad_norm": 2.494853993603759, + "learning_rate": 5.2631928802256024e-06, + "loss": 0.2219, "step": 14179 }, { - "epoch": 2.973369679178025, - "grad_norm": 3.613015178730115, - "learning_rate": 4.132911425318575e-09, - "loss": 0.0826, + "epoch": 2.002259248799774, + "grad_norm": 2.2666890200972833, + "learning_rate": 5.261850354664633e-06, + "loss": 0.172, "step": 14180 }, { - "epoch": 2.973579366743552, - "grad_norm": 4.159919588986853, - "learning_rate": 4.068086842902163e-09, - "loss": 0.1382, + "epoch": 2.00240045184976, + "grad_norm": 1.9982746863492475, + "learning_rate": 5.260507939216217e-06, + "loss": 0.1841, "step": 14181 }, { - "epoch": 2.9737890543090795, - "grad_norm": 4.817047176948955, - "learning_rate": 4.0037745693333185e-09, - "loss": 0.1166, + "epoch": 2.002541654899746, + "grad_norm": 2.1616349671330943, + "learning_rate": 5.259165633911549e-06, + "loss": 0.1363, "step": 14182 }, { - "epoch": 2.973998741874607, - "grad_norm": 6.062466634057586, - "learning_rate": 3.939974607908293e-09, - "loss": 0.1548, + "epoch": 2.0026828579497318, + "grad_norm": 2.2446598198390384, + "learning_rate": 5.257823438781823e-06, + "loss": 0.14, "step": 14183 }, { - "epoch": 2.974208429440134, - "grad_norm": 3.5185610690126508, - "learning_rate": 3.876686961896692e-09, - "loss": 0.1116, + "epoch": 2.0028240609997177, + "grad_norm": 1.8597113972274455, + "learning_rate": 5.256481353858234e-06, + "loss": 0.1464, "step": 14184 }, { - "epoch": 2.9744181170056616, - "grad_norm": 4.937806368366158, - "learning_rate": 3.813911634542589e-09, - "loss": 0.1549, + "epoch": 2.0029652640497035, + "grad_norm": 2.795630820238163, + "learning_rate": 5.255139379171968e-06, + "loss": 0.1914, "step": 14185 }, { - "epoch": 2.974627804571189, - "grad_norm": 3.997290833991194, - "learning_rate": 3.751648629063409e-09, - "loss": 0.1069, + "epoch": 2.0031064670996894, + "grad_norm": 2.0371779665190366, + "learning_rate": 5.253797514754214e-06, + "loss": 0.1248, "step": 14186 }, { - "epoch": 2.974837492136716, - "grad_norm": 4.13437208317382, - "learning_rate": 3.689897948648824e-09, - "loss": 0.1478, + "epoch": 2.0032476701496753, + "grad_norm": 3.0332761946912634, + "learning_rate": 5.252455760636157e-06, + "loss": 0.166, "step": 14187 }, { - "epoch": 2.9750471797022437, - "grad_norm": 3.4103122089167477, - "learning_rate": 3.6286595964651893e-09, - "loss": 0.1006, + "epoch": 2.003388873199661, + "grad_norm": 2.485954602177113, + "learning_rate": 5.2511141168489764e-06, + "loss": 0.1889, "step": 14188 }, { - "epoch": 2.9752568672677713, - "grad_norm": 4.0256495984750575, - "learning_rate": 3.5679335756511057e-09, - "loss": 0.1235, + "epoch": 2.003530076249647, + "grad_norm": 3.085159638632945, + "learning_rate": 5.249772583423855e-06, + "loss": 0.1583, "step": 14189 }, { - "epoch": 2.9754665548332984, - "grad_norm": 4.039987075504377, - "learning_rate": 3.507719889317418e-09, - "loss": 0.1416, + "epoch": 2.003671279299633, + "grad_norm": 3.337559871531928, + "learning_rate": 5.248431160391963e-06, + "loss": 0.1878, "step": 14190 }, { - "epoch": 2.975676242398826, - "grad_norm": 4.676280311929736, - "learning_rate": 3.4480185405516562e-09, - "loss": 0.1797, + "epoch": 2.003812482349619, + "grad_norm": 2.8635680750214925, + "learning_rate": 5.2470898477844815e-06, + "loss": 0.1679, "step": 14191 }, { - "epoch": 2.975885929964353, - "grad_norm": 4.245659538942573, - "learning_rate": 3.3888295324124854e-09, - "loss": 0.1625, + "epoch": 2.0039536853996047, + "grad_norm": 2.98437199484801, + "learning_rate": 5.245748645632583e-06, + "loss": 0.1937, "step": 14192 }, { - "epoch": 2.9760956175298805, - "grad_norm": 4.885486084064281, - "learning_rate": 3.3301528679341444e-09, - "loss": 0.1249, + "epoch": 2.0040948884495906, + "grad_norm": 2.384055370766391, + "learning_rate": 5.24440755396743e-06, + "loss": 0.1667, "step": 14193 }, { - "epoch": 2.9763053050954076, - "grad_norm": 3.753814168439786, - "learning_rate": 3.2719885501242276e-09, - "loss": 0.1369, + "epoch": 2.0042360914995765, + "grad_norm": 3.3477950157204623, + "learning_rate": 5.24306657282019e-06, + "loss": 0.224, "step": 14194 }, { - "epoch": 2.976514992660935, - "grad_norm": 3.46367356200396, - "learning_rate": 3.214336581963684e-09, - "loss": 0.0988, + "epoch": 2.0043772945495624, + "grad_norm": 3.1348614932761243, + "learning_rate": 5.241725702222027e-06, + "loss": 0.1678, "step": 14195 }, { - "epoch": 2.9767246802264626, - "grad_norm": 3.2551930545368846, - "learning_rate": 3.157196966406817e-09, - "loss": 0.0713, + "epoch": 2.0045184975995483, + "grad_norm": 2.5593119380690936, + "learning_rate": 5.240384942204105e-06, + "loss": 0.127, "step": 14196 }, { - "epoch": 2.9769343677919897, - "grad_norm": 3.6762343714189556, - "learning_rate": 3.1005697063812845e-09, - "loss": 0.117, + "epoch": 2.004659700649534, + "grad_norm": 3.4366944809424167, + "learning_rate": 5.2390442927975835e-06, + "loss": 0.2114, "step": 14197 }, { - "epoch": 2.9771440553575172, - "grad_norm": 4.195869216396358, - "learning_rate": 3.04445480479143e-09, - "loss": 0.1307, + "epoch": 2.00480090369952, + "grad_norm": 2.981538377243886, + "learning_rate": 5.237703754033616e-06, + "loss": 0.1667, "step": 14198 }, { - "epoch": 2.9773537429230448, - "grad_norm": 4.830250833372161, - "learning_rate": 2.9888522645116215e-09, - "loss": 0.1115, + "epoch": 2.004942106749506, + "grad_norm": 2.7998566495326433, + "learning_rate": 5.236363325943357e-06, + "loss": 0.1405, "step": 14199 }, { - "epoch": 2.977563430488572, - "grad_norm": 4.068902606082635, - "learning_rate": 2.9337620883918004e-09, - "loss": 0.1408, + "epoch": 2.005083309799492, + "grad_norm": 2.879230384218745, + "learning_rate": 5.235023008557955e-06, + "loss": 0.2018, "step": 14200 }, { - "epoch": 2.9777731180540994, - "grad_norm": 3.928014834035326, - "learning_rate": 2.879184279256375e-09, - "loss": 0.1511, + "epoch": 2.0052245128494777, + "grad_norm": 2.904074177485924, + "learning_rate": 5.2336828019085616e-06, + "loss": 0.2007, "step": 14201 }, { - "epoch": 2.977982805619627, - "grad_norm": 4.174579938672508, - "learning_rate": 2.8251188399008865e-09, - "loss": 0.1077, + "epoch": 2.0053657158994636, + "grad_norm": 3.241188879297599, + "learning_rate": 5.232342706026323e-06, + "loss": 0.2035, "step": 14202 }, { - "epoch": 2.978192493185154, - "grad_norm": 4.472218819325068, - "learning_rate": 2.771565773097562e-09, - "loss": 0.1558, + "epoch": 2.0055069189494494, + "grad_norm": 3.213711204145903, + "learning_rate": 5.231002720942379e-06, + "loss": 0.1945, "step": 14203 }, { - "epoch": 2.9784021807506815, - "grad_norm": 3.378865315142501, - "learning_rate": 2.7185250815908728e-09, - "loss": 0.1181, + "epoch": 2.0056481219994353, + "grad_norm": 2.8725007667883475, + "learning_rate": 5.229662846687873e-06, + "loss": 0.1774, "step": 14204 }, { - "epoch": 2.978611868316209, - "grad_norm": 3.8621367295032343, - "learning_rate": 2.6659967680997544e-09, - "loss": 0.0962, + "epoch": 2.005789325049421, + "grad_norm": 3.027188981647017, + "learning_rate": 5.228323083293943e-06, + "loss": 0.1684, "step": 14205 }, { - "epoch": 2.978821555881736, - "grad_norm": 5.053110291358464, - "learning_rate": 2.6139808353153885e-09, - "loss": 0.1412, + "epoch": 2.005930528099407, + "grad_norm": 3.487025672725611, + "learning_rate": 5.226983430791722e-06, + "loss": 0.1932, "step": 14206 }, { - "epoch": 2.9790312434472637, - "grad_norm": 4.463334170656582, - "learning_rate": 2.56247728590453e-09, - "loss": 0.1328, + "epoch": 2.006071731149393, + "grad_norm": 2.5140653643065196, + "learning_rate": 5.225643889212342e-06, + "loss": 0.1697, "step": 14207 }, { - "epoch": 2.979240931012791, - "grad_norm": 3.6619533364431383, - "learning_rate": 2.5114861225061795e-09, - "loss": 0.0833, + "epoch": 2.006212934199379, + "grad_norm": 2.9079514481862323, + "learning_rate": 5.22430445858694e-06, + "loss": 0.17, "step": 14208 }, { - "epoch": 2.9794506185783183, - "grad_norm": 5.613834569852398, - "learning_rate": 2.4610073477338013e-09, - "loss": 0.1355, + "epoch": 2.0063541372493647, + "grad_norm": 3.092545711536245, + "learning_rate": 5.2229651389466425e-06, + "loss": 0.1615, "step": 14209 }, { - "epoch": 2.979660306143846, - "grad_norm": 6.845096687523262, - "learning_rate": 2.411040964174216e-09, - "loss": 0.1855, + "epoch": 2.0064953402993506, + "grad_norm": 2.358912403650366, + "learning_rate": 5.221625930322571e-06, + "loss": 0.1576, "step": 14210 }, { - "epoch": 2.979869993709373, - "grad_norm": 4.105777188877517, - "learning_rate": 2.3615869743887075e-09, - "loss": 0.1076, + "epoch": 2.0066365433493365, + "grad_norm": 3.230862535969985, + "learning_rate": 5.220286832745847e-06, + "loss": 0.2036, "step": 14211 }, { - "epoch": 2.9800796812749004, - "grad_norm": 7.2156246461822775, - "learning_rate": 2.312645380913026e-09, - "loss": 0.1738, + "epoch": 2.0067777463993224, + "grad_norm": 3.2766740755668966, + "learning_rate": 5.2189478462475905e-06, + "loss": 0.1743, "step": 14212 }, { - "epoch": 2.9802893688404275, - "grad_norm": 4.228085455036401, - "learning_rate": 2.2642161862529434e-09, - "loss": 0.1382, + "epoch": 2.0069189494493083, + "grad_norm": 2.576363529450593, + "learning_rate": 5.217608970858923e-06, + "loss": 0.1315, "step": 14213 }, { - "epoch": 2.980499056405955, - "grad_norm": 5.241006719368144, - "learning_rate": 2.2162993928931397e-09, - "loss": 0.1529, + "epoch": 2.007060152499294, + "grad_norm": 3.175153181539466, + "learning_rate": 5.216270206610959e-06, + "loss": 0.18, "step": 14214 }, { - "epoch": 2.9807087439714826, - "grad_norm": 5.091989618735093, - "learning_rate": 2.1688950032883183e-09, - "loss": 0.1724, + "epoch": 2.00720135554928, + "grad_norm": 3.011573116576983, + "learning_rate": 5.21493155353481e-06, + "loss": 0.2171, "step": 14215 }, { - "epoch": 2.9809184315370096, - "grad_norm": 4.055609026644988, - "learning_rate": 2.1220030198665366e-09, - "loss": 0.1268, + "epoch": 2.007342558599266, + "grad_norm": 3.943489728370928, + "learning_rate": 5.213593011661584e-06, + "loss": 0.1617, "step": 14216 }, { - "epoch": 2.981128119102537, - "grad_norm": 4.4428377679534385, - "learning_rate": 2.0756234450336475e-09, - "loss": 0.1443, + "epoch": 2.007483761649252, + "grad_norm": 3.1586607506246343, + "learning_rate": 5.21225458102239e-06, + "loss": 0.2609, "step": 14217 }, { - "epoch": 2.9813378066680647, - "grad_norm": 4.169969193504995, - "learning_rate": 2.029756281165529e-09, - "loss": 0.1151, + "epoch": 2.0076249646992377, + "grad_norm": 2.7151740132946847, + "learning_rate": 5.2109162616483325e-06, + "loss": 0.1475, "step": 14218 }, { - "epoch": 2.9815474942335918, - "grad_norm": 3.324104290161633, - "learning_rate": 1.984401530612523e-09, - "loss": 0.096, + "epoch": 2.0077661677492236, + "grad_norm": 2.938301378282818, + "learning_rate": 5.209578053570512e-06, + "loss": 0.142, "step": 14219 }, { - "epoch": 2.9817571817991193, - "grad_norm": 4.090981989088206, - "learning_rate": 1.939559195700547e-09, - "loss": 0.1211, + "epoch": 2.0079073707992094, + "grad_norm": 2.828506970847201, + "learning_rate": 5.20823995682003e-06, + "loss": 0.1729, "step": 14220 }, { - "epoch": 2.981966869364647, - "grad_norm": 4.746819267430773, - "learning_rate": 1.8952292787266514e-09, - "loss": 0.1231, + "epoch": 2.0080485738491953, + "grad_norm": 2.825051815880469, + "learning_rate": 5.20690197142798e-06, + "loss": 0.1851, "step": 14221 }, { - "epoch": 2.982176556930174, - "grad_norm": 5.209049922443455, - "learning_rate": 1.8514117819623534e-09, - "loss": 0.1256, + "epoch": 2.008189776899181, + "grad_norm": 5.26932705193779, + "learning_rate": 5.205564097425458e-06, + "loss": 0.2492, "step": 14222 }, { - "epoch": 2.9823862444957014, - "grad_norm": 6.032526163186445, - "learning_rate": 1.8081067076547443e-09, - "loss": 0.1765, + "epoch": 2.008330979949167, + "grad_norm": 2.572939951277935, + "learning_rate": 5.204226334843558e-06, + "loss": 0.1443, "step": 14223 }, { - "epoch": 2.982595932061229, - "grad_norm": 4.949502494411583, - "learning_rate": 1.7653140580220495e-09, - "loss": 0.1297, + "epoch": 2.008472182999153, + "grad_norm": 2.377493368791424, + "learning_rate": 5.2028886837133605e-06, + "loss": 0.1729, "step": 14224 }, { - "epoch": 2.982805619626756, - "grad_norm": 4.369051221594472, - "learning_rate": 1.7230338352591803e-09, - "loss": 0.1643, + "epoch": 2.008613386049139, + "grad_norm": 3.0403590517460297, + "learning_rate": 5.201551144065964e-06, + "loss": 0.1894, "step": 14225 }, { - "epoch": 2.9830153071922836, - "grad_norm": 3.6989507289178816, - "learning_rate": 1.681266041531071e-09, - "loss": 0.1491, + "epoch": 2.0087545890991247, + "grad_norm": 2.9680354506223585, + "learning_rate": 5.200213715932449e-06, + "loss": 0.1945, "step": 14226 }, { - "epoch": 2.983224994757811, - "grad_norm": 4.150111420832969, - "learning_rate": 1.6400106789793424e-09, - "loss": 0.1247, + "epoch": 2.0088957921491106, + "grad_norm": 2.776244738273056, + "learning_rate": 5.198876399343893e-06, + "loss": 0.1701, "step": 14227 }, { - "epoch": 2.983434682323338, - "grad_norm": 3.1265917227548012, - "learning_rate": 1.5992677497178587e-09, - "loss": 0.0898, + "epoch": 2.0090369951990965, + "grad_norm": 3.739130407624207, + "learning_rate": 5.197539194331373e-06, + "loss": 0.1941, "step": 14228 }, { - "epoch": 2.9836443698888657, - "grad_norm": 3.944158507349355, - "learning_rate": 1.55903725583606e-09, - "loss": 0.1542, + "epoch": 2.0091781982490824, + "grad_norm": 2.863443668479093, + "learning_rate": 5.196202100925967e-06, + "loss": 0.1955, "step": 14229 }, { - "epoch": 2.983854057454393, - "grad_norm": 3.3946792700038815, - "learning_rate": 1.51931919939452e-09, - "loss": 0.1065, + "epoch": 2.0093194012990683, + "grad_norm": 2.2225374371218947, + "learning_rate": 5.194865119158752e-06, + "loss": 0.1299, "step": 14230 }, { - "epoch": 2.9840637450199203, - "grad_norm": 4.300415961632463, - "learning_rate": 1.480113582430498e-09, - "loss": 0.1214, + "epoch": 2.009460604349054, + "grad_norm": 3.004502551734456, + "learning_rate": 5.1935282490607964e-06, + "loss": 0.1695, "step": 14231 }, { - "epoch": 2.9842734325854474, - "grad_norm": 4.162462353786712, - "learning_rate": 1.441420406951277e-09, - "loss": 0.1317, + "epoch": 2.00960180739904, + "grad_norm": 2.591361505249893, + "learning_rate": 5.192191490663168e-06, + "loss": 0.1569, "step": 14232 }, { - "epoch": 2.984483120150975, - "grad_norm": 4.793681544454836, - "learning_rate": 1.403239674940826e-09, - "loss": 0.1837, + "epoch": 2.0097430104490255, + "grad_norm": 2.523837340637139, + "learning_rate": 5.190854843996934e-06, + "loss": 0.189, "step": 14233 }, { - "epoch": 2.9846928077165025, - "grad_norm": 3.9930645499280746, - "learning_rate": 1.365571388356468e-09, - "loss": 0.1218, + "epoch": 2.0098842134990114, + "grad_norm": 3.2449230023383038, + "learning_rate": 5.189518309093157e-06, + "loss": 0.1788, "step": 14234 }, { - "epoch": 2.9849024952820296, - "grad_norm": 4.503888404654512, - "learning_rate": 1.3284155491277706e-09, - "loss": 0.154, + "epoch": 2.0100254165489972, + "grad_norm": 2.792490598095333, + "learning_rate": 5.188181885982896e-06, + "loss": 0.1572, "step": 14235 }, { - "epoch": 2.985112182847557, - "grad_norm": 3.5728393507715244, - "learning_rate": 1.2917721591598764e-09, - "loss": 0.1022, + "epoch": 2.010166619598983, + "grad_norm": 2.957272892195983, + "learning_rate": 5.18684557469721e-06, + "loss": 0.1689, "step": 14236 }, { - "epoch": 2.9853218704130846, - "grad_norm": 3.774709005663481, - "learning_rate": 1.2556412203301727e-09, - "loss": 0.1492, + "epoch": 2.010307822648969, + "grad_norm": 3.074026717603523, + "learning_rate": 5.185509375267155e-06, + "loss": 0.2192, "step": 14237 }, { - "epoch": 2.9855315579786117, - "grad_norm": 4.30946456616066, - "learning_rate": 1.2200227344916216e-09, - "loss": 0.1868, + "epoch": 2.010449025698955, + "grad_norm": 2.9301614419600104, + "learning_rate": 5.184173287723782e-06, + "loss": 0.1761, "step": 14238 }, { - "epoch": 2.985741245544139, - "grad_norm": 4.5471076876092855, - "learning_rate": 1.1849167034683196e-09, - "loss": 0.1598, + "epoch": 2.0105902287489408, + "grad_norm": 3.2150119772573413, + "learning_rate": 5.182837312098141e-06, + "loss": 0.1863, "step": 14239 }, { - "epoch": 2.9859509331096667, - "grad_norm": 4.80132463610375, - "learning_rate": 1.150323129059938e-09, - "loss": 0.1626, + "epoch": 2.0107314317989267, + "grad_norm": 2.674874134693218, + "learning_rate": 5.1815014484212825e-06, + "loss": 0.15, "step": 14240 }, { - "epoch": 2.986160620675194, - "grad_norm": 3.6972411754906074, - "learning_rate": 1.1162420130395036e-09, - "loss": 0.1489, + "epoch": 2.0108726348489125, + "grad_norm": 2.730382139777421, + "learning_rate": 5.1801656967242446e-06, + "loss": 0.1965, "step": 14241 }, { - "epoch": 2.9863703082407214, - "grad_norm": 4.252336012160244, - "learning_rate": 1.0826733571545068e-09, - "loss": 0.1735, + "epoch": 2.0110138378988984, + "grad_norm": 3.4104860281630245, + "learning_rate": 5.178830057038079e-06, + "loss": 0.2252, "step": 14242 }, { - "epoch": 2.986579995806249, - "grad_norm": 4.622739605589684, - "learning_rate": 1.0496171631246831e-09, - "loss": 0.1614, + "epoch": 2.0111550409488843, + "grad_norm": 2.8803258837059036, + "learning_rate": 5.177494529393824e-06, + "loss": 0.1883, "step": 14243 }, { - "epoch": 2.986789683371776, - "grad_norm": 4.575671271683195, - "learning_rate": 1.0170734326442332e-09, - "loss": 0.1383, + "epoch": 2.01129624399887, + "grad_norm": 3.8409567415236, + "learning_rate": 5.1761591138225096e-06, + "loss": 0.164, "step": 14244 }, { - "epoch": 2.9869993709373035, - "grad_norm": 4.055892033166719, - "learning_rate": 9.85042167380712e-10, - "loss": 0.136, + "epoch": 2.011437447048856, + "grad_norm": 2.963409400142365, + "learning_rate": 5.174823810355174e-06, + "loss": 0.1798, "step": 14245 }, { - "epoch": 2.987209058502831, - "grad_norm": 3.718926650698118, - "learning_rate": 9.535233689761392e-10, - "loss": 0.1122, + "epoch": 2.011578650098842, + "grad_norm": 3.9543857642348845, + "learning_rate": 5.1734886190228496e-06, + "loss": 0.2037, "step": 14246 }, { - "epoch": 2.987418746068358, - "grad_norm": 5.665129866823334, - "learning_rate": 9.225170390469995e-10, - "loss": 0.1379, + "epoch": 2.011719853148828, + "grad_norm": 2.60517873264044, + "learning_rate": 5.1721535398565616e-06, + "loss": 0.1424, "step": 14247 }, { - "epoch": 2.9876284336338856, - "grad_norm": 3.9556185589182222, - "learning_rate": 8.920231791798017e-10, - "loss": 0.1131, + "epoch": 2.0118610561988137, + "grad_norm": 6.411939889063179, + "learning_rate": 5.170818572887344e-06, + "loss": 0.1927, "step": 14248 }, { - "epoch": 2.9878381211994127, - "grad_norm": 4.8565627229061015, - "learning_rate": 8.620417909399603e-10, - "loss": 0.1106, + "epoch": 2.0120022592487996, + "grad_norm": 3.2834816983888095, + "learning_rate": 5.169483718146216e-06, + "loss": 0.1621, "step": 14249 }, { - "epoch": 2.9880478087649402, - "grad_norm": 5.022450264050266, - "learning_rate": 8.325728758629137e-10, - "loss": 0.1611, + "epoch": 2.0121434622987855, + "grad_norm": 3.5214453367225875, + "learning_rate": 5.168148975664203e-06, + "loss": 0.1794, "step": 14250 }, { - "epoch": 2.9882574963304673, - "grad_norm": 5.32275588463611, - "learning_rate": 8.036164354596753e-10, - "loss": 0.1456, + "epoch": 2.0122846653487714, + "grad_norm": 3.499273405943138, + "learning_rate": 5.166814345472322e-06, + "loss": 0.2308, "step": 14251 }, { - "epoch": 2.988467183895995, - "grad_norm": 4.717961748688537, - "learning_rate": 7.751724712135034e-10, - "loss": 0.1316, + "epoch": 2.0124258683987573, + "grad_norm": 2.922330510282529, + "learning_rate": 5.165479827601583e-06, + "loss": 0.1481, "step": 14252 }, { - "epoch": 2.9886768714615224, - "grad_norm": 5.079328021821132, - "learning_rate": 7.472409845821205e-10, - "loss": 0.1571, + "epoch": 2.012567071448743, + "grad_norm": 3.903078103500291, + "learning_rate": 5.164145422083007e-06, + "loss": 0.2282, "step": 14253 }, { - "epoch": 2.9888865590270495, - "grad_norm": 4.578899448642258, - "learning_rate": 7.198219769977144e-10, - "loss": 0.1626, + "epoch": 2.012708274498729, + "grad_norm": 2.5586107285867854, + "learning_rate": 5.1628111289476025e-06, + "loss": 0.1487, "step": 14254 }, { - "epoch": 2.989096246592577, - "grad_norm": 5.765370571071565, - "learning_rate": 6.929154498658275e-10, - "loss": 0.185, + "epoch": 2.012849477548715, + "grad_norm": 2.4014701856486385, + "learning_rate": 5.161476948226381e-06, + "loss": 0.157, "step": 14255 }, { - "epoch": 2.9893059341581045, - "grad_norm": 4.101698138628173, - "learning_rate": 6.665214045642466e-10, - "loss": 0.1459, + "epoch": 2.012990680598701, + "grad_norm": 3.03918997455723, + "learning_rate": 5.160142879950343e-06, + "loss": 0.185, "step": 14256 }, { - "epoch": 2.9895156217236316, - "grad_norm": 6.011572093054696, - "learning_rate": 6.406398424463334e-10, - "loss": 0.1642, + "epoch": 2.0131318836486867, + "grad_norm": 3.93537986322291, + "learning_rate": 5.158808924150496e-06, + "loss": 0.2293, "step": 14257 }, { - "epoch": 2.989725309289159, - "grad_norm": 3.406181106997159, - "learning_rate": 6.152707648388045e-10, - "loss": 0.1187, + "epoch": 2.0132730866986726, + "grad_norm": 4.104975124740706, + "learning_rate": 5.157475080857838e-06, + "loss": 0.1952, "step": 14258 }, { - "epoch": 2.9899349968546867, - "grad_norm": 5.184306824398564, - "learning_rate": 5.904141730417312e-10, - "loss": 0.1352, + "epoch": 2.0134142897486584, + "grad_norm": 3.0242805165582864, + "learning_rate": 5.156141350103364e-06, + "loss": 0.1816, "step": 14259 }, { - "epoch": 2.9901446844202137, - "grad_norm": 3.4342081212772215, - "learning_rate": 5.660700683296494e-10, - "loss": 0.1024, + "epoch": 2.0135554927986443, + "grad_norm": 2.7964190397144124, + "learning_rate": 5.154807731918081e-06, + "loss": 0.1841, "step": 14260 }, { - "epoch": 2.9903543719857413, - "grad_norm": 4.716916031842858, - "learning_rate": 5.422384519493395e-10, - "loss": 0.1497, + "epoch": 2.01369669584863, + "grad_norm": 2.8153029263490117, + "learning_rate": 5.1534742263329705e-06, + "loss": 0.1457, "step": 14261 }, { - "epoch": 2.990564059551269, - "grad_norm": 4.631812140620607, - "learning_rate": 5.18919325122047e-10, - "loss": 0.112, + "epoch": 2.013837898898616, + "grad_norm": 4.333958228662656, + "learning_rate": 5.152140833379025e-06, + "loss": 0.2535, "step": 14262 }, { - "epoch": 2.990773747116796, - "grad_norm": 3.108250847427697, - "learning_rate": 4.96112689043482e-10, - "loss": 0.1021, + "epoch": 2.013979101948602, + "grad_norm": 2.8187272028955217, + "learning_rate": 5.150807553087234e-06, + "loss": 0.1274, "step": 14263 }, { - "epoch": 2.9909834346823234, - "grad_norm": 4.221239962168933, - "learning_rate": 4.738185448827093e-10, - "loss": 0.1362, + "epoch": 2.014120304998588, + "grad_norm": 3.7289097788501087, + "learning_rate": 5.1494743854885755e-06, + "loss": 0.1642, "step": 14264 }, { - "epoch": 2.991193122247851, - "grad_norm": 4.145358739628082, - "learning_rate": 4.520368937810382e-10, - "loss": 0.1159, + "epoch": 2.0142615080485737, + "grad_norm": 2.9142387587933034, + "learning_rate": 5.148141330614043e-06, + "loss": 0.1559, "step": 14265 }, { - "epoch": 2.991402809813378, - "grad_norm": 3.9226733597650285, - "learning_rate": 4.307677368564633e-10, - "loss": 0.1304, + "epoch": 2.0144027110985596, + "grad_norm": 3.424627146168462, + "learning_rate": 5.1468083884946104e-06, + "loss": 0.2203, "step": 14266 }, { - "epoch": 2.9916124973789056, - "grad_norm": 3.6770658974418744, - "learning_rate": 4.1001107519922365e-10, - "loss": 0.1336, + "epoch": 2.0145439141485455, + "grad_norm": 2.8524654714765565, + "learning_rate": 5.145475559161255e-06, + "loss": 0.1395, "step": 14267 }, { - "epoch": 2.9918221849444326, - "grad_norm": 3.8167229222572634, - "learning_rate": 3.8976690987180265e-10, - "loss": 0.1163, + "epoch": 2.0146851171985314, + "grad_norm": 3.356551840326665, + "learning_rate": 5.144142842644954e-06, + "loss": 0.1992, "step": 14268 }, { - "epoch": 2.99203187250996, - "grad_norm": 4.153900105342872, - "learning_rate": 3.700352419111486e-10, - "loss": 0.1235, + "epoch": 2.0148263202485173, + "grad_norm": 2.9127948550316467, + "learning_rate": 5.14281023897667e-06, + "loss": 0.1724, "step": 14269 }, { - "epoch": 2.9922415600754873, - "grad_norm": 4.695739705732879, - "learning_rate": 3.508160723308951e-10, - "loss": 0.1417, + "epoch": 2.014967523298503, + "grad_norm": 2.9915798834247758, + "learning_rate": 5.14147774818738e-06, + "loss": 0.1616, "step": 14270 }, { - "epoch": 2.9924512476410148, - "grad_norm": 4.184957444078295, - "learning_rate": 3.3210940211469977e-10, - "loss": 0.144, + "epoch": 2.015108726348489, + "grad_norm": 2.754564000908517, + "learning_rate": 5.140145370308052e-06, + "loss": 0.1849, "step": 14271 }, { - "epoch": 2.9926609352065423, - "grad_norm": 3.5827991661900027, - "learning_rate": 3.139152322217953e-10, - "loss": 0.1222, + "epoch": 2.015249929398475, + "grad_norm": 2.7499737192811278, + "learning_rate": 5.138813105369645e-06, + "loss": 0.1643, "step": 14272 }, { - "epoch": 2.9928706227720694, - "grad_norm": 5.0263091575559, - "learning_rate": 2.9623356358365884e-10, - "loss": 0.1634, + "epoch": 2.015391132448461, + "grad_norm": 2.702267368635129, + "learning_rate": 5.137480953403121e-06, + "loss": 0.1241, "step": 14273 }, { - "epoch": 2.993080310337597, - "grad_norm": 3.332992455458001, - "learning_rate": 2.7906439710734255e-10, - "loss": 0.1123, + "epoch": 2.0155323354984467, + "grad_norm": 3.2157430062993133, + "learning_rate": 5.136148914439441e-06, + "loss": 0.2016, "step": 14274 }, { - "epoch": 2.9932899979031244, - "grad_norm": 3.6597966219254774, - "learning_rate": 2.624077336721431e-10, - "loss": 0.0896, + "epoch": 2.0156735385484326, + "grad_norm": 3.7214830687940554, + "learning_rate": 5.134816988509559e-06, + "loss": 0.2341, "step": 14275 }, { - "epoch": 2.9934996854686515, - "grad_norm": 4.049995356358593, - "learning_rate": 2.462635741329322e-10, - "loss": 0.1267, + "epoch": 2.0158147415984184, + "grad_norm": 2.680550947567722, + "learning_rate": 5.133485175644426e-06, + "loss": 0.1556, "step": 14276 }, { - "epoch": 2.993709373034179, - "grad_norm": 4.861911612798047, - "learning_rate": 2.3063191931571583e-10, - "loss": 0.167, + "epoch": 2.0159559446484043, + "grad_norm": 3.493604159202271, + "learning_rate": 5.132153475875003e-06, + "loss": 0.1795, "step": 14277 }, { - "epoch": 2.9939190605997066, - "grad_norm": 3.785223173552741, - "learning_rate": 2.1551277002318515e-10, - "loss": 0.1303, + "epoch": 2.01609714769839, + "grad_norm": 3.5515040373096523, + "learning_rate": 5.130821889232228e-06, + "loss": 0.2041, "step": 14278 }, { - "epoch": 2.9941287481652337, - "grad_norm": 5.943901446434463, - "learning_rate": 2.0090612702805546e-10, - "loss": 0.1445, + "epoch": 2.016238350748376, + "grad_norm": 3.3819112021125464, + "learning_rate": 5.1294904157470494e-06, + "loss": 0.1543, "step": 14279 }, { - "epoch": 2.994338435730761, - "grad_norm": 4.778420101108463, - "learning_rate": 1.868119910819477e-10, - "loss": 0.1707, + "epoch": 2.016379553798362, + "grad_norm": 2.9024671571010217, + "learning_rate": 5.1281590554504095e-06, + "loss": 0.1462, "step": 14280 }, { - "epoch": 2.9945481232962887, - "grad_norm": 4.351730393628573, - "learning_rate": 1.732303629042864e-10, - "loss": 0.1436, + "epoch": 2.016520756848348, + "grad_norm": 2.7384774592747374, + "learning_rate": 5.126827808373245e-06, + "loss": 0.1393, "step": 14281 }, { - "epoch": 2.994757810861816, - "grad_norm": 4.887286616434012, - "learning_rate": 1.6016124319229165e-10, - "loss": 0.1628, + "epoch": 2.0166619598983337, + "grad_norm": 2.780977016349886, + "learning_rate": 5.125496674546502e-06, + "loss": 0.1818, "step": 14282 }, { - "epoch": 2.9949674984273433, - "grad_norm": 4.680603516026108, - "learning_rate": 1.476046326165381e-10, - "loss": 0.165, + "epoch": 2.0168031629483196, + "grad_norm": 3.0958753675088997, + "learning_rate": 5.124165654001111e-06, + "loss": 0.1723, "step": 14283 }, { - "epoch": 2.995177185992871, - "grad_norm": 3.499897399743875, - "learning_rate": 1.3556053181984497e-10, - "loss": 0.1173, + "epoch": 2.0169443659983055, + "grad_norm": 3.0788198776394315, + "learning_rate": 5.122834746768004e-06, + "loss": 0.2036, "step": 14284 }, { - "epoch": 2.995386873558398, - "grad_norm": 3.3287776000213656, - "learning_rate": 1.2402894141949618e-10, - "loss": 0.1009, + "epoch": 2.0170855690482914, + "grad_norm": 2.7302967453617564, + "learning_rate": 5.1215039528781165e-06, + "loss": 0.1617, "step": 14285 }, { - "epoch": 2.9955965611239255, - "grad_norm": 3.924063056275675, - "learning_rate": 1.1300986200724063e-10, - "loss": 0.1432, + "epoch": 2.0172267720982773, + "grad_norm": 2.9371680412750236, + "learning_rate": 5.120173272362361e-06, + "loss": 0.158, "step": 14286 }, { - "epoch": 2.9958062486894526, - "grad_norm": 3.53551096117981, - "learning_rate": 1.0250329414596138e-10, - "loss": 0.1062, + "epoch": 2.017367975148263, + "grad_norm": 2.1490827787917004, + "learning_rate": 5.118842705251677e-06, + "loss": 0.1217, "step": 14287 }, { - "epoch": 2.99601593625498, - "grad_norm": 4.012627932488079, - "learning_rate": 9.250923837633708e-11, - "loss": 0.1499, + "epoch": 2.017509178198249, + "grad_norm": 2.535003464375976, + "learning_rate": 5.117512251576978e-06, + "loss": 0.1514, "step": 14288 }, { - "epoch": 2.996225623820507, - "grad_norm": 3.0910890372762747, - "learning_rate": 8.302769520907028e-11, - "loss": 0.0994, + "epoch": 2.017650381248235, + "grad_norm": 2.7603601249819008, + "learning_rate": 5.1161819113691865e-06, + "loss": 0.2034, "step": 14289 }, { - "epoch": 2.9964353113860347, - "grad_norm": 4.383922935382639, - "learning_rate": 7.405866513043869e-11, - "loss": 0.1428, + "epoch": 2.017791584298221, + "grad_norm": 2.8555176317647097, + "learning_rate": 5.114851684659219e-06, + "loss": 0.1609, "step": 14290 }, { - "epoch": 2.9966449989515622, - "grad_norm": 4.086305329462803, - "learning_rate": 6.560214860007463e-11, - "loss": 0.1404, + "epoch": 2.0179327873482067, + "grad_norm": 3.2143877287287426, + "learning_rate": 5.113521571477988e-06, + "loss": 0.1343, "step": 14291 }, { - "epoch": 2.9968546865170893, - "grad_norm": 3.7436921038095323, - "learning_rate": 5.765814605207532e-11, - "loss": 0.15, + "epoch": 2.0180739903981926, + "grad_norm": 3.4625203700038596, + "learning_rate": 5.112191571856406e-06, + "loss": 0.1658, "step": 14292 }, { - "epoch": 2.997064374082617, - "grad_norm": 3.412975188056128, - "learning_rate": 5.022665789278236e-11, - "loss": 0.1071, + "epoch": 2.0182151934481785, + "grad_norm": 3.026944741739093, + "learning_rate": 5.110861685825377e-06, + "loss": 0.2089, "step": 14293 }, { - "epoch": 2.9972740616481444, - "grad_norm": 4.283350806075584, - "learning_rate": 4.330768450411249e-11, - "loss": 0.1298, + "epoch": 2.0183563964981643, + "grad_norm": 2.158522000218707, + "learning_rate": 5.109531913415819e-06, + "loss": 0.1621, "step": 14294 }, { - "epoch": 2.9974837492136714, - "grad_norm": 4.452789328915619, - "learning_rate": 3.690122623911663e-11, - "loss": 0.1436, + "epoch": 2.0184975995481502, + "grad_norm": 2.6541316140519777, + "learning_rate": 5.108202254658623e-06, + "loss": 0.1305, "step": 14295 }, { - "epoch": 2.997693436779199, - "grad_norm": 4.88171505458831, - "learning_rate": 3.100728342753101e-11, - "loss": 0.1689, + "epoch": 2.018638802598136, + "grad_norm": 2.9064285720761682, + "learning_rate": 5.1068727095846935e-06, + "loss": 0.1067, "step": 14296 }, { - "epoch": 2.9979031243447265, - "grad_norm": 4.569187621717733, - "learning_rate": 2.5625856370226075e-11, - "loss": 0.171, + "epoch": 2.018780005648122, + "grad_norm": 3.495979050414805, + "learning_rate": 5.105543278224929e-06, + "loss": 0.2024, "step": 14297 }, { - "epoch": 2.9981128119102536, - "grad_norm": 4.181243597012907, - "learning_rate": 2.0756945344757585e-11, - "loss": 0.0922, + "epoch": 2.018921208698108, + "grad_norm": 3.284494536045945, + "learning_rate": 5.104213960610223e-06, + "loss": 0.1805, "step": 14298 }, { - "epoch": 2.998322499475781, - "grad_norm": 3.5021110538682705, - "learning_rate": 1.6400550598705266e-11, - "loss": 0.098, + "epoch": 2.0190624117480938, + "grad_norm": 3.2853795213068184, + "learning_rate": 5.102884756771471e-06, + "loss": 0.1939, "step": 14299 }, { - "epoch": 2.9985321870413086, - "grad_norm": 4.318872608265839, - "learning_rate": 1.2556672356334176e-11, - "loss": 0.1184, + "epoch": 2.0192036147980796, + "grad_norm": 3.702290032946965, + "learning_rate": 5.101555666739563e-06, + "loss": 0.2033, "step": 14300 }, { - "epoch": 2.9987418746068357, - "grad_norm": 4.4613017870445635, - "learning_rate": 9.22531081526401e-12, - "loss": 0.1387, + "epoch": 2.0193448178480655, + "grad_norm": 3.445254241275055, + "learning_rate": 5.100226690545389e-06, + "loss": 0.1478, "step": 14301 }, { - "epoch": 2.9989515621723632, - "grad_norm": 3.871335515730445, - "learning_rate": 6.4064661453588916e-12, - "loss": 0.1044, + "epoch": 2.0194860208980514, + "grad_norm": 3.216175143802346, + "learning_rate": 5.098897828219831e-06, + "loss": 0.208, "step": 14302 }, { - "epoch": 2.9991612497378908, - "grad_norm": 4.582158006035162, - "learning_rate": 4.100138489837591e-12, - "loss": 0.1301, + "epoch": 2.0196272239480373, + "grad_norm": 3.4322661040218607, + "learning_rate": 5.097569079793765e-06, + "loss": 0.2012, "step": 14303 }, { - "epoch": 2.999370937303418, - "grad_norm": 6.069782410370943, - "learning_rate": 2.306327969714417e-12, - "loss": 0.1158, + "epoch": 2.019768426998023, + "grad_norm": 3.287857603936467, + "learning_rate": 5.09624044529808e-06, + "loss": 0.189, "step": 14304 }, { - "epoch": 2.9995806248689454, - "grad_norm": 4.739417643890966, - "learning_rate": 1.0250346749174356e-12, - "loss": 0.1418, + "epoch": 2.019909630048009, + "grad_norm": 3.962516942484197, + "learning_rate": 5.094911924763649e-06, + "loss": 0.1865, "step": 14305 }, { - "epoch": 2.9997903124344725, - "grad_norm": 4.02449458850959, - "learning_rate": 2.5625867206002795e-13, - "loss": 0.1291, + "epoch": 2.020050833097995, + "grad_norm": 3.0687525123911725, + "learning_rate": 5.093583518221347e-06, + "loss": 0.1267, "step": 14306 }, { - "epoch": 3.0, - "grad_norm": 3.351427500888645, - "learning_rate": 0.0, - "loss": 0.1252, + "epoch": 2.020192036147981, + "grad_norm": 2.69741910815603, + "learning_rate": 5.092255225702044e-06, + "loss": 0.1479, "step": 14307 }, + { + "epoch": 2.0203332391979667, + "grad_norm": 2.874969017459384, + "learning_rate": 5.090927047236611e-06, + "loss": 0.1617, + "step": 14308 + }, + { + "epoch": 2.0204744422479526, + "grad_norm": 3.1260285356173965, + "learning_rate": 5.089598982855913e-06, + "loss": 0.162, + "step": 14309 + }, + { + "epoch": 2.0206156452979385, + "grad_norm": 3.4280457272706006, + "learning_rate": 5.0882710325908106e-06, + "loss": 0.1893, + "step": 14310 + }, + { + "epoch": 2.0207568483479244, + "grad_norm": 2.8575679563518412, + "learning_rate": 5.086943196472175e-06, + "loss": 0.1078, + "step": 14311 + }, + { + "epoch": 2.0208980513979102, + "grad_norm": 2.837968988277867, + "learning_rate": 5.085615474530854e-06, + "loss": 0.1511, + "step": 14312 + }, + { + "epoch": 2.021039254447896, + "grad_norm": 2.5132387422991886, + "learning_rate": 5.084287866797708e-06, + "loss": 0.116, + "step": 14313 + }, + { + "epoch": 2.021180457497882, + "grad_norm": 2.997667691888442, + "learning_rate": 5.082960373303588e-06, + "loss": 0.1455, + "step": 14314 + }, + { + "epoch": 2.021321660547868, + "grad_norm": 4.083504383571186, + "learning_rate": 5.081632994079342e-06, + "loss": 0.2181, + "step": 14315 + }, + { + "epoch": 2.0214628635978538, + "grad_norm": 3.160523080242641, + "learning_rate": 5.0803057291558255e-06, + "loss": 0.1664, + "step": 14316 + }, + { + "epoch": 2.0216040666478396, + "grad_norm": 3.151836517720333, + "learning_rate": 5.078978578563878e-06, + "loss": 0.1534, + "step": 14317 + }, + { + "epoch": 2.0217452696978255, + "grad_norm": 2.8664655774830643, + "learning_rate": 5.0776515423343445e-06, + "loss": 0.1481, + "step": 14318 + }, + { + "epoch": 2.0218864727478114, + "grad_norm": 3.377888578270953, + "learning_rate": 5.076324620498066e-06, + "loss": 0.227, + "step": 14319 + }, + { + "epoch": 2.0220276757977973, + "grad_norm": 3.1228143672231394, + "learning_rate": 5.074997813085873e-06, + "loss": 0.1624, + "step": 14320 + }, + { + "epoch": 2.022168878847783, + "grad_norm": 3.2807220001996455, + "learning_rate": 5.073671120128601e-06, + "loss": 0.196, + "step": 14321 + }, + { + "epoch": 2.022310081897769, + "grad_norm": 3.1692646215771787, + "learning_rate": 5.072344541657089e-06, + "loss": 0.155, + "step": 14322 + }, + { + "epoch": 2.022451284947755, + "grad_norm": 3.0084076323219295, + "learning_rate": 5.071018077702161e-06, + "loss": 0.1708, + "step": 14323 + }, + { + "epoch": 2.022592487997741, + "grad_norm": 2.7475542580607564, + "learning_rate": 5.069691728294643e-06, + "loss": 0.175, + "step": 14324 + }, + { + "epoch": 2.0227336910477267, + "grad_norm": 3.391186321616539, + "learning_rate": 5.068365493465361e-06, + "loss": 0.1412, + "step": 14325 + }, + { + "epoch": 2.0228748940977126, + "grad_norm": 3.651232135350546, + "learning_rate": 5.067039373245136e-06, + "loss": 0.2176, + "step": 14326 + }, + { + "epoch": 2.0230160971476985, + "grad_norm": 3.2651476850777144, + "learning_rate": 5.065713367664781e-06, + "loss": 0.1621, + "step": 14327 + }, + { + "epoch": 2.0231573001976844, + "grad_norm": 2.2954667081660163, + "learning_rate": 5.064387476755124e-06, + "loss": 0.1325, + "step": 14328 + }, + { + "epoch": 2.0232985032476702, + "grad_norm": 2.8573364849337124, + "learning_rate": 5.0630617005469676e-06, + "loss": 0.1454, + "step": 14329 + }, + { + "epoch": 2.023439706297656, + "grad_norm": 3.1370163645774825, + "learning_rate": 5.061736039071124e-06, + "loss": 0.162, + "step": 14330 + }, + { + "epoch": 2.023580909347642, + "grad_norm": 2.9252788074065603, + "learning_rate": 5.060410492358402e-06, + "loss": 0.1662, + "step": 14331 + }, + { + "epoch": 2.023722112397628, + "grad_norm": 4.104286827489718, + "learning_rate": 5.059085060439608e-06, + "loss": 0.2688, + "step": 14332 + }, + { + "epoch": 2.0238633154476138, + "grad_norm": 2.68288795901017, + "learning_rate": 5.057759743345538e-06, + "loss": 0.1552, + "step": 14333 + }, + { + "epoch": 2.0240045184975997, + "grad_norm": 2.749144272339022, + "learning_rate": 5.0564345411070025e-06, + "loss": 0.1302, + "step": 14334 + }, + { + "epoch": 2.0241457215475855, + "grad_norm": 2.9055213748664905, + "learning_rate": 5.0551094537547915e-06, + "loss": 0.1231, + "step": 14335 + }, + { + "epoch": 2.0242869245975714, + "grad_norm": 2.6650302774262244, + "learning_rate": 5.053784481319708e-06, + "loss": 0.1583, + "step": 14336 + }, + { + "epoch": 2.0244281276475573, + "grad_norm": 2.8705612937789855, + "learning_rate": 5.052459623832531e-06, + "loss": 0.1494, + "step": 14337 + }, + { + "epoch": 2.024569330697543, + "grad_norm": 2.377704314149694, + "learning_rate": 5.0511348813240515e-06, + "loss": 0.1405, + "step": 14338 + }, + { + "epoch": 2.024710533747529, + "grad_norm": 3.3624783597169388, + "learning_rate": 5.0498102538250645e-06, + "loss": 0.1776, + "step": 14339 + }, + { + "epoch": 2.024851736797515, + "grad_norm": 3.010236408065653, + "learning_rate": 5.048485741366351e-06, + "loss": 0.1426, + "step": 14340 + }, + { + "epoch": 2.024992939847501, + "grad_norm": 3.7898106021779268, + "learning_rate": 5.047161343978688e-06, + "loss": 0.1675, + "step": 14341 + }, + { + "epoch": 2.0251341428974867, + "grad_norm": 3.1728100636799494, + "learning_rate": 5.045837061692857e-06, + "loss": 0.1764, + "step": 14342 + }, + { + "epoch": 2.0252753459474726, + "grad_norm": 2.7611004852732357, + "learning_rate": 5.044512894539635e-06, + "loss": 0.1339, + "step": 14343 + }, + { + "epoch": 2.0254165489974585, + "grad_norm": 3.5459142016380465, + "learning_rate": 5.043188842549789e-06, + "loss": 0.2682, + "step": 14344 + }, + { + "epoch": 2.0255577520474444, + "grad_norm": 3.103278357585595, + "learning_rate": 5.041864905754099e-06, + "loss": 0.1832, + "step": 14345 + }, + { + "epoch": 2.0256989550974303, + "grad_norm": 3.317205383651116, + "learning_rate": 5.040541084183326e-06, + "loss": 0.2119, + "step": 14346 + }, + { + "epoch": 2.025840158147416, + "grad_norm": 2.9364367802322304, + "learning_rate": 5.039217377868235e-06, + "loss": 0.1709, + "step": 14347 + }, + { + "epoch": 2.025981361197402, + "grad_norm": 3.9900746615327156, + "learning_rate": 5.037893786839589e-06, + "loss": 0.2049, + "step": 14348 + }, + { + "epoch": 2.026122564247388, + "grad_norm": 2.913108349160879, + "learning_rate": 5.036570311128151e-06, + "loss": 0.2026, + "step": 14349 + }, + { + "epoch": 2.026263767297374, + "grad_norm": 3.1605111393399814, + "learning_rate": 5.03524695076467e-06, + "loss": 0.1724, + "step": 14350 + }, + { + "epoch": 2.0264049703473597, + "grad_norm": 2.747580571003615, + "learning_rate": 5.033923705779908e-06, + "loss": 0.1561, + "step": 14351 + }, + { + "epoch": 2.0265461733973456, + "grad_norm": 2.858744698890405, + "learning_rate": 5.032600576204616e-06, + "loss": 0.1656, + "step": 14352 + }, + { + "epoch": 2.0266873764473314, + "grad_norm": 3.0464691514061246, + "learning_rate": 5.031277562069545e-06, + "loss": 0.1449, + "step": 14353 + }, + { + "epoch": 2.0268285794973173, + "grad_norm": 2.617906775579497, + "learning_rate": 5.0299546634054345e-06, + "loss": 0.1482, + "step": 14354 + }, + { + "epoch": 2.026969782547303, + "grad_norm": 2.893959955776391, + "learning_rate": 5.0286318802430265e-06, + "loss": 0.1472, + "step": 14355 + }, + { + "epoch": 2.027110985597289, + "grad_norm": 2.9869522340638763, + "learning_rate": 5.0273092126130715e-06, + "loss": 0.1418, + "step": 14356 + }, + { + "epoch": 2.027252188647275, + "grad_norm": 3.7185787171677105, + "learning_rate": 5.0259866605463025e-06, + "loss": 0.148, + "step": 14357 + }, + { + "epoch": 2.027393391697261, + "grad_norm": 3.6964771472901607, + "learning_rate": 5.024664224073454e-06, + "loss": 0.2241, + "step": 14358 + }, + { + "epoch": 2.0275345947472467, + "grad_norm": 2.4112004860583665, + "learning_rate": 5.023341903225263e-06, + "loss": 0.1473, + "step": 14359 + }, + { + "epoch": 2.0276757977972326, + "grad_norm": 2.6447613091692244, + "learning_rate": 5.0220196980324545e-06, + "loss": 0.1622, + "step": 14360 + }, + { + "epoch": 2.0278170008472185, + "grad_norm": 3.3984022848298543, + "learning_rate": 5.02069760852576e-06, + "loss": 0.2059, + "step": 14361 + }, + { + "epoch": 2.0279582038972044, + "grad_norm": 3.3626068429819407, + "learning_rate": 5.0193756347359015e-06, + "loss": 0.1935, + "step": 14362 + }, + { + "epoch": 2.0280994069471903, + "grad_norm": 2.762416962983157, + "learning_rate": 5.0180537766936026e-06, + "loss": 0.1573, + "step": 14363 + }, + { + "epoch": 2.028240609997176, + "grad_norm": 3.3537987722964937, + "learning_rate": 5.016732034429581e-06, + "loss": 0.1332, + "step": 14364 + }, + { + "epoch": 2.028381813047162, + "grad_norm": 2.6345409502602406, + "learning_rate": 5.015410407974557e-06, + "loss": 0.146, + "step": 14365 + }, + { + "epoch": 2.028523016097148, + "grad_norm": 2.957722803216369, + "learning_rate": 5.014088897359242e-06, + "loss": 0.1429, + "step": 14366 + }, + { + "epoch": 2.028664219147134, + "grad_norm": 3.3282396203486995, + "learning_rate": 5.012767502614344e-06, + "loss": 0.1999, + "step": 14367 + }, + { + "epoch": 2.0288054221971192, + "grad_norm": 3.1655430891975636, + "learning_rate": 5.011446223770578e-06, + "loss": 0.1631, + "step": 14368 + }, + { + "epoch": 2.028946625247105, + "grad_norm": 4.190118466290238, + "learning_rate": 5.010125060858648e-06, + "loss": 0.1819, + "step": 14369 + }, + { + "epoch": 2.029087828297091, + "grad_norm": 2.8680342102016025, + "learning_rate": 5.0088040139092605e-06, + "loss": 0.1645, + "step": 14370 + }, + { + "epoch": 2.029229031347077, + "grad_norm": 4.414539800071081, + "learning_rate": 5.007483082953109e-06, + "loss": 0.1902, + "step": 14371 + }, + { + "epoch": 2.0293702343970628, + "grad_norm": 2.877790409963356, + "learning_rate": 5.006162268020891e-06, + "loss": 0.138, + "step": 14372 + }, + { + "epoch": 2.0295114374470486, + "grad_norm": 3.0804430241035554, + "learning_rate": 5.004841569143308e-06, + "loss": 0.155, + "step": 14373 + }, + { + "epoch": 2.0296526404970345, + "grad_norm": 3.489980685524447, + "learning_rate": 5.0035209863510496e-06, + "loss": 0.1621, + "step": 14374 + }, + { + "epoch": 2.0297938435470204, + "grad_norm": 2.8578953863908914, + "learning_rate": 5.002200519674807e-06, + "loss": 0.1307, + "step": 14375 + }, + { + "epoch": 2.0299350465970063, + "grad_norm": 2.891395984033541, + "learning_rate": 5.000880169145264e-06, + "loss": 0.155, + "step": 14376 + }, + { + "epoch": 2.030076249646992, + "grad_norm": 2.8140909444963302, + "learning_rate": 4.9995599347931075e-06, + "loss": 0.1893, + "step": 14377 + }, + { + "epoch": 2.030217452696978, + "grad_norm": 3.47376664667973, + "learning_rate": 4.99823981664902e-06, + "loss": 0.1837, + "step": 14378 + }, + { + "epoch": 2.030358655746964, + "grad_norm": 2.2789404831276023, + "learning_rate": 4.9969198147436785e-06, + "loss": 0.1118, + "step": 14379 + }, + { + "epoch": 2.03049985879695, + "grad_norm": 3.5794591179775925, + "learning_rate": 4.995599929107758e-06, + "loss": 0.1727, + "step": 14380 + }, + { + "epoch": 2.0306410618469357, + "grad_norm": 2.9646859197838196, + "learning_rate": 4.994280159771936e-06, + "loss": 0.1829, + "step": 14381 + }, + { + "epoch": 2.0307822648969216, + "grad_norm": 3.4813244848788467, + "learning_rate": 4.992960506766881e-06, + "loss": 0.1485, + "step": 14382 + }, + { + "epoch": 2.0309234679469075, + "grad_norm": 2.610096709229147, + "learning_rate": 4.99164097012326e-06, + "loss": 0.1793, + "step": 14383 + }, + { + "epoch": 2.0310646709968934, + "grad_norm": 3.0311541027224624, + "learning_rate": 4.990321549871738e-06, + "loss": 0.1574, + "step": 14384 + }, + { + "epoch": 2.0312058740468792, + "grad_norm": 3.102962097892004, + "learning_rate": 4.989002246042982e-06, + "loss": 0.1627, + "step": 14385 + }, + { + "epoch": 2.031347077096865, + "grad_norm": 2.543570252241666, + "learning_rate": 4.987683058667651e-06, + "loss": 0.1529, + "step": 14386 + }, + { + "epoch": 2.031488280146851, + "grad_norm": 2.6555729673879127, + "learning_rate": 4.986363987776403e-06, + "loss": 0.1465, + "step": 14387 + }, + { + "epoch": 2.031629483196837, + "grad_norm": 3.3689759091298104, + "learning_rate": 4.985045033399889e-06, + "loss": 0.1545, + "step": 14388 + }, + { + "epoch": 2.0317706862468228, + "grad_norm": 3.028162363237027, + "learning_rate": 4.983726195568758e-06, + "loss": 0.1785, + "step": 14389 + }, + { + "epoch": 2.0319118892968087, + "grad_norm": 3.025675420290699, + "learning_rate": 4.982407474313667e-06, + "loss": 0.1866, + "step": 14390 + }, + { + "epoch": 2.0320530923467945, + "grad_norm": 3.2872120834588427, + "learning_rate": 4.98108886966526e-06, + "loss": 0.1889, + "step": 14391 + }, + { + "epoch": 2.0321942953967804, + "grad_norm": 3.583786146529471, + "learning_rate": 4.979770381654181e-06, + "loss": 0.2054, + "step": 14392 + }, + { + "epoch": 2.0323354984467663, + "grad_norm": 3.51715389649081, + "learning_rate": 4.978452010311069e-06, + "loss": 0.1846, + "step": 14393 + }, + { + "epoch": 2.032476701496752, + "grad_norm": 3.1202821920642863, + "learning_rate": 4.977133755666564e-06, + "loss": 0.1448, + "step": 14394 + }, + { + "epoch": 2.032617904546738, + "grad_norm": 3.2100223492400097, + "learning_rate": 4.975815617751301e-06, + "loss": 0.137, + "step": 14395 + }, + { + "epoch": 2.032759107596724, + "grad_norm": 3.3271857276497965, + "learning_rate": 4.9744975965959145e-06, + "loss": 0.1927, + "step": 14396 + }, + { + "epoch": 2.03290031064671, + "grad_norm": 3.017136942033875, + "learning_rate": 4.973179692231033e-06, + "loss": 0.1585, + "step": 14397 + }, + { + "epoch": 2.0330415136966957, + "grad_norm": 3.5420431462642243, + "learning_rate": 4.971861904687283e-06, + "loss": 0.1776, + "step": 14398 + }, + { + "epoch": 2.0331827167466816, + "grad_norm": 2.366027750351628, + "learning_rate": 4.9705442339952924e-06, + "loss": 0.119, + "step": 14399 + }, + { + "epoch": 2.0333239197966675, + "grad_norm": 3.016507923140273, + "learning_rate": 4.9692266801856815e-06, + "loss": 0.16, + "step": 14400 + }, + { + "epoch": 2.0334651228466534, + "grad_norm": 2.379249937292288, + "learning_rate": 4.967909243289066e-06, + "loss": 0.118, + "step": 14401 + }, + { + "epoch": 2.0336063258966393, + "grad_norm": 2.765877272180813, + "learning_rate": 4.96659192333607e-06, + "loss": 0.1206, + "step": 14402 + }, + { + "epoch": 2.033747528946625, + "grad_norm": 3.118654429216308, + "learning_rate": 4.965274720357303e-06, + "loss": 0.1666, + "step": 14403 + }, + { + "epoch": 2.033888731996611, + "grad_norm": 3.1721014618923173, + "learning_rate": 4.963957634383384e-06, + "loss": 0.174, + "step": 14404 + }, + { + "epoch": 2.034029935046597, + "grad_norm": 3.396584819963182, + "learning_rate": 4.9626406654449085e-06, + "loss": 0.1862, + "step": 14405 + }, + { + "epoch": 2.034171138096583, + "grad_norm": 3.2489918842077277, + "learning_rate": 4.961323813572485e-06, + "loss": 0.1685, + "step": 14406 + }, + { + "epoch": 2.0343123411465687, + "grad_norm": 3.6677805330764848, + "learning_rate": 4.960007078796725e-06, + "loss": 0.185, + "step": 14407 + }, + { + "epoch": 2.0344535441965546, + "grad_norm": 3.0668642501592838, + "learning_rate": 4.958690461148222e-06, + "loss": 0.1717, + "step": 14408 + }, + { + "epoch": 2.0345947472465404, + "grad_norm": 2.9950345301949715, + "learning_rate": 4.957373960657577e-06, + "loss": 0.145, + "step": 14409 + }, + { + "epoch": 2.0347359502965263, + "grad_norm": 3.021244840080818, + "learning_rate": 4.9560575773553824e-06, + "loss": 0.1659, + "step": 14410 + }, + { + "epoch": 2.034877153346512, + "grad_norm": 3.3261812652438625, + "learning_rate": 4.954741311272232e-06, + "loss": 0.1926, + "step": 14411 + }, + { + "epoch": 2.035018356396498, + "grad_norm": 3.6332731848057582, + "learning_rate": 4.953425162438714e-06, + "loss": 0.2188, + "step": 14412 + }, + { + "epoch": 2.035159559446484, + "grad_norm": 3.463149589561639, + "learning_rate": 4.9521091308854165e-06, + "loss": 0.1672, + "step": 14413 + }, + { + "epoch": 2.03530076249647, + "grad_norm": 3.639546616390054, + "learning_rate": 4.950793216642923e-06, + "loss": 0.2361, + "step": 14414 + }, + { + "epoch": 2.0354419655464557, + "grad_norm": 2.70479450723535, + "learning_rate": 4.949477419741814e-06, + "loss": 0.1642, + "step": 14415 + }, + { + "epoch": 2.0355831685964416, + "grad_norm": 3.19442161463194, + "learning_rate": 4.948161740212669e-06, + "loss": 0.1441, + "step": 14416 + }, + { + "epoch": 2.0357243716464275, + "grad_norm": 3.14411243038963, + "learning_rate": 4.946846178086063e-06, + "loss": 0.167, + "step": 14417 + }, + { + "epoch": 2.0358655746964134, + "grad_norm": 2.9209549904204573, + "learning_rate": 4.945530733392566e-06, + "loss": 0.1607, + "step": 14418 + }, + { + "epoch": 2.0360067777463993, + "grad_norm": 3.25603671123706, + "learning_rate": 4.944215406162756e-06, + "loss": 0.1771, + "step": 14419 + }, + { + "epoch": 2.036147980796385, + "grad_norm": 3.1952927664951236, + "learning_rate": 4.942900196427195e-06, + "loss": 0.1193, + "step": 14420 + }, + { + "epoch": 2.036289183846371, + "grad_norm": 3.3253950030301147, + "learning_rate": 4.941585104216455e-06, + "loss": 0.2051, + "step": 14421 + }, + { + "epoch": 2.036430386896357, + "grad_norm": 3.161273545556273, + "learning_rate": 4.940270129561088e-06, + "loss": 0.1643, + "step": 14422 + }, + { + "epoch": 2.036571589946343, + "grad_norm": 3.0794686033906316, + "learning_rate": 4.938955272491658e-06, + "loss": 0.1818, + "step": 14423 + }, + { + "epoch": 2.0367127929963287, + "grad_norm": 3.343477152971616, + "learning_rate": 4.937640533038718e-06, + "loss": 0.1523, + "step": 14424 + }, + { + "epoch": 2.0368539960463146, + "grad_norm": 2.72045550067017, + "learning_rate": 4.93632591123283e-06, + "loss": 0.1435, + "step": 14425 + }, + { + "epoch": 2.0369951990963004, + "grad_norm": 3.516167575412925, + "learning_rate": 4.9350114071045405e-06, + "loss": 0.1869, + "step": 14426 + }, + { + "epoch": 2.0371364021462863, + "grad_norm": 2.9425003429891023, + "learning_rate": 4.933697020684399e-06, + "loss": 0.1622, + "step": 14427 + }, + { + "epoch": 2.037277605196272, + "grad_norm": 3.1527418176714113, + "learning_rate": 4.932382752002951e-06, + "loss": 0.2026, + "step": 14428 + }, + { + "epoch": 2.037418808246258, + "grad_norm": 3.060786390341475, + "learning_rate": 4.9310686010907384e-06, + "loss": 0.1693, + "step": 14429 + }, + { + "epoch": 2.037560011296244, + "grad_norm": 2.812296176985327, + "learning_rate": 4.929754567978303e-06, + "loss": 0.1916, + "step": 14430 + }, + { + "epoch": 2.03770121434623, + "grad_norm": 2.798882413281499, + "learning_rate": 4.928440652696181e-06, + "loss": 0.1369, + "step": 14431 + }, + { + "epoch": 2.0378424173962157, + "grad_norm": 2.7242914221321985, + "learning_rate": 4.92712685527491e-06, + "loss": 0.1487, + "step": 14432 + }, + { + "epoch": 2.0379836204462016, + "grad_norm": 2.5991060870490523, + "learning_rate": 4.9258131757450175e-06, + "loss": 0.1513, + "step": 14433 + }, + { + "epoch": 2.0381248234961875, + "grad_norm": 3.6661838953118018, + "learning_rate": 4.924499614137037e-06, + "loss": 0.2036, + "step": 14434 + }, + { + "epoch": 2.0382660265461734, + "grad_norm": 3.2058879551837363, + "learning_rate": 4.9231861704814935e-06, + "loss": 0.1462, + "step": 14435 + }, + { + "epoch": 2.0384072295961593, + "grad_norm": 3.213894949275474, + "learning_rate": 4.921872844808906e-06, + "loss": 0.1514, + "step": 14436 + }, + { + "epoch": 2.038548432646145, + "grad_norm": 2.9134086050215724, + "learning_rate": 4.920559637149805e-06, + "loss": 0.1506, + "step": 14437 + }, + { + "epoch": 2.038689635696131, + "grad_norm": 2.74923612539152, + "learning_rate": 4.919246547534709e-06, + "loss": 0.1517, + "step": 14438 + }, + { + "epoch": 2.038830838746117, + "grad_norm": 2.682071353550135, + "learning_rate": 4.917933575994124e-06, + "loss": 0.1278, + "step": 14439 + }, + { + "epoch": 2.038972041796103, + "grad_norm": 3.2705496410561716, + "learning_rate": 4.916620722558568e-06, + "loss": 0.1566, + "step": 14440 + }, + { + "epoch": 2.0391132448460887, + "grad_norm": 2.8690052286964085, + "learning_rate": 4.915307987258547e-06, + "loss": 0.1389, + "step": 14441 + }, + { + "epoch": 2.0392544478960746, + "grad_norm": 3.308230467778446, + "learning_rate": 4.913995370124578e-06, + "loss": 0.133, + "step": 14442 + }, + { + "epoch": 2.0393956509460605, + "grad_norm": 3.066123818748472, + "learning_rate": 4.9126828711871585e-06, + "loss": 0.1833, + "step": 14443 + }, + { + "epoch": 2.0395368539960463, + "grad_norm": 2.4789147917615573, + "learning_rate": 4.911370490476792e-06, + "loss": 0.1342, + "step": 14444 + }, + { + "epoch": 2.0396780570460322, + "grad_norm": 3.9985157494152013, + "learning_rate": 4.91005822802398e-06, + "loss": 0.1754, + "step": 14445 + }, + { + "epoch": 2.039819260096018, + "grad_norm": 4.196855961328, + "learning_rate": 4.908746083859214e-06, + "loss": 0.1951, + "step": 14446 + }, + { + "epoch": 2.039960463146004, + "grad_norm": 3.315833152185437, + "learning_rate": 4.907434058012991e-06, + "loss": 0.1571, + "step": 14447 + }, + { + "epoch": 2.04010166619599, + "grad_norm": 3.797592149307698, + "learning_rate": 4.906122150515801e-06, + "loss": 0.1729, + "step": 14448 + }, + { + "epoch": 2.0402428692459758, + "grad_norm": 2.6630312289297073, + "learning_rate": 4.904810361398132e-06, + "loss": 0.1008, + "step": 14449 + }, + { + "epoch": 2.0403840722959616, + "grad_norm": 4.827326875472237, + "learning_rate": 4.9034986906904715e-06, + "loss": 0.2579, + "step": 14450 + }, + { + "epoch": 2.0405252753459475, + "grad_norm": 3.1732992206046378, + "learning_rate": 4.902187138423299e-06, + "loss": 0.1344, + "step": 14451 + }, + { + "epoch": 2.0406664783959334, + "grad_norm": 3.564656717310189, + "learning_rate": 4.900875704627096e-06, + "loss": 0.2047, + "step": 14452 + }, + { + "epoch": 2.0408076814459193, + "grad_norm": 3.958722176129888, + "learning_rate": 4.899564389332337e-06, + "loss": 0.1985, + "step": 14453 + }, + { + "epoch": 2.040948884495905, + "grad_norm": 2.967947643032591, + "learning_rate": 4.898253192569501e-06, + "loss": 0.1525, + "step": 14454 + }, + { + "epoch": 2.041090087545891, + "grad_norm": 3.8814231634163012, + "learning_rate": 4.896942114369061e-06, + "loss": 0.1622, + "step": 14455 + }, + { + "epoch": 2.041231290595877, + "grad_norm": 4.243009724057618, + "learning_rate": 4.8956311547614796e-06, + "loss": 0.2231, + "step": 14456 + }, + { + "epoch": 2.041372493645863, + "grad_norm": 3.973926363340206, + "learning_rate": 4.894320313777226e-06, + "loss": 0.1891, + "step": 14457 + }, + { + "epoch": 2.0415136966958487, + "grad_norm": 2.69751464755363, + "learning_rate": 4.893009591446759e-06, + "loss": 0.1778, + "step": 14458 + }, + { + "epoch": 2.0416548997458346, + "grad_norm": 3.4475903068847424, + "learning_rate": 4.891698987800547e-06, + "loss": 0.1709, + "step": 14459 + }, + { + "epoch": 2.0417961027958205, + "grad_norm": 2.7951824698686045, + "learning_rate": 4.8903885028690454e-06, + "loss": 0.1417, + "step": 14460 + }, + { + "epoch": 2.0419373058458063, + "grad_norm": 3.257452885225593, + "learning_rate": 4.8890781366827075e-06, + "loss": 0.1303, + "step": 14461 + }, + { + "epoch": 2.0420785088957922, + "grad_norm": 2.8495338617238306, + "learning_rate": 4.887767889271987e-06, + "loss": 0.1724, + "step": 14462 + }, + { + "epoch": 2.042219711945778, + "grad_norm": 2.3983026096438875, + "learning_rate": 4.886457760667332e-06, + "loss": 0.1098, + "step": 14463 + }, + { + "epoch": 2.042360914995764, + "grad_norm": 3.2747010421517584, + "learning_rate": 4.885147750899192e-06, + "loss": 0.1641, + "step": 14464 + }, + { + "epoch": 2.04250211804575, + "grad_norm": 2.7885144302966127, + "learning_rate": 4.883837859998009e-06, + "loss": 0.1583, + "step": 14465 + }, + { + "epoch": 2.0426433210957358, + "grad_norm": 2.748064948528609, + "learning_rate": 4.882528087994223e-06, + "loss": 0.1552, + "step": 14466 + }, + { + "epoch": 2.0427845241457216, + "grad_norm": 2.8641929437321525, + "learning_rate": 4.881218434918276e-06, + "loss": 0.1634, + "step": 14467 + }, + { + "epoch": 2.0429257271957075, + "grad_norm": 3.018267799468174, + "learning_rate": 4.8799089008006005e-06, + "loss": 0.1519, + "step": 14468 + }, + { + "epoch": 2.0430669302456934, + "grad_norm": 3.4160592729698553, + "learning_rate": 4.878599485671631e-06, + "loss": 0.1962, + "step": 14469 + }, + { + "epoch": 2.0432081332956793, + "grad_norm": 3.789761582749137, + "learning_rate": 4.877290189561795e-06, + "loss": 0.2248, + "step": 14470 + }, + { + "epoch": 2.043349336345665, + "grad_norm": 3.2795590658121605, + "learning_rate": 4.875981012501526e-06, + "loss": 0.1741, + "step": 14471 + }, + { + "epoch": 2.043490539395651, + "grad_norm": 3.0509738309865746, + "learning_rate": 4.874671954521249e-06, + "loss": 0.1545, + "step": 14472 + }, + { + "epoch": 2.043631742445637, + "grad_norm": 2.4125005815503746, + "learning_rate": 4.873363015651379e-06, + "loss": 0.1453, + "step": 14473 + }, + { + "epoch": 2.043772945495623, + "grad_norm": 3.223822307896231, + "learning_rate": 4.872054195922338e-06, + "loss": 0.1679, + "step": 14474 + }, + { + "epoch": 2.0439141485456087, + "grad_norm": 3.119359485457371, + "learning_rate": 4.870745495364539e-06, + "loss": 0.171, + "step": 14475 + }, + { + "epoch": 2.0440553515955946, + "grad_norm": 2.9642883978824073, + "learning_rate": 4.869436914008403e-06, + "loss": 0.1511, + "step": 14476 + }, + { + "epoch": 2.0441965546455805, + "grad_norm": 3.8382683873887493, + "learning_rate": 4.868128451884339e-06, + "loss": 0.2111, + "step": 14477 + }, + { + "epoch": 2.0443377576955664, + "grad_norm": 3.701817816728572, + "learning_rate": 4.866820109022752e-06, + "loss": 0.212, + "step": 14478 + }, + { + "epoch": 2.0444789607455522, + "grad_norm": 3.2401535968562927, + "learning_rate": 4.865511885454049e-06, + "loss": 0.1752, + "step": 14479 + }, + { + "epoch": 2.044620163795538, + "grad_norm": 2.8456347826651616, + "learning_rate": 4.864203781208632e-06, + "loss": 0.1486, + "step": 14480 + }, + { + "epoch": 2.044761366845524, + "grad_norm": 2.964767280176492, + "learning_rate": 4.862895796316902e-06, + "loss": 0.1455, + "step": 14481 + }, + { + "epoch": 2.04490256989551, + "grad_norm": 3.1783440330617503, + "learning_rate": 4.861587930809255e-06, + "loss": 0.1607, + "step": 14482 + }, + { + "epoch": 2.0450437729454958, + "grad_norm": 2.805464062517001, + "learning_rate": 4.860280184716085e-06, + "loss": 0.1538, + "step": 14483 + }, + { + "epoch": 2.0451849759954817, + "grad_norm": 3.0363029439724802, + "learning_rate": 4.858972558067784e-06, + "loss": 0.1252, + "step": 14484 + }, + { + "epoch": 2.0453261790454675, + "grad_norm": 2.7912879830353474, + "learning_rate": 4.8576650508947405e-06, + "loss": 0.1633, + "step": 14485 + }, + { + "epoch": 2.0454673820954534, + "grad_norm": 4.65681582840178, + "learning_rate": 4.856357663227341e-06, + "loss": 0.2214, + "step": 14486 + }, + { + "epoch": 2.0456085851454393, + "grad_norm": 2.967665760933173, + "learning_rate": 4.855050395095964e-06, + "loss": 0.1651, + "step": 14487 + }, + { + "epoch": 2.045749788195425, + "grad_norm": 3.2960868267511456, + "learning_rate": 4.8537432465309975e-06, + "loss": 0.1425, + "step": 14488 + }, + { + "epoch": 2.045890991245411, + "grad_norm": 4.540432646575895, + "learning_rate": 4.852436217562819e-06, + "loss": 0.164, + "step": 14489 + }, + { + "epoch": 2.046032194295397, + "grad_norm": 2.872218498108694, + "learning_rate": 4.851129308221796e-06, + "loss": 0.1696, + "step": 14490 + }, + { + "epoch": 2.046173397345383, + "grad_norm": 2.601509231900234, + "learning_rate": 4.849822518538304e-06, + "loss": 0.1349, + "step": 14491 + }, + { + "epoch": 2.0463146003953687, + "grad_norm": 3.5493912120676474, + "learning_rate": 4.848515848542709e-06, + "loss": 0.1864, + "step": 14492 + }, + { + "epoch": 2.0464558034453546, + "grad_norm": 3.160794284832748, + "learning_rate": 4.847209298265385e-06, + "loss": 0.173, + "step": 14493 + }, + { + "epoch": 2.0465970064953405, + "grad_norm": 2.5793314112141807, + "learning_rate": 4.845902867736692e-06, + "loss": 0.1437, + "step": 14494 + }, + { + "epoch": 2.0467382095453264, + "grad_norm": 3.0260085828590926, + "learning_rate": 4.844596556986989e-06, + "loss": 0.1919, + "step": 14495 + }, + { + "epoch": 2.0468794125953123, + "grad_norm": 3.0410348182592455, + "learning_rate": 4.843290366046637e-06, + "loss": 0.1545, + "step": 14496 + }, + { + "epoch": 2.047020615645298, + "grad_norm": 3.5868306716188534, + "learning_rate": 4.84198429494599e-06, + "loss": 0.1707, + "step": 14497 + }, + { + "epoch": 2.047161818695284, + "grad_norm": 3.3010423588028885, + "learning_rate": 4.840678343715399e-06, + "loss": 0.2064, + "step": 14498 + }, + { + "epoch": 2.04730302174527, + "grad_norm": 3.1544093565275917, + "learning_rate": 4.839372512385215e-06, + "loss": 0.1461, + "step": 14499 + }, + { + "epoch": 2.047444224795256, + "grad_norm": 3.275244596971666, + "learning_rate": 4.838066800985786e-06, + "loss": 0.1738, + "step": 14500 + }, + { + "epoch": 2.0475854278452417, + "grad_norm": 2.7475601438991446, + "learning_rate": 4.836761209547456e-06, + "loss": 0.146, + "step": 14501 + }, + { + "epoch": 2.0477266308952276, + "grad_norm": 3.5534864873617855, + "learning_rate": 4.8354557381005655e-06, + "loss": 0.1868, + "step": 14502 + }, + { + "epoch": 2.0478678339452134, + "grad_norm": 2.771343403983787, + "learning_rate": 4.8341503866754525e-06, + "loss": 0.1519, + "step": 14503 + }, + { + "epoch": 2.0480090369951993, + "grad_norm": 3.3522315043418356, + "learning_rate": 4.832845155302449e-06, + "loss": 0.1824, + "step": 14504 + }, + { + "epoch": 2.048150240045185, + "grad_norm": 3.1009461985662314, + "learning_rate": 4.831540044011898e-06, + "loss": 0.1614, + "step": 14505 + }, + { + "epoch": 2.0482914430951706, + "grad_norm": 3.2946245665499587, + "learning_rate": 4.830235052834127e-06, + "loss": 0.1297, + "step": 14506 + }, + { + "epoch": 2.0484326461451565, + "grad_norm": 2.934026589059701, + "learning_rate": 4.828930181799457e-06, + "loss": 0.1727, + "step": 14507 + }, + { + "epoch": 2.0485738491951424, + "grad_norm": 2.6742657975111204, + "learning_rate": 4.827625430938216e-06, + "loss": 0.1401, + "step": 14508 + }, + { + "epoch": 2.0487150522451283, + "grad_norm": 2.8155205381034096, + "learning_rate": 4.826320800280724e-06, + "loss": 0.1496, + "step": 14509 + }, + { + "epoch": 2.048856255295114, + "grad_norm": 2.585024664146013, + "learning_rate": 4.8250162898573046e-06, + "loss": 0.1197, + "step": 14510 + }, + { + "epoch": 2.0489974583451, + "grad_norm": 2.4630113574813115, + "learning_rate": 4.823711899698272e-06, + "loss": 0.1, + "step": 14511 + }, + { + "epoch": 2.049138661395086, + "grad_norm": 3.145132487311336, + "learning_rate": 4.822407629833941e-06, + "loss": 0.1465, + "step": 14512 + }, + { + "epoch": 2.049279864445072, + "grad_norm": 2.6480232836693816, + "learning_rate": 4.821103480294619e-06, + "loss": 0.1401, + "step": 14513 + }, + { + "epoch": 2.0494210674950577, + "grad_norm": 2.8567173217373343, + "learning_rate": 4.81979945111062e-06, + "loss": 0.1594, + "step": 14514 + }, + { + "epoch": 2.0495622705450436, + "grad_norm": 3.247257542962107, + "learning_rate": 4.818495542312236e-06, + "loss": 0.1687, + "step": 14515 + }, + { + "epoch": 2.0497034735950295, + "grad_norm": 2.8576051767621986, + "learning_rate": 4.8171917539297816e-06, + "loss": 0.1157, + "step": 14516 + }, + { + "epoch": 2.0498446766450154, + "grad_norm": 3.298349014100732, + "learning_rate": 4.815888085993554e-06, + "loss": 0.1646, + "step": 14517 + }, + { + "epoch": 2.0499858796950012, + "grad_norm": 2.741543886876584, + "learning_rate": 4.814584538533848e-06, + "loss": 0.1472, + "step": 14518 + }, + { + "epoch": 2.050127082744987, + "grad_norm": 2.6667480833985717, + "learning_rate": 4.813281111580958e-06, + "loss": 0.125, + "step": 14519 + }, + { + "epoch": 2.050268285794973, + "grad_norm": 3.504940685230831, + "learning_rate": 4.811977805165174e-06, + "loss": 0.1372, + "step": 14520 + }, + { + "epoch": 2.050409488844959, + "grad_norm": 3.116437641274064, + "learning_rate": 4.810674619316782e-06, + "loss": 0.1739, + "step": 14521 + }, + { + "epoch": 2.0505506918949448, + "grad_norm": 3.2684391051589765, + "learning_rate": 4.8093715540660745e-06, + "loss": 0.1893, + "step": 14522 + }, + { + "epoch": 2.0506918949449306, + "grad_norm": 4.225769454584112, + "learning_rate": 4.808068609443333e-06, + "loss": 0.1916, + "step": 14523 + }, + { + "epoch": 2.0508330979949165, + "grad_norm": 2.852047397862396, + "learning_rate": 4.806765785478833e-06, + "loss": 0.174, + "step": 14524 + }, + { + "epoch": 2.0509743010449024, + "grad_norm": 2.9341874008657234, + "learning_rate": 4.805463082202852e-06, + "loss": 0.1295, + "step": 14525 + }, + { + "epoch": 2.0511155040948883, + "grad_norm": 3.164927506202152, + "learning_rate": 4.804160499645667e-06, + "loss": 0.1738, + "step": 14526 + }, + { + "epoch": 2.051256707144874, + "grad_norm": 3.118051857326728, + "learning_rate": 4.802858037837543e-06, + "loss": 0.1594, + "step": 14527 + }, + { + "epoch": 2.05139791019486, + "grad_norm": 4.073432053418342, + "learning_rate": 4.801555696808758e-06, + "loss": 0.1915, + "step": 14528 + }, + { + "epoch": 2.051539113244846, + "grad_norm": 2.938396498779365, + "learning_rate": 4.8002534765895746e-06, + "loss": 0.1665, + "step": 14529 + }, + { + "epoch": 2.051680316294832, + "grad_norm": 3.4783238643001253, + "learning_rate": 4.798951377210253e-06, + "loss": 0.1845, + "step": 14530 + }, + { + "epoch": 2.0518215193448177, + "grad_norm": 3.1278819423532314, + "learning_rate": 4.797649398701061e-06, + "loss": 0.1573, + "step": 14531 + }, + { + "epoch": 2.0519627223948036, + "grad_norm": 3.306840448543757, + "learning_rate": 4.796347541092241e-06, + "loss": 0.1496, + "step": 14532 + }, + { + "epoch": 2.0521039254447895, + "grad_norm": 3.49600215583203, + "learning_rate": 4.7950458044140614e-06, + "loss": 0.2002, + "step": 14533 + }, + { + "epoch": 2.0522451284947754, + "grad_norm": 3.246943747904683, + "learning_rate": 4.7937441886967694e-06, + "loss": 0.1718, + "step": 14534 + }, + { + "epoch": 2.0523863315447612, + "grad_norm": 2.9637311309017726, + "learning_rate": 4.792442693970614e-06, + "loss": 0.166, + "step": 14535 + }, + { + "epoch": 2.052527534594747, + "grad_norm": 2.8466348179332894, + "learning_rate": 4.791141320265842e-06, + "loss": 0.1812, + "step": 14536 + }, + { + "epoch": 2.052668737644733, + "grad_norm": 3.42864011915757, + "learning_rate": 4.7898400676126946e-06, + "loss": 0.1478, + "step": 14537 + }, + { + "epoch": 2.052809940694719, + "grad_norm": 3.4276552424087066, + "learning_rate": 4.788538936041415e-06, + "loss": 0.1853, + "step": 14538 + }, + { + "epoch": 2.0529511437447048, + "grad_norm": 3.730478099923996, + "learning_rate": 4.787237925582237e-06, + "loss": 0.1835, + "step": 14539 + }, + { + "epoch": 2.0530923467946907, + "grad_norm": 2.761514372505962, + "learning_rate": 4.7859370362654045e-06, + "loss": 0.1442, + "step": 14540 + }, + { + "epoch": 2.0532335498446765, + "grad_norm": 2.884582167454878, + "learning_rate": 4.78463626812114e-06, + "loss": 0.1436, + "step": 14541 + }, + { + "epoch": 2.0533747528946624, + "grad_norm": 2.6521006983908393, + "learning_rate": 4.783335621179675e-06, + "loss": 0.1707, + "step": 14542 + }, + { + "epoch": 2.0535159559446483, + "grad_norm": 3.532411439116292, + "learning_rate": 4.78203509547124e-06, + "loss": 0.185, + "step": 14543 + }, + { + "epoch": 2.053657158994634, + "grad_norm": 2.7642484762186372, + "learning_rate": 4.780734691026051e-06, + "loss": 0.1684, + "step": 14544 + }, + { + "epoch": 2.05379836204462, + "grad_norm": 3.0002742334175267, + "learning_rate": 4.779434407874337e-06, + "loss": 0.1719, + "step": 14545 + }, + { + "epoch": 2.053939565094606, + "grad_norm": 3.0852635829896045, + "learning_rate": 4.778134246046313e-06, + "loss": 0.1995, + "step": 14546 + }, + { + "epoch": 2.054080768144592, + "grad_norm": 3.9868922650329814, + "learning_rate": 4.776834205572194e-06, + "loss": 0.2383, + "step": 14547 + }, + { + "epoch": 2.0542219711945777, + "grad_norm": 2.623615788264525, + "learning_rate": 4.775534286482197e-06, + "loss": 0.1417, + "step": 14548 + }, + { + "epoch": 2.0543631742445636, + "grad_norm": 2.8091972795339957, + "learning_rate": 4.774234488806517e-06, + "loss": 0.1326, + "step": 14549 + }, + { + "epoch": 2.0545043772945495, + "grad_norm": 3.1106108909666785, + "learning_rate": 4.772934812575377e-06, + "loss": 0.1648, + "step": 14550 + }, + { + "epoch": 2.0546455803445354, + "grad_norm": 2.841043330077408, + "learning_rate": 4.771635257818973e-06, + "loss": 0.1492, + "step": 14551 + }, + { + "epoch": 2.0547867833945213, + "grad_norm": 3.236467731611961, + "learning_rate": 4.770335824567508e-06, + "loss": 0.1895, + "step": 14552 + }, + { + "epoch": 2.054927986444507, + "grad_norm": 3.068723290317985, + "learning_rate": 4.769036512851181e-06, + "loss": 0.1956, + "step": 14553 + }, + { + "epoch": 2.055069189494493, + "grad_norm": 3.2571445380593245, + "learning_rate": 4.767737322700185e-06, + "loss": 0.1538, + "step": 14554 + }, + { + "epoch": 2.055210392544479, + "grad_norm": 4.353635912000762, + "learning_rate": 4.766438254144714e-06, + "loss": 0.2065, + "step": 14555 + }, + { + "epoch": 2.055351595594465, + "grad_norm": 3.4657953081230684, + "learning_rate": 4.765139307214956e-06, + "loss": 0.2037, + "step": 14556 + }, + { + "epoch": 2.0554927986444507, + "grad_norm": 3.5216367986386143, + "learning_rate": 4.763840481941104e-06, + "loss": 0.1886, + "step": 14557 + }, + { + "epoch": 2.0556340016944366, + "grad_norm": 3.995323943844227, + "learning_rate": 4.762541778353337e-06, + "loss": 0.17, + "step": 14558 + }, + { + "epoch": 2.0557752047444224, + "grad_norm": 3.195036420341871, + "learning_rate": 4.761243196481835e-06, + "loss": 0.161, + "step": 14559 + }, + { + "epoch": 2.0559164077944083, + "grad_norm": 2.6690543835378078, + "learning_rate": 4.75994473635678e-06, + "loss": 0.1273, + "step": 14560 + }, + { + "epoch": 2.056057610844394, + "grad_norm": 3.521330546201101, + "learning_rate": 4.758646398008342e-06, + "loss": 0.1568, + "step": 14561 + }, + { + "epoch": 2.05619881389438, + "grad_norm": 3.109355959077045, + "learning_rate": 4.757348181466702e-06, + "loss": 0.1404, + "step": 14562 + }, + { + "epoch": 2.056340016944366, + "grad_norm": 2.8202268118063225, + "learning_rate": 4.756050086762028e-06, + "loss": 0.1748, + "step": 14563 + }, + { + "epoch": 2.056481219994352, + "grad_norm": 2.8964411338576728, + "learning_rate": 4.754752113924482e-06, + "loss": 0.1451, + "step": 14564 + }, + { + "epoch": 2.0566224230443377, + "grad_norm": 2.653786648258179, + "learning_rate": 4.753454262984238e-06, + "loss": 0.1768, + "step": 14565 + }, + { + "epoch": 2.0567636260943236, + "grad_norm": 2.8933153106071865, + "learning_rate": 4.7521565339714415e-06, + "loss": 0.1562, + "step": 14566 + }, + { + "epoch": 2.0569048291443095, + "grad_norm": 3.6819364818263796, + "learning_rate": 4.750858926916266e-06, + "loss": 0.2102, + "step": 14567 + }, + { + "epoch": 2.0570460321942954, + "grad_norm": 2.5768264984096674, + "learning_rate": 4.749561441848862e-06, + "loss": 0.117, + "step": 14568 + }, + { + "epoch": 2.0571872352442813, + "grad_norm": 2.7749613516535443, + "learning_rate": 4.748264078799382e-06, + "loss": 0.1705, + "step": 14569 + }, + { + "epoch": 2.057328438294267, + "grad_norm": 2.5641453387634665, + "learning_rate": 4.746966837797977e-06, + "loss": 0.149, + "step": 14570 + }, + { + "epoch": 2.057469641344253, + "grad_norm": 3.2472698068938337, + "learning_rate": 4.745669718874795e-06, + "loss": 0.1765, + "step": 14571 + }, + { + "epoch": 2.057610844394239, + "grad_norm": 3.2171274648342894, + "learning_rate": 4.744372722059978e-06, + "loss": 0.1535, + "step": 14572 + }, + { + "epoch": 2.057752047444225, + "grad_norm": 2.932980407575749, + "learning_rate": 4.74307584738367e-06, + "loss": 0.1201, + "step": 14573 + }, + { + "epoch": 2.0578932504942107, + "grad_norm": 3.351798804594412, + "learning_rate": 4.741779094876009e-06, + "loss": 0.1806, + "step": 14574 + }, + { + "epoch": 2.0580344535441966, + "grad_norm": 2.9507311558184175, + "learning_rate": 4.7404824645671314e-06, + "loss": 0.1751, + "step": 14575 + }, + { + "epoch": 2.0581756565941824, + "grad_norm": 2.9260992971226565, + "learning_rate": 4.739185956487169e-06, + "loss": 0.1433, + "step": 14576 + }, + { + "epoch": 2.0583168596441683, + "grad_norm": 3.3584715023393747, + "learning_rate": 4.737889570666253e-06, + "loss": 0.19, + "step": 14577 + }, + { + "epoch": 2.058458062694154, + "grad_norm": 2.479939649544663, + "learning_rate": 4.736593307134508e-06, + "loss": 0.1159, + "step": 14578 + }, + { + "epoch": 2.05859926574414, + "grad_norm": 3.7224454554767035, + "learning_rate": 4.735297165922065e-06, + "loss": 0.1665, + "step": 14579 + }, + { + "epoch": 2.058740468794126, + "grad_norm": 3.458701228776448, + "learning_rate": 4.7340011470590415e-06, + "loss": 0.1998, + "step": 14580 + }, + { + "epoch": 2.058881671844112, + "grad_norm": 3.162857260665056, + "learning_rate": 4.732705250575558e-06, + "loss": 0.1349, + "step": 14581 + }, + { + "epoch": 2.0590228748940977, + "grad_norm": 2.8403605883911576, + "learning_rate": 4.7314094765017325e-06, + "loss": 0.1525, + "step": 14582 + }, + { + "epoch": 2.0591640779440836, + "grad_norm": 3.1992001135000527, + "learning_rate": 4.730113824867668e-06, + "loss": 0.2035, + "step": 14583 + }, + { + "epoch": 2.0593052809940695, + "grad_norm": 2.648282229832616, + "learning_rate": 4.728818295703487e-06, + "loss": 0.1593, + "step": 14584 + }, + { + "epoch": 2.0594464840440554, + "grad_norm": 3.007968036461489, + "learning_rate": 4.727522889039292e-06, + "loss": 0.1619, + "step": 14585 + }, + { + "epoch": 2.0595876870940413, + "grad_norm": 3.8038497459193437, + "learning_rate": 4.726227604905188e-06, + "loss": 0.2118, + "step": 14586 + }, + { + "epoch": 2.059728890144027, + "grad_norm": 3.6179408844566976, + "learning_rate": 4.7249324433312775e-06, + "loss": 0.1816, + "step": 14587 + }, + { + "epoch": 2.059870093194013, + "grad_norm": 2.7689915142598234, + "learning_rate": 4.723637404347658e-06, + "loss": 0.1549, + "step": 14588 + }, + { + "epoch": 2.060011296243999, + "grad_norm": 2.789638715487804, + "learning_rate": 4.722342487984426e-06, + "loss": 0.1301, + "step": 14589 + }, + { + "epoch": 2.060152499293985, + "grad_norm": 2.847566461959244, + "learning_rate": 4.721047694271676e-06, + "loss": 0.1681, + "step": 14590 + }, + { + "epoch": 2.0602937023439707, + "grad_norm": 3.323595266061516, + "learning_rate": 4.719753023239498e-06, + "loss": 0.1665, + "step": 14591 + }, + { + "epoch": 2.0604349053939566, + "grad_norm": 3.192750341669437, + "learning_rate": 4.718458474917979e-06, + "loss": 0.1596, + "step": 14592 + }, + { + "epoch": 2.0605761084439425, + "grad_norm": 3.6462342323683457, + "learning_rate": 4.717164049337205e-06, + "loss": 0.176, + "step": 14593 + }, + { + "epoch": 2.0607173114939283, + "grad_norm": 3.878666353507677, + "learning_rate": 4.715869746527256e-06, + "loss": 0.1904, + "step": 14594 + }, + { + "epoch": 2.060858514543914, + "grad_norm": 2.9470708923734557, + "learning_rate": 4.714575566518209e-06, + "loss": 0.1528, + "step": 14595 + }, + { + "epoch": 2.0609997175939, + "grad_norm": 3.9209314096008607, + "learning_rate": 4.713281509340146e-06, + "loss": 0.1744, + "step": 14596 + }, + { + "epoch": 2.061140920643886, + "grad_norm": 3.2482012332949233, + "learning_rate": 4.7119875750231395e-06, + "loss": 0.1495, + "step": 14597 + }, + { + "epoch": 2.061282123693872, + "grad_norm": 2.5126908947578164, + "learning_rate": 4.7106937635972565e-06, + "loss": 0.1051, + "step": 14598 + }, + { + "epoch": 2.0614233267438578, + "grad_norm": 2.8896517390888667, + "learning_rate": 4.70940007509257e-06, + "loss": 0.1846, + "step": 14599 + }, + { + "epoch": 2.0615645297938436, + "grad_norm": 3.5348870815979083, + "learning_rate": 4.708106509539134e-06, + "loss": 0.1612, + "step": 14600 + }, + { + "epoch": 2.0617057328438295, + "grad_norm": 2.874917253744368, + "learning_rate": 4.706813066967021e-06, + "loss": 0.1626, + "step": 14601 + }, + { + "epoch": 2.0618469358938154, + "grad_norm": 3.118695010241647, + "learning_rate": 4.705519747406285e-06, + "loss": 0.1761, + "step": 14602 + }, + { + "epoch": 2.0619881389438013, + "grad_norm": 3.037491061569983, + "learning_rate": 4.7042265508869855e-06, + "loss": 0.1709, + "step": 14603 + }, + { + "epoch": 2.062129341993787, + "grad_norm": 2.893084895351072, + "learning_rate": 4.702933477439172e-06, + "loss": 0.1371, + "step": 14604 + }, + { + "epoch": 2.062270545043773, + "grad_norm": 3.5008183748342594, + "learning_rate": 4.7016405270928985e-06, + "loss": 0.1892, + "step": 14605 + }, + { + "epoch": 2.062411748093759, + "grad_norm": 3.9740044113957316, + "learning_rate": 4.700347699878211e-06, + "loss": 0.2049, + "step": 14606 + }, + { + "epoch": 2.062552951143745, + "grad_norm": 3.059640078716887, + "learning_rate": 4.699054995825153e-06, + "loss": 0.1605, + "step": 14607 + }, + { + "epoch": 2.0626941541937307, + "grad_norm": 3.0647110018905788, + "learning_rate": 4.697762414963768e-06, + "loss": 0.168, + "step": 14608 + }, + { + "epoch": 2.0628353572437166, + "grad_norm": 2.928518624130187, + "learning_rate": 4.696469957324094e-06, + "loss": 0.1475, + "step": 14609 + }, + { + "epoch": 2.0629765602937025, + "grad_norm": 2.552423475697362, + "learning_rate": 4.695177622936169e-06, + "loss": 0.1329, + "step": 14610 + }, + { + "epoch": 2.0631177633436883, + "grad_norm": 3.650369928103259, + "learning_rate": 4.693885411830025e-06, + "loss": 0.1764, + "step": 14611 + }, + { + "epoch": 2.0632589663936742, + "grad_norm": 3.10239989841359, + "learning_rate": 4.692593324035688e-06, + "loss": 0.1552, + "step": 14612 + }, + { + "epoch": 2.06340016944366, + "grad_norm": 2.8650250485629507, + "learning_rate": 4.691301359583195e-06, + "loss": 0.1596, + "step": 14613 + }, + { + "epoch": 2.063541372493646, + "grad_norm": 2.8342796954252014, + "learning_rate": 4.690009518502564e-06, + "loss": 0.1558, + "step": 14614 + }, + { + "epoch": 2.063682575543632, + "grad_norm": 3.1280005916282105, + "learning_rate": 4.68871780082382e-06, + "loss": 0.1922, + "step": 14615 + }, + { + "epoch": 2.0638237785936178, + "grad_norm": 2.9336291447364826, + "learning_rate": 4.687426206576983e-06, + "loss": 0.1612, + "step": 14616 + }, + { + "epoch": 2.0639649816436036, + "grad_norm": 3.132439844782433, + "learning_rate": 4.686134735792064e-06, + "loss": 0.1469, + "step": 14617 + }, + { + "epoch": 2.0641061846935895, + "grad_norm": 3.0325203779537317, + "learning_rate": 4.684843388499075e-06, + "loss": 0.1675, + "step": 14618 + }, + { + "epoch": 2.0642473877435754, + "grad_norm": 2.53948808917075, + "learning_rate": 4.683552164728033e-06, + "loss": 0.1442, + "step": 14619 + }, + { + "epoch": 2.0643885907935613, + "grad_norm": 2.512427411162805, + "learning_rate": 4.682261064508944e-06, + "loss": 0.1277, + "step": 14620 + }, + { + "epoch": 2.064529793843547, + "grad_norm": 2.9280640623766434, + "learning_rate": 4.680970087871811e-06, + "loss": 0.1398, + "step": 14621 + }, + { + "epoch": 2.064670996893533, + "grad_norm": 2.8192033797968827, + "learning_rate": 4.679679234846636e-06, + "loss": 0.1624, + "step": 14622 + }, + { + "epoch": 2.064812199943519, + "grad_norm": 3.2511219112624463, + "learning_rate": 4.678388505463417e-06, + "loss": 0.156, + "step": 14623 + }, + { + "epoch": 2.064953402993505, + "grad_norm": 3.19350399453411, + "learning_rate": 4.677097899752152e-06, + "loss": 0.1641, + "step": 14624 + }, + { + "epoch": 2.0650946060434907, + "grad_norm": 3.191256580327672, + "learning_rate": 4.675807417742832e-06, + "loss": 0.1575, + "step": 14625 + }, + { + "epoch": 2.0652358090934766, + "grad_norm": 3.1236377453924526, + "learning_rate": 4.674517059465449e-06, + "loss": 0.1459, + "step": 14626 + }, + { + "epoch": 2.0653770121434625, + "grad_norm": 2.9712037598668717, + "learning_rate": 4.67322682494999e-06, + "loss": 0.161, + "step": 14627 + }, + { + "epoch": 2.0655182151934484, + "grad_norm": 3.0656621266934807, + "learning_rate": 4.671936714226438e-06, + "loss": 0.1531, + "step": 14628 + }, + { + "epoch": 2.0656594182434342, + "grad_norm": 2.9629760266444976, + "learning_rate": 4.6706467273247766e-06, + "loss": 0.1298, + "step": 14629 + }, + { + "epoch": 2.06580062129342, + "grad_norm": 2.835014546155835, + "learning_rate": 4.66935686427498e-06, + "loss": 0.1436, + "step": 14630 + }, + { + "epoch": 2.065941824343406, + "grad_norm": 3.2045946385137007, + "learning_rate": 4.6680671251070306e-06, + "loss": 0.1624, + "step": 14631 + }, + { + "epoch": 2.066083027393392, + "grad_norm": 3.7945292588806274, + "learning_rate": 4.666777509850899e-06, + "loss": 0.2096, + "step": 14632 + }, + { + "epoch": 2.0662242304433778, + "grad_norm": 3.2625338323915236, + "learning_rate": 4.665488018536559e-06, + "loss": 0.1307, + "step": 14633 + }, + { + "epoch": 2.0663654334933637, + "grad_norm": 3.259428476327424, + "learning_rate": 4.664198651193969e-06, + "loss": 0.1459, + "step": 14634 + }, + { + "epoch": 2.0665066365433495, + "grad_norm": 3.751529472395767, + "learning_rate": 4.662909407853096e-06, + "loss": 0.1881, + "step": 14635 + }, + { + "epoch": 2.0666478395933354, + "grad_norm": 3.8586700412946446, + "learning_rate": 4.6616202885439056e-06, + "loss": 0.1769, + "step": 14636 + }, + { + "epoch": 2.0667890426433213, + "grad_norm": 2.485426300728362, + "learning_rate": 4.660331293296355e-06, + "loss": 0.1014, + "step": 14637 + }, + { + "epoch": 2.066930245693307, + "grad_norm": 3.033677964650005, + "learning_rate": 4.659042422140399e-06, + "loss": 0.1675, + "step": 14638 + }, + { + "epoch": 2.0670714487432926, + "grad_norm": 3.543588609325569, + "learning_rate": 4.65775367510599e-06, + "loss": 0.1553, + "step": 14639 + }, + { + "epoch": 2.0672126517932785, + "grad_norm": 3.4025258491146584, + "learning_rate": 4.656465052223079e-06, + "loss": 0.1635, + "step": 14640 + }, + { + "epoch": 2.0673538548432644, + "grad_norm": 3.392461143517055, + "learning_rate": 4.655176553521612e-06, + "loss": 0.1505, + "step": 14641 + }, + { + "epoch": 2.0674950578932503, + "grad_norm": 3.2749484841609195, + "learning_rate": 4.653888179031533e-06, + "loss": 0.151, + "step": 14642 + }, + { + "epoch": 2.067636260943236, + "grad_norm": 4.143356489301317, + "learning_rate": 4.652599928782786e-06, + "loss": 0.2052, + "step": 14643 + }, + { + "epoch": 2.067777463993222, + "grad_norm": 2.7448579883531474, + "learning_rate": 4.6513118028053065e-06, + "loss": 0.1451, + "step": 14644 + }, + { + "epoch": 2.067918667043208, + "grad_norm": 2.6900722822828738, + "learning_rate": 4.65002380112903e-06, + "loss": 0.1015, + "step": 14645 + }, + { + "epoch": 2.068059870093194, + "grad_norm": 2.87713081792148, + "learning_rate": 4.64873592378389e-06, + "loss": 0.1303, + "step": 14646 + }, + { + "epoch": 2.0682010731431797, + "grad_norm": 3.4431861699135315, + "learning_rate": 4.647448170799813e-06, + "loss": 0.2075, + "step": 14647 + }, + { + "epoch": 2.0683422761931656, + "grad_norm": 3.440467482759277, + "learning_rate": 4.646160542206733e-06, + "loss": 0.1991, + "step": 14648 + }, + { + "epoch": 2.0684834792431515, + "grad_norm": 3.3183933573921665, + "learning_rate": 4.644873038034568e-06, + "loss": 0.1474, + "step": 14649 + }, + { + "epoch": 2.0686246822931373, + "grad_norm": 2.784566255648997, + "learning_rate": 4.6435856583132465e-06, + "loss": 0.1726, + "step": 14650 + }, + { + "epoch": 2.0687658853431232, + "grad_norm": 3.1361440696705527, + "learning_rate": 4.642298403072677e-06, + "loss": 0.1232, + "step": 14651 + }, + { + "epoch": 2.068907088393109, + "grad_norm": 2.343068463033581, + "learning_rate": 4.641011272342775e-06, + "loss": 0.1304, + "step": 14652 + }, + { + "epoch": 2.069048291443095, + "grad_norm": 3.089784748506537, + "learning_rate": 4.639724266153459e-06, + "loss": 0.1761, + "step": 14653 + }, + { + "epoch": 2.069189494493081, + "grad_norm": 3.124526206622, + "learning_rate": 4.6384373845346375e-06, + "loss": 0.1693, + "step": 14654 + }, + { + "epoch": 2.0693306975430668, + "grad_norm": 3.4770227369051816, + "learning_rate": 4.637150627516215e-06, + "loss": 0.1354, + "step": 14655 + }, + { + "epoch": 2.0694719005930526, + "grad_norm": 2.964237823095029, + "learning_rate": 4.635863995128097e-06, + "loss": 0.1544, + "step": 14656 + }, + { + "epoch": 2.0696131036430385, + "grad_norm": 3.6455839127496756, + "learning_rate": 4.634577487400182e-06, + "loss": 0.1697, + "step": 14657 + }, + { + "epoch": 2.0697543066930244, + "grad_norm": 2.242226678578641, + "learning_rate": 4.633291104362369e-06, + "loss": 0.085, + "step": 14658 + }, + { + "epoch": 2.0698955097430103, + "grad_norm": 2.914102869308605, + "learning_rate": 4.632004846044555e-06, + "loss": 0.108, + "step": 14659 + }, + { + "epoch": 2.070036712792996, + "grad_norm": 2.8743823220547102, + "learning_rate": 4.630718712476628e-06, + "loss": 0.1292, + "step": 14660 + }, + { + "epoch": 2.070177915842982, + "grad_norm": 3.244372647564022, + "learning_rate": 4.62943270368848e-06, + "loss": 0.1828, + "step": 14661 + }, + { + "epoch": 2.070319118892968, + "grad_norm": 3.0521290080655055, + "learning_rate": 4.628146819709997e-06, + "loss": 0.1393, + "step": 14662 + }, + { + "epoch": 2.070460321942954, + "grad_norm": 3.879372136043958, + "learning_rate": 4.626861060571061e-06, + "loss": 0.1892, + "step": 14663 + }, + { + "epoch": 2.0706015249929397, + "grad_norm": 3.7941529531463423, + "learning_rate": 4.6255754263015495e-06, + "loss": 0.2226, + "step": 14664 + }, + { + "epoch": 2.0707427280429256, + "grad_norm": 2.9503825090448488, + "learning_rate": 4.624289916931349e-06, + "loss": 0.1357, + "step": 14665 + }, + { + "epoch": 2.0708839310929115, + "grad_norm": 4.137609784273914, + "learning_rate": 4.623004532490328e-06, + "loss": 0.1918, + "step": 14666 + }, + { + "epoch": 2.0710251341428974, + "grad_norm": 2.76583384798674, + "learning_rate": 4.6217192730083645e-06, + "loss": 0.1543, + "step": 14667 + }, + { + "epoch": 2.0711663371928832, + "grad_norm": 3.049304803794742, + "learning_rate": 4.6204341385153186e-06, + "loss": 0.1672, + "step": 14668 + }, + { + "epoch": 2.071307540242869, + "grad_norm": 3.511286794759079, + "learning_rate": 4.619149129041056e-06, + "loss": 0.1936, + "step": 14669 + }, + { + "epoch": 2.071448743292855, + "grad_norm": 3.560594584298056, + "learning_rate": 4.617864244615448e-06, + "loss": 0.1811, + "step": 14670 + }, + { + "epoch": 2.071589946342841, + "grad_norm": 3.6844193061281514, + "learning_rate": 4.61657948526835e-06, + "loss": 0.1764, + "step": 14671 + }, + { + "epoch": 2.0717311493928268, + "grad_norm": 2.8845571761890962, + "learning_rate": 4.615294851029619e-06, + "loss": 0.16, + "step": 14672 + }, + { + "epoch": 2.0718723524428126, + "grad_norm": 3.673445053258589, + "learning_rate": 4.614010341929112e-06, + "loss": 0.1499, + "step": 14673 + }, + { + "epoch": 2.0720135554927985, + "grad_norm": 2.805554667742319, + "learning_rate": 4.612725957996677e-06, + "loss": 0.1324, + "step": 14674 + }, + { + "epoch": 2.0721547585427844, + "grad_norm": 3.648881882688345, + "learning_rate": 4.6114416992621645e-06, + "loss": 0.1863, + "step": 14675 + }, + { + "epoch": 2.0722959615927703, + "grad_norm": 3.000001512007524, + "learning_rate": 4.610157565755421e-06, + "loss": 0.1615, + "step": 14676 + }, + { + "epoch": 2.072437164642756, + "grad_norm": 2.6928126251656326, + "learning_rate": 4.608873557506287e-06, + "loss": 0.1517, + "step": 14677 + }, + { + "epoch": 2.072578367692742, + "grad_norm": 3.114969395821639, + "learning_rate": 4.607589674544603e-06, + "loss": 0.1553, + "step": 14678 + }, + { + "epoch": 2.072719570742728, + "grad_norm": 3.0876503342139165, + "learning_rate": 4.606305916900206e-06, + "loss": 0.1664, + "step": 14679 + }, + { + "epoch": 2.072860773792714, + "grad_norm": 2.9884965229670915, + "learning_rate": 4.6050222846029315e-06, + "loss": 0.1511, + "step": 14680 + }, + { + "epoch": 2.0730019768426997, + "grad_norm": 3.7433518284316865, + "learning_rate": 4.603738777682604e-06, + "loss": 0.1602, + "step": 14681 + }, + { + "epoch": 2.0731431798926856, + "grad_norm": 3.288698736598821, + "learning_rate": 4.602455396169062e-06, + "loss": 0.1865, + "step": 14682 + }, + { + "epoch": 2.0732843829426715, + "grad_norm": 2.877266163911154, + "learning_rate": 4.601172140092125e-06, + "loss": 0.1388, + "step": 14683 + }, + { + "epoch": 2.0734255859926574, + "grad_norm": 3.736464992739931, + "learning_rate": 4.59988900948162e-06, + "loss": 0.196, + "step": 14684 + }, + { + "epoch": 2.0735667890426432, + "grad_norm": 2.799427415147197, + "learning_rate": 4.598606004367358e-06, + "loss": 0.1528, + "step": 14685 + }, + { + "epoch": 2.073707992092629, + "grad_norm": 3.023109675095226, + "learning_rate": 4.597323124779155e-06, + "loss": 0.1394, + "step": 14686 + }, + { + "epoch": 2.073849195142615, + "grad_norm": 2.8983180028306235, + "learning_rate": 4.596040370746835e-06, + "loss": 0.1417, + "step": 14687 + }, + { + "epoch": 2.073990398192601, + "grad_norm": 2.3930957576533953, + "learning_rate": 4.594757742300201e-06, + "loss": 0.1329, + "step": 14688 + }, + { + "epoch": 2.0741316012425868, + "grad_norm": 2.9215574769745727, + "learning_rate": 4.593475239469064e-06, + "loss": 0.1391, + "step": 14689 + }, + { + "epoch": 2.0742728042925727, + "grad_norm": 2.8407623388259027, + "learning_rate": 4.592192862283228e-06, + "loss": 0.1761, + "step": 14690 + }, + { + "epoch": 2.0744140073425585, + "grad_norm": 2.803837297825593, + "learning_rate": 4.590910610772493e-06, + "loss": 0.1335, + "step": 14691 + }, + { + "epoch": 2.0745552103925444, + "grad_norm": 3.705153680428913, + "learning_rate": 4.589628484966661e-06, + "loss": 0.1546, + "step": 14692 + }, + { + "epoch": 2.0746964134425303, + "grad_norm": 2.677548486329379, + "learning_rate": 4.588346484895525e-06, + "loss": 0.1172, + "step": 14693 + }, + { + "epoch": 2.074837616492516, + "grad_norm": 3.530784935867504, + "learning_rate": 4.587064610588881e-06, + "loss": 0.1673, + "step": 14694 + }, + { + "epoch": 2.074978819542502, + "grad_norm": 3.835583788441309, + "learning_rate": 4.585782862076517e-06, + "loss": 0.1826, + "step": 14695 + }, + { + "epoch": 2.075120022592488, + "grad_norm": 2.810338910208409, + "learning_rate": 4.5845012393882205e-06, + "loss": 0.1247, + "step": 14696 + }, + { + "epoch": 2.075261225642474, + "grad_norm": 2.670421583774606, + "learning_rate": 4.583219742553777e-06, + "loss": 0.1634, + "step": 14697 + }, + { + "epoch": 2.0754024286924597, + "grad_norm": 3.4911466877046577, + "learning_rate": 4.581938371602964e-06, + "loss": 0.1967, + "step": 14698 + }, + { + "epoch": 2.0755436317424456, + "grad_norm": 3.384510781102771, + "learning_rate": 4.580657126565566e-06, + "loss": 0.2017, + "step": 14699 + }, + { + "epoch": 2.0756848347924315, + "grad_norm": 2.950843819307427, + "learning_rate": 4.5793760074713565e-06, + "loss": 0.1931, + "step": 14700 + }, + { + "epoch": 2.0758260378424174, + "grad_norm": 2.465239592204603, + "learning_rate": 4.578095014350113e-06, + "loss": 0.1127, + "step": 14701 + }, + { + "epoch": 2.0759672408924033, + "grad_norm": 3.013464678330998, + "learning_rate": 4.576814147231594e-06, + "loss": 0.1298, + "step": 14702 + }, + { + "epoch": 2.076108443942389, + "grad_norm": 3.3342127123388177, + "learning_rate": 4.5755334061455685e-06, + "loss": 0.1805, + "step": 14703 + }, + { + "epoch": 2.076249646992375, + "grad_norm": 3.804902666181815, + "learning_rate": 4.574252791121808e-06, + "loss": 0.1785, + "step": 14704 + }, + { + "epoch": 2.076390850042361, + "grad_norm": 4.219726050235283, + "learning_rate": 4.57297230219007e-06, + "loss": 0.2245, + "step": 14705 + }, + { + "epoch": 2.076532053092347, + "grad_norm": 3.2869633158365428, + "learning_rate": 4.571691939380111e-06, + "loss": 0.1672, + "step": 14706 + }, + { + "epoch": 2.0766732561423327, + "grad_norm": 2.883201253351491, + "learning_rate": 4.570411702721688e-06, + "loss": 0.1654, + "step": 14707 + }, + { + "epoch": 2.0768144591923186, + "grad_norm": 2.9341604826350256, + "learning_rate": 4.569131592244552e-06, + "loss": 0.1235, + "step": 14708 + }, + { + "epoch": 2.0769556622423044, + "grad_norm": 3.537611157175534, + "learning_rate": 4.567851607978452e-06, + "loss": 0.1567, + "step": 14709 + }, + { + "epoch": 2.0770968652922903, + "grad_norm": 3.2301576981102236, + "learning_rate": 4.566571749953135e-06, + "loss": 0.159, + "step": 14710 + }, + { + "epoch": 2.077238068342276, + "grad_norm": 3.3168217313129627, + "learning_rate": 4.565292018198344e-06, + "loss": 0.2093, + "step": 14711 + }, + { + "epoch": 2.077379271392262, + "grad_norm": 3.6085816615228152, + "learning_rate": 4.564012412743819e-06, + "loss": 0.2345, + "step": 14712 + }, + { + "epoch": 2.077520474442248, + "grad_norm": 3.7185986269953117, + "learning_rate": 4.562732933619298e-06, + "loss": 0.1716, + "step": 14713 + }, + { + "epoch": 2.077661677492234, + "grad_norm": 3.3619422330644992, + "learning_rate": 4.561453580854516e-06, + "loss": 0.1799, + "step": 14714 + }, + { + "epoch": 2.0778028805422197, + "grad_norm": 3.0835975112833967, + "learning_rate": 4.5601743544791995e-06, + "loss": 0.1808, + "step": 14715 + }, + { + "epoch": 2.0779440835922056, + "grad_norm": 2.720222764102392, + "learning_rate": 4.558895254523086e-06, + "loss": 0.1357, + "step": 14716 + }, + { + "epoch": 2.0780852866421915, + "grad_norm": 3.7691111678852307, + "learning_rate": 4.5576162810158966e-06, + "loss": 0.2266, + "step": 14717 + }, + { + "epoch": 2.0782264896921774, + "grad_norm": 3.068380231707557, + "learning_rate": 4.556337433987359e-06, + "loss": 0.1653, + "step": 14718 + }, + { + "epoch": 2.0783676927421633, + "grad_norm": 2.9330922610478765, + "learning_rate": 4.555058713467184e-06, + "loss": 0.1819, + "step": 14719 + }, + { + "epoch": 2.078508895792149, + "grad_norm": 3.5628272873615114, + "learning_rate": 4.553780119485093e-06, + "loss": 0.1515, + "step": 14720 + }, + { + "epoch": 2.078650098842135, + "grad_norm": 2.608584105521275, + "learning_rate": 4.552501652070796e-06, + "loss": 0.1447, + "step": 14721 + }, + { + "epoch": 2.078791301892121, + "grad_norm": 3.1552897009342273, + "learning_rate": 4.551223311254013e-06, + "loss": 0.1158, + "step": 14722 + }, + { + "epoch": 2.078932504942107, + "grad_norm": 2.8022880804967083, + "learning_rate": 4.5499450970644455e-06, + "loss": 0.1801, + "step": 14723 + }, + { + "epoch": 2.0790737079920927, + "grad_norm": 3.509499661794767, + "learning_rate": 4.5486670095318006e-06, + "loss": 0.194, + "step": 14724 + }, + { + "epoch": 2.0792149110420786, + "grad_norm": 2.6474141595607925, + "learning_rate": 4.547389048685781e-06, + "loss": 0.147, + "step": 14725 + }, + { + "epoch": 2.0793561140920644, + "grad_norm": 3.65937466596559, + "learning_rate": 4.5461112145560845e-06, + "loss": 0.191, + "step": 14726 + }, + { + "epoch": 2.0794973171420503, + "grad_norm": 3.2089370048205588, + "learning_rate": 4.544833507172408e-06, + "loss": 0.1641, + "step": 14727 + }, + { + "epoch": 2.079638520192036, + "grad_norm": 3.594347137145248, + "learning_rate": 4.543555926564445e-06, + "loss": 0.191, + "step": 14728 + }, + { + "epoch": 2.079779723242022, + "grad_norm": 3.355553440511009, + "learning_rate": 4.542278472761886e-06, + "loss": 0.1876, + "step": 14729 + }, + { + "epoch": 2.079920926292008, + "grad_norm": 4.358562061112239, + "learning_rate": 4.541001145794419e-06, + "loss": 0.2084, + "step": 14730 + }, + { + "epoch": 2.080062129341994, + "grad_norm": 3.663134912340441, + "learning_rate": 4.539723945691727e-06, + "loss": 0.1952, + "step": 14731 + }, + { + "epoch": 2.0802033323919797, + "grad_norm": 2.620609435243367, + "learning_rate": 4.538446872483492e-06, + "loss": 0.1585, + "step": 14732 + }, + { + "epoch": 2.0803445354419656, + "grad_norm": 3.18988289485214, + "learning_rate": 4.53716992619939e-06, + "loss": 0.1706, + "step": 14733 + }, + { + "epoch": 2.0804857384919515, + "grad_norm": 3.3767087967814704, + "learning_rate": 4.535893106869107e-06, + "loss": 0.1794, + "step": 14734 + }, + { + "epoch": 2.0806269415419374, + "grad_norm": 2.482774651330091, + "learning_rate": 4.534616414522304e-06, + "loss": 0.1465, + "step": 14735 + }, + { + "epoch": 2.0807681445919233, + "grad_norm": 3.1214397052021474, + "learning_rate": 4.533339849188656e-06, + "loss": 0.1763, + "step": 14736 + }, + { + "epoch": 2.080909347641909, + "grad_norm": 3.2769096578573267, + "learning_rate": 4.532063410897829e-06, + "loss": 0.1512, + "step": 14737 + }, + { + "epoch": 2.081050550691895, + "grad_norm": 3.4795245883684207, + "learning_rate": 4.530787099679482e-06, + "loss": 0.2028, + "step": 14738 + }, + { + "epoch": 2.081191753741881, + "grad_norm": 2.950221792215814, + "learning_rate": 4.5295109155632865e-06, + "loss": 0.1449, + "step": 14739 + }, + { + "epoch": 2.081332956791867, + "grad_norm": 2.527010016401659, + "learning_rate": 4.528234858578894e-06, + "loss": 0.1283, + "step": 14740 + }, + { + "epoch": 2.0814741598418527, + "grad_norm": 3.0980367902942763, + "learning_rate": 4.526958928755959e-06, + "loss": 0.178, + "step": 14741 + }, + { + "epoch": 2.0816153628918386, + "grad_norm": 2.8845475225499775, + "learning_rate": 4.525683126124137e-06, + "loss": 0.1741, + "step": 14742 + }, + { + "epoch": 2.0817565659418245, + "grad_norm": 2.99925301901024, + "learning_rate": 4.524407450713073e-06, + "loss": 0.1822, + "step": 14743 + }, + { + "epoch": 2.0818977689918103, + "grad_norm": 2.530270627382348, + "learning_rate": 4.523131902552417e-06, + "loss": 0.1573, + "step": 14744 + }, + { + "epoch": 2.082038972041796, + "grad_norm": 3.5011484764194543, + "learning_rate": 4.521856481671809e-06, + "loss": 0.194, + "step": 14745 + }, + { + "epoch": 2.082180175091782, + "grad_norm": 3.122057458557989, + "learning_rate": 4.520581188100891e-06, + "loss": 0.1502, + "step": 14746 + }, + { + "epoch": 2.082321378141768, + "grad_norm": 2.883435123567567, + "learning_rate": 4.5193060218692995e-06, + "loss": 0.1462, + "step": 14747 + }, + { + "epoch": 2.082462581191754, + "grad_norm": 3.0580899428797315, + "learning_rate": 4.5180309830066696e-06, + "loss": 0.166, + "step": 14748 + }, + { + "epoch": 2.0826037842417398, + "grad_norm": 4.309262967509799, + "learning_rate": 4.5167560715426314e-06, + "loss": 0.2126, + "step": 14749 + }, + { + "epoch": 2.0827449872917256, + "grad_norm": 3.0728251994612905, + "learning_rate": 4.515481287506811e-06, + "loss": 0.1493, + "step": 14750 + }, + { + "epoch": 2.0828861903417115, + "grad_norm": 3.242438596854217, + "learning_rate": 4.514206630928845e-06, + "loss": 0.1689, + "step": 14751 + }, + { + "epoch": 2.0830273933916974, + "grad_norm": 2.9169527570582487, + "learning_rate": 4.512932101838343e-06, + "loss": 0.1427, + "step": 14752 + }, + { + "epoch": 2.0831685964416833, + "grad_norm": 2.564260144329419, + "learning_rate": 4.51165770026493e-06, + "loss": 0.1469, + "step": 14753 + }, + { + "epoch": 2.083309799491669, + "grad_norm": 2.905321137966021, + "learning_rate": 4.510383426238221e-06, + "loss": 0.1698, + "step": 14754 + }, + { + "epoch": 2.083451002541655, + "grad_norm": 3.210278845358085, + "learning_rate": 4.509109279787826e-06, + "loss": 0.1893, + "step": 14755 + }, + { + "epoch": 2.083592205591641, + "grad_norm": 3.071897152654023, + "learning_rate": 4.507835260943365e-06, + "loss": 0.1595, + "step": 14756 + }, + { + "epoch": 2.083733408641627, + "grad_norm": 4.599099350285521, + "learning_rate": 4.506561369734441e-06, + "loss": 0.2427, + "step": 14757 + }, + { + "epoch": 2.0838746116916127, + "grad_norm": 3.3709038021514006, + "learning_rate": 4.505287606190658e-06, + "loss": 0.1901, + "step": 14758 + }, + { + "epoch": 2.0840158147415986, + "grad_norm": 3.5956189374270497, + "learning_rate": 4.504013970341618e-06, + "loss": 0.1548, + "step": 14759 + }, + { + "epoch": 2.0841570177915845, + "grad_norm": 3.460639583309433, + "learning_rate": 4.502740462216919e-06, + "loss": 0.1743, + "step": 14760 + }, + { + "epoch": 2.0842982208415703, + "grad_norm": 3.9106752735296153, + "learning_rate": 4.501467081846158e-06, + "loss": 0.1975, + "step": 14761 + }, + { + "epoch": 2.0844394238915562, + "grad_norm": 2.577299182990156, + "learning_rate": 4.500193829258928e-06, + "loss": 0.1423, + "step": 14762 + }, + { + "epoch": 2.084580626941542, + "grad_norm": 2.4666178976754543, + "learning_rate": 4.498920704484817e-06, + "loss": 0.1011, + "step": 14763 + }, + { + "epoch": 2.084721829991528, + "grad_norm": 2.6806325645666966, + "learning_rate": 4.497647707553414e-06, + "loss": 0.1383, + "step": 14764 + }, + { + "epoch": 2.084863033041514, + "grad_norm": 3.6345367504397825, + "learning_rate": 4.496374838494302e-06, + "loss": 0.1611, + "step": 14765 + }, + { + "epoch": 2.0850042360914998, + "grad_norm": 3.837225139829277, + "learning_rate": 4.495102097337062e-06, + "loss": 0.1843, + "step": 14766 + }, + { + "epoch": 2.0851454391414856, + "grad_norm": 2.9897523737953455, + "learning_rate": 4.493829484111267e-06, + "loss": 0.1799, + "step": 14767 + }, + { + "epoch": 2.0852866421914715, + "grad_norm": 3.2996123634210925, + "learning_rate": 4.492556998846505e-06, + "loss": 0.2218, + "step": 14768 + }, + { + "epoch": 2.0854278452414574, + "grad_norm": 3.5960858087511145, + "learning_rate": 4.491284641572338e-06, + "loss": 0.2227, + "step": 14769 + }, + { + "epoch": 2.0855690482914433, + "grad_norm": 4.195496101232432, + "learning_rate": 4.490012412318334e-06, + "loss": 0.2608, + "step": 14770 + }, + { + "epoch": 2.085710251341429, + "grad_norm": 3.041712918328491, + "learning_rate": 4.488740311114064e-06, + "loss": 0.1714, + "step": 14771 + }, + { + "epoch": 2.085851454391415, + "grad_norm": 3.199707506797733, + "learning_rate": 4.487468337989083e-06, + "loss": 0.1827, + "step": 14772 + }, + { + "epoch": 2.085992657441401, + "grad_norm": 2.737178528616085, + "learning_rate": 4.486196492972964e-06, + "loss": 0.1715, + "step": 14773 + }, + { + "epoch": 2.086133860491387, + "grad_norm": 3.2689073476561528, + "learning_rate": 4.484924776095255e-06, + "loss": 0.1848, + "step": 14774 + }, + { + "epoch": 2.0862750635413727, + "grad_norm": 2.9070056491402156, + "learning_rate": 4.483653187385514e-06, + "loss": 0.1491, + "step": 14775 + }, + { + "epoch": 2.0864162665913586, + "grad_norm": 3.3285631651922905, + "learning_rate": 4.48238172687329e-06, + "loss": 0.1645, + "step": 14776 + }, + { + "epoch": 2.0865574696413445, + "grad_norm": 3.1527224533195586, + "learning_rate": 4.481110394588131e-06, + "loss": 0.1673, + "step": 14777 + }, + { + "epoch": 2.0866986726913304, + "grad_norm": 3.569857753140123, + "learning_rate": 4.479839190559583e-06, + "loss": 0.216, + "step": 14778 + }, + { + "epoch": 2.086839875741316, + "grad_norm": 3.411398949315808, + "learning_rate": 4.4785681148171885e-06, + "loss": 0.1464, + "step": 14779 + }, + { + "epoch": 2.0869810787913017, + "grad_norm": 3.2647383971700767, + "learning_rate": 4.477297167390487e-06, + "loss": 0.2029, + "step": 14780 + }, + { + "epoch": 2.0871222818412876, + "grad_norm": 3.346219672569038, + "learning_rate": 4.476026348309014e-06, + "loss": 0.2099, + "step": 14781 + }, + { + "epoch": 2.0872634848912734, + "grad_norm": 3.165621249847517, + "learning_rate": 4.474755657602303e-06, + "loss": 0.175, + "step": 14782 + }, + { + "epoch": 2.0874046879412593, + "grad_norm": 2.954786489251234, + "learning_rate": 4.473485095299885e-06, + "loss": 0.1874, + "step": 14783 + }, + { + "epoch": 2.087545890991245, + "grad_norm": 3.180168472474487, + "learning_rate": 4.472214661431282e-06, + "loss": 0.195, + "step": 14784 + }, + { + "epoch": 2.087687094041231, + "grad_norm": 2.4944453671412, + "learning_rate": 4.47094435602603e-06, + "loss": 0.1162, + "step": 14785 + }, + { + "epoch": 2.087828297091217, + "grad_norm": 2.7462661622366675, + "learning_rate": 4.469674179113641e-06, + "loss": 0.1316, + "step": 14786 + }, + { + "epoch": 2.087969500141203, + "grad_norm": 3.3388234911876205, + "learning_rate": 4.4684041307236345e-06, + "loss": 0.1562, + "step": 14787 + }, + { + "epoch": 2.0881107031911887, + "grad_norm": 3.232322320950187, + "learning_rate": 4.467134210885529e-06, + "loss": 0.1845, + "step": 14788 + }, + { + "epoch": 2.0882519062411746, + "grad_norm": 3.0059205588299363, + "learning_rate": 4.465864419628829e-06, + "loss": 0.1598, + "step": 14789 + }, + { + "epoch": 2.0883931092911605, + "grad_norm": 3.4000775578431743, + "learning_rate": 4.464594756983055e-06, + "loss": 0.1958, + "step": 14790 + }, + { + "epoch": 2.0885343123411464, + "grad_norm": 3.6036805416685627, + "learning_rate": 4.463325222977708e-06, + "loss": 0.2047, + "step": 14791 + }, + { + "epoch": 2.0886755153911323, + "grad_norm": 3.1765058636760672, + "learning_rate": 4.462055817642291e-06, + "loss": 0.1917, + "step": 14792 + }, + { + "epoch": 2.088816718441118, + "grad_norm": 3.354980318032052, + "learning_rate": 4.460786541006305e-06, + "loss": 0.2047, + "step": 14793 + }, + { + "epoch": 2.088957921491104, + "grad_norm": 3.121617781585857, + "learning_rate": 4.459517393099253e-06, + "loss": 0.1525, + "step": 14794 + }, + { + "epoch": 2.08909912454109, + "grad_norm": 3.1263451041656585, + "learning_rate": 4.458248373950616e-06, + "loss": 0.1525, + "step": 14795 + }, + { + "epoch": 2.089240327591076, + "grad_norm": 2.621276669067131, + "learning_rate": 4.456979483589896e-06, + "loss": 0.1393, + "step": 14796 + }, + { + "epoch": 2.0893815306410617, + "grad_norm": 2.5769628276992975, + "learning_rate": 4.45571072204658e-06, + "loss": 0.1524, + "step": 14797 + }, + { + "epoch": 2.0895227336910476, + "grad_norm": 3.333510010626498, + "learning_rate": 4.454442089350151e-06, + "loss": 0.1665, + "step": 14798 + }, + { + "epoch": 2.0896639367410335, + "grad_norm": 2.6756184294840595, + "learning_rate": 4.4531735855300954e-06, + "loss": 0.1324, + "step": 14799 + }, + { + "epoch": 2.0898051397910193, + "grad_norm": 2.7463422131108435, + "learning_rate": 4.451905210615889e-06, + "loss": 0.1593, + "step": 14800 + }, + { + "epoch": 2.0899463428410052, + "grad_norm": 3.26841170314668, + "learning_rate": 4.450636964637005e-06, + "loss": 0.1354, + "step": 14801 + }, + { + "epoch": 2.090087545890991, + "grad_norm": 2.9900825630036545, + "learning_rate": 4.4493688476229295e-06, + "loss": 0.1432, + "step": 14802 + }, + { + "epoch": 2.090228748940977, + "grad_norm": 3.4545242970346917, + "learning_rate": 4.44810085960312e-06, + "loss": 0.0991, + "step": 14803 + }, + { + "epoch": 2.090369951990963, + "grad_norm": 2.9520612797555987, + "learning_rate": 4.4468330006070505e-06, + "loss": 0.1265, + "step": 14804 + }, + { + "epoch": 2.0905111550409488, + "grad_norm": 3.096010947621436, + "learning_rate": 4.445565270664184e-06, + "loss": 0.154, + "step": 14805 + }, + { + "epoch": 2.0906523580909346, + "grad_norm": 3.6246407079592617, + "learning_rate": 4.444297669803981e-06, + "loss": 0.1946, + "step": 14806 + }, + { + "epoch": 2.0907935611409205, + "grad_norm": 3.1607073559508887, + "learning_rate": 4.443030198055897e-06, + "loss": 0.1468, + "step": 14807 + }, + { + "epoch": 2.0909347641909064, + "grad_norm": 3.2154633273797657, + "learning_rate": 4.441762855449395e-06, + "loss": 0.1492, + "step": 14808 + }, + { + "epoch": 2.0910759672408923, + "grad_norm": 3.6044325483107995, + "learning_rate": 4.4404956420139245e-06, + "loss": 0.1927, + "step": 14809 + }, + { + "epoch": 2.091217170290878, + "grad_norm": 3.614882966736453, + "learning_rate": 4.439228557778933e-06, + "loss": 0.189, + "step": 14810 + }, + { + "epoch": 2.091358373340864, + "grad_norm": 3.4637240785764005, + "learning_rate": 4.437961602773874e-06, + "loss": 0.1632, + "step": 14811 + }, + { + "epoch": 2.09149957639085, + "grad_norm": 3.137579592637615, + "learning_rate": 4.436694777028175e-06, + "loss": 0.1579, + "step": 14812 + }, + { + "epoch": 2.091640779440836, + "grad_norm": 3.01614947690494, + "learning_rate": 4.435428080571293e-06, + "loss": 0.159, + "step": 14813 + }, + { + "epoch": 2.0917819824908217, + "grad_norm": 3.0429779982154175, + "learning_rate": 4.434161513432659e-06, + "loss": 0.1274, + "step": 14814 + }, + { + "epoch": 2.0919231855408076, + "grad_norm": 3.2929352557122322, + "learning_rate": 4.432895075641707e-06, + "loss": 0.1708, + "step": 14815 + }, + { + "epoch": 2.0920643885907935, + "grad_norm": 2.8386277956830988, + "learning_rate": 4.431628767227869e-06, + "loss": 0.1415, + "step": 14816 + }, + { + "epoch": 2.0922055916407793, + "grad_norm": 3.891388677289785, + "learning_rate": 4.4303625882205735e-06, + "loss": 0.2128, + "step": 14817 + }, + { + "epoch": 2.0923467946907652, + "grad_norm": 3.149948032572408, + "learning_rate": 4.4290965386492436e-06, + "loss": 0.1475, + "step": 14818 + }, + { + "epoch": 2.092487997740751, + "grad_norm": 4.007698313373494, + "learning_rate": 4.427830618543311e-06, + "loss": 0.2441, + "step": 14819 + }, + { + "epoch": 2.092629200790737, + "grad_norm": 2.92107266459875, + "learning_rate": 4.426564827932185e-06, + "loss": 0.1807, + "step": 14820 + }, + { + "epoch": 2.092770403840723, + "grad_norm": 3.1223032818463174, + "learning_rate": 4.425299166845285e-06, + "loss": 0.146, + "step": 14821 + }, + { + "epoch": 2.0929116068907088, + "grad_norm": 5.280147708513427, + "learning_rate": 4.424033635312025e-06, + "loss": 0.2332, + "step": 14822 + }, + { + "epoch": 2.0930528099406946, + "grad_norm": 2.7770934091599377, + "learning_rate": 4.422768233361815e-06, + "loss": 0.1392, + "step": 14823 + }, + { + "epoch": 2.0931940129906805, + "grad_norm": 3.6269976184711568, + "learning_rate": 4.421502961024059e-06, + "loss": 0.1755, + "step": 14824 + }, + { + "epoch": 2.0933352160406664, + "grad_norm": 3.247149494228637, + "learning_rate": 4.420237818328169e-06, + "loss": 0.1663, + "step": 14825 + }, + { + "epoch": 2.0934764190906523, + "grad_norm": 3.00890367077005, + "learning_rate": 4.418972805303542e-06, + "loss": 0.1389, + "step": 14826 + }, + { + "epoch": 2.093617622140638, + "grad_norm": 3.5266573223263915, + "learning_rate": 4.417707921979577e-06, + "loss": 0.1949, + "step": 14827 + }, + { + "epoch": 2.093758825190624, + "grad_norm": 3.2962994776832275, + "learning_rate": 4.416443168385672e-06, + "loss": 0.1627, + "step": 14828 + }, + { + "epoch": 2.09390002824061, + "grad_norm": 3.023267164479171, + "learning_rate": 4.415178544551211e-06, + "loss": 0.1204, + "step": 14829 + }, + { + "epoch": 2.094041231290596, + "grad_norm": 2.8526165874527636, + "learning_rate": 4.413914050505591e-06, + "loss": 0.1735, + "step": 14830 + }, + { + "epoch": 2.0941824343405817, + "grad_norm": 3.0237715823739766, + "learning_rate": 4.412649686278195e-06, + "loss": 0.1606, + "step": 14831 + }, + { + "epoch": 2.0943236373905676, + "grad_norm": 4.007294511241387, + "learning_rate": 4.4113854518984085e-06, + "loss": 0.1832, + "step": 14832 + }, + { + "epoch": 2.0944648404405535, + "grad_norm": 2.979239271609002, + "learning_rate": 4.410121347395612e-06, + "loss": 0.1724, + "step": 14833 + }, + { + "epoch": 2.0946060434905394, + "grad_norm": 3.431458698942434, + "learning_rate": 4.408857372799179e-06, + "loss": 0.1823, + "step": 14834 + }, + { + "epoch": 2.0947472465405252, + "grad_norm": 3.110631748267666, + "learning_rate": 4.4075935281384875e-06, + "loss": 0.1523, + "step": 14835 + }, + { + "epoch": 2.094888449590511, + "grad_norm": 3.928381714299041, + "learning_rate": 4.406329813442907e-06, + "loss": 0.2255, + "step": 14836 + }, + { + "epoch": 2.095029652640497, + "grad_norm": 3.6633800423974474, + "learning_rate": 4.405066228741805e-06, + "loss": 0.1376, + "step": 14837 + }, + { + "epoch": 2.095170855690483, + "grad_norm": 3.1022673119154103, + "learning_rate": 4.403802774064548e-06, + "loss": 0.1568, + "step": 14838 + }, + { + "epoch": 2.0953120587404688, + "grad_norm": 3.3718871810380446, + "learning_rate": 4.402539449440499e-06, + "loss": 0.2237, + "step": 14839 + }, + { + "epoch": 2.0954532617904547, + "grad_norm": 3.0438122311282623, + "learning_rate": 4.401276254899014e-06, + "loss": 0.1986, + "step": 14840 + }, + { + "epoch": 2.0955944648404405, + "grad_norm": 3.2434081434213446, + "learning_rate": 4.400013190469448e-06, + "loss": 0.1328, + "step": 14841 + }, + { + "epoch": 2.0957356678904264, + "grad_norm": 3.420380656620362, + "learning_rate": 4.39875025618116e-06, + "loss": 0.2275, + "step": 14842 + }, + { + "epoch": 2.0958768709404123, + "grad_norm": 3.8499492969113374, + "learning_rate": 4.397487452063498e-06, + "loss": 0.2032, + "step": 14843 + }, + { + "epoch": 2.096018073990398, + "grad_norm": 4.264430293136782, + "learning_rate": 4.396224778145808e-06, + "loss": 0.204, + "step": 14844 + }, + { + "epoch": 2.096159277040384, + "grad_norm": 2.8705637589430695, + "learning_rate": 4.394962234457437e-06, + "loss": 0.1665, + "step": 14845 + }, + { + "epoch": 2.09630048009037, + "grad_norm": 2.8925088328316715, + "learning_rate": 4.393699821027716e-06, + "loss": 0.1432, + "step": 14846 + }, + { + "epoch": 2.096441683140356, + "grad_norm": 3.7661947691622273, + "learning_rate": 4.392437537885994e-06, + "loss": 0.221, + "step": 14847 + }, + { + "epoch": 2.0965828861903417, + "grad_norm": 2.6518016188653877, + "learning_rate": 4.391175385061601e-06, + "loss": 0.124, + "step": 14848 + }, + { + "epoch": 2.0967240892403276, + "grad_norm": 3.6642526872880627, + "learning_rate": 4.389913362583871e-06, + "loss": 0.1811, + "step": 14849 + }, + { + "epoch": 2.0968652922903135, + "grad_norm": 2.8003833095445683, + "learning_rate": 4.3886514704821315e-06, + "loss": 0.1469, + "step": 14850 + }, + { + "epoch": 2.0970064953402994, + "grad_norm": 3.6431541882073994, + "learning_rate": 4.387389708785708e-06, + "loss": 0.1988, + "step": 14851 + }, + { + "epoch": 2.0971476983902853, + "grad_norm": 2.7586957518606057, + "learning_rate": 4.386128077523923e-06, + "loss": 0.1424, + "step": 14852 + }, + { + "epoch": 2.097288901440271, + "grad_norm": 3.0509260388095996, + "learning_rate": 4.384866576726099e-06, + "loss": 0.1801, + "step": 14853 + }, + { + "epoch": 2.097430104490257, + "grad_norm": 3.172895054998916, + "learning_rate": 4.383605206421549e-06, + "loss": 0.1813, + "step": 14854 + }, + { + "epoch": 2.097571307540243, + "grad_norm": 4.007224745658212, + "learning_rate": 4.3823439666395895e-06, + "loss": 0.2486, + "step": 14855 + }, + { + "epoch": 2.097712510590229, + "grad_norm": 3.6581780512053528, + "learning_rate": 4.38108285740953e-06, + "loss": 0.1671, + "step": 14856 + }, + { + "epoch": 2.0978537136402147, + "grad_norm": 2.9558809374791815, + "learning_rate": 4.379821878760679e-06, + "loss": 0.1469, + "step": 14857 + }, + { + "epoch": 2.0979949166902006, + "grad_norm": 3.24766558221534, + "learning_rate": 4.378561030722335e-06, + "loss": 0.1834, + "step": 14858 + }, + { + "epoch": 2.0981361197401864, + "grad_norm": 3.3788336628057953, + "learning_rate": 4.377300313323809e-06, + "loss": 0.1685, + "step": 14859 + }, + { + "epoch": 2.0982773227901723, + "grad_norm": 3.257120797683473, + "learning_rate": 4.3760397265943965e-06, + "loss": 0.1278, + "step": 14860 + }, + { + "epoch": 2.098418525840158, + "grad_norm": 2.3337844707372866, + "learning_rate": 4.374779270563391e-06, + "loss": 0.1428, + "step": 14861 + }, + { + "epoch": 2.098559728890144, + "grad_norm": 3.3899738293785577, + "learning_rate": 4.37351894526009e-06, + "loss": 0.1786, + "step": 14862 + }, + { + "epoch": 2.09870093194013, + "grad_norm": 3.231098927035343, + "learning_rate": 4.372258750713771e-06, + "loss": 0.1892, + "step": 14863 + }, + { + "epoch": 2.098842134990116, + "grad_norm": 2.9858132781219417, + "learning_rate": 4.370998686953733e-06, + "loss": 0.1494, + "step": 14864 + }, + { + "epoch": 2.0989833380401017, + "grad_norm": 3.6665589280435946, + "learning_rate": 4.369738754009253e-06, + "loss": 0.2189, + "step": 14865 + }, + { + "epoch": 2.0991245410900876, + "grad_norm": 3.6455033138250363, + "learning_rate": 4.368478951909614e-06, + "loss": 0.1603, + "step": 14866 + }, + { + "epoch": 2.0992657441400735, + "grad_norm": 2.7809100450445143, + "learning_rate": 4.367219280684091e-06, + "loss": 0.1023, + "step": 14867 + }, + { + "epoch": 2.0994069471900594, + "grad_norm": 2.351442099352178, + "learning_rate": 4.36595974036196e-06, + "loss": 0.1016, + "step": 14868 + }, + { + "epoch": 2.0995481502400453, + "grad_norm": 2.6976451267489554, + "learning_rate": 4.364700330972492e-06, + "loss": 0.1464, + "step": 14869 + }, + { + "epoch": 2.099689353290031, + "grad_norm": 3.8132638017116554, + "learning_rate": 4.363441052544953e-06, + "loss": 0.2113, + "step": 14870 + }, + { + "epoch": 2.099830556340017, + "grad_norm": 2.6185184978354594, + "learning_rate": 4.362181905108611e-06, + "loss": 0.1433, + "step": 14871 + }, + { + "epoch": 2.099971759390003, + "grad_norm": 3.364107867167429, + "learning_rate": 4.3609228886927265e-06, + "loss": 0.1962, + "step": 14872 + }, + { + "epoch": 2.100112962439989, + "grad_norm": 2.946728253642788, + "learning_rate": 4.359664003326559e-06, + "loss": 0.1586, + "step": 14873 + }, + { + "epoch": 2.1002541654899747, + "grad_norm": 2.934823074906148, + "learning_rate": 4.3584052490393645e-06, + "loss": 0.1941, + "step": 14874 + }, + { + "epoch": 2.1003953685399606, + "grad_norm": 3.410164330629009, + "learning_rate": 4.357146625860391e-06, + "loss": 0.2351, + "step": 14875 + }, + { + "epoch": 2.1005365715899464, + "grad_norm": 2.6355142674112537, + "learning_rate": 4.355888133818897e-06, + "loss": 0.1658, + "step": 14876 + }, + { + "epoch": 2.1006777746399323, + "grad_norm": 2.7918339739886995, + "learning_rate": 4.3546297729441256e-06, + "loss": 0.1865, + "step": 14877 + }, + { + "epoch": 2.100818977689918, + "grad_norm": 3.226498127834616, + "learning_rate": 4.35337154326532e-06, + "loss": 0.1775, + "step": 14878 + }, + { + "epoch": 2.100960180739904, + "grad_norm": 3.115239480675422, + "learning_rate": 4.352113444811724e-06, + "loss": 0.1736, + "step": 14879 + }, + { + "epoch": 2.10110138378989, + "grad_norm": 2.9237773285505453, + "learning_rate": 4.350855477612565e-06, + "loss": 0.1336, + "step": 14880 + }, + { + "epoch": 2.101242586839876, + "grad_norm": 3.059546421125386, + "learning_rate": 4.349597641697091e-06, + "loss": 0.1503, + "step": 14881 + }, + { + "epoch": 2.1013837898898617, + "grad_norm": 2.3377603270119094, + "learning_rate": 4.348339937094525e-06, + "loss": 0.1294, + "step": 14882 + }, + { + "epoch": 2.1015249929398476, + "grad_norm": 2.6550347434385797, + "learning_rate": 4.3470823638340995e-06, + "loss": 0.1264, + "step": 14883 + }, + { + "epoch": 2.1016661959898335, + "grad_norm": 2.861419878098478, + "learning_rate": 4.345824921945039e-06, + "loss": 0.167, + "step": 14884 + }, + { + "epoch": 2.1018073990398194, + "grad_norm": 3.063558956679336, + "learning_rate": 4.344567611456564e-06, + "loss": 0.1662, + "step": 14885 + }, + { + "epoch": 2.1019486020898053, + "grad_norm": 3.1617001351913863, + "learning_rate": 4.3433104323978956e-06, + "loss": 0.1501, + "step": 14886 + }, + { + "epoch": 2.102089805139791, + "grad_norm": 3.490238549557351, + "learning_rate": 4.34205338479825e-06, + "loss": 0.1853, + "step": 14887 + }, + { + "epoch": 2.102231008189777, + "grad_norm": 2.7021273634409178, + "learning_rate": 4.340796468686841e-06, + "loss": 0.1674, + "step": 14888 + }, + { + "epoch": 2.102372211239763, + "grad_norm": 2.930307317253397, + "learning_rate": 4.339539684092877e-06, + "loss": 0.1453, + "step": 14889 + }, + { + "epoch": 2.102513414289749, + "grad_norm": 3.005669476133508, + "learning_rate": 4.338283031045567e-06, + "loss": 0.1604, + "step": 14890 + }, + { + "epoch": 2.1026546173397347, + "grad_norm": 3.2647644071534936, + "learning_rate": 4.3370265095741135e-06, + "loss": 0.1643, + "step": 14891 + }, + { + "epoch": 2.1027958203897206, + "grad_norm": 2.604405240960576, + "learning_rate": 4.335770119707715e-06, + "loss": 0.1582, + "step": 14892 + }, + { + "epoch": 2.1029370234397065, + "grad_norm": 3.310422443778681, + "learning_rate": 4.334513861475577e-06, + "loss": 0.1709, + "step": 14893 + }, + { + "epoch": 2.1030782264896923, + "grad_norm": 3.185655743344321, + "learning_rate": 4.333257734906889e-06, + "loss": 0.1691, + "step": 14894 + }, + { + "epoch": 2.103219429539678, + "grad_norm": 3.6704081750717714, + "learning_rate": 4.332001740030849e-06, + "loss": 0.1699, + "step": 14895 + }, + { + "epoch": 2.103360632589664, + "grad_norm": 3.7543527398896357, + "learning_rate": 4.330745876876635e-06, + "loss": 0.1687, + "step": 14896 + }, + { + "epoch": 2.10350183563965, + "grad_norm": 2.962924753903919, + "learning_rate": 4.3294901454734405e-06, + "loss": 0.147, + "step": 14897 + }, + { + "epoch": 2.103643038689636, + "grad_norm": 3.242798923557571, + "learning_rate": 4.328234545850441e-06, + "loss": 0.1652, + "step": 14898 + }, + { + "epoch": 2.1037842417396218, + "grad_norm": 2.9847366424919306, + "learning_rate": 4.3269790780368256e-06, + "loss": 0.1601, + "step": 14899 + }, + { + "epoch": 2.1039254447896076, + "grad_norm": 3.922785570697993, + "learning_rate": 4.325723742061767e-06, + "loss": 0.1891, + "step": 14900 + }, + { + "epoch": 2.1040666478395935, + "grad_norm": 2.3151654861922037, + "learning_rate": 4.324468537954437e-06, + "loss": 0.1572, + "step": 14901 + }, + { + "epoch": 2.1042078508895794, + "grad_norm": 3.123206609118541, + "learning_rate": 4.323213465744007e-06, + "loss": 0.1504, + "step": 14902 + }, + { + "epoch": 2.1043490539395653, + "grad_norm": 2.90514734394011, + "learning_rate": 4.3219585254596455e-06, + "loss": 0.1532, + "step": 14903 + }, + { + "epoch": 2.104490256989551, + "grad_norm": 3.027450803759767, + "learning_rate": 4.320703717130516e-06, + "loss": 0.1432, + "step": 14904 + }, + { + "epoch": 2.104631460039537, + "grad_norm": 2.91162423138713, + "learning_rate": 4.319449040785778e-06, + "loss": 0.1391, + "step": 14905 + }, + { + "epoch": 2.104772663089523, + "grad_norm": 3.483731175370125, + "learning_rate": 4.3181944964545915e-06, + "loss": 0.1515, + "step": 14906 + }, + { + "epoch": 2.104913866139509, + "grad_norm": 3.0444545337867486, + "learning_rate": 4.316940084166111e-06, + "loss": 0.1456, + "step": 14907 + }, + { + "epoch": 2.1050550691894947, + "grad_norm": 4.30507932861241, + "learning_rate": 4.3156858039494895e-06, + "loss": 0.2329, + "step": 14908 + }, + { + "epoch": 2.1051962722394806, + "grad_norm": 3.0450061824220658, + "learning_rate": 4.314431655833874e-06, + "loss": 0.1381, + "step": 14909 + }, + { + "epoch": 2.1053374752894665, + "grad_norm": 3.199023202258202, + "learning_rate": 4.313177639848408e-06, + "loss": 0.1909, + "step": 14910 + }, + { + "epoch": 2.105478678339452, + "grad_norm": 3.2879930424187966, + "learning_rate": 4.311923756022243e-06, + "loss": 0.1913, + "step": 14911 + }, + { + "epoch": 2.105619881389438, + "grad_norm": 2.9228811658155567, + "learning_rate": 4.310670004384515e-06, + "loss": 0.1544, + "step": 14912 + }, + { + "epoch": 2.1057610844394237, + "grad_norm": 2.9836705655645295, + "learning_rate": 4.309416384964355e-06, + "loss": 0.1729, + "step": 14913 + }, + { + "epoch": 2.1059022874894096, + "grad_norm": 3.0379705858112023, + "learning_rate": 4.308162897790903e-06, + "loss": 0.1581, + "step": 14914 + }, + { + "epoch": 2.1060434905393954, + "grad_norm": 4.138632277129836, + "learning_rate": 4.306909542893281e-06, + "loss": 0.1813, + "step": 14915 + }, + { + "epoch": 2.1061846935893813, + "grad_norm": 3.0170395322653603, + "learning_rate": 4.305656320300628e-06, + "loss": 0.15, + "step": 14916 + }, + { + "epoch": 2.106325896639367, + "grad_norm": 3.1018977039841054, + "learning_rate": 4.304403230042063e-06, + "loss": 0.1769, + "step": 14917 + }, + { + "epoch": 2.106467099689353, + "grad_norm": 3.4307746281307425, + "learning_rate": 4.303150272146706e-06, + "loss": 0.1605, + "step": 14918 + }, + { + "epoch": 2.106608302739339, + "grad_norm": 3.9431462181931334, + "learning_rate": 4.301897446643677e-06, + "loss": 0.1555, + "step": 14919 + }, + { + "epoch": 2.106749505789325, + "grad_norm": 2.92845224895522, + "learning_rate": 4.30064475356209e-06, + "loss": 0.1375, + "step": 14920 + }, + { + "epoch": 2.1068907088393107, + "grad_norm": 2.9199468572958955, + "learning_rate": 4.299392192931058e-06, + "loss": 0.1769, + "step": 14921 + }, + { + "epoch": 2.1070319118892966, + "grad_norm": 3.0808912821237397, + "learning_rate": 4.29813976477969e-06, + "loss": 0.1201, + "step": 14922 + }, + { + "epoch": 2.1071731149392825, + "grad_norm": 2.665509370044914, + "learning_rate": 4.296887469137091e-06, + "loss": 0.1131, + "step": 14923 + }, + { + "epoch": 2.1073143179892684, + "grad_norm": 3.362935102608214, + "learning_rate": 4.295635306032364e-06, + "loss": 0.149, + "step": 14924 + }, + { + "epoch": 2.1074555210392543, + "grad_norm": 3.3016930406371747, + "learning_rate": 4.294383275494609e-06, + "loss": 0.1592, + "step": 14925 + }, + { + "epoch": 2.10759672408924, + "grad_norm": 2.9915239868989407, + "learning_rate": 4.293131377552923e-06, + "loss": 0.1522, + "step": 14926 + }, + { + "epoch": 2.107737927139226, + "grad_norm": 2.840127563091646, + "learning_rate": 4.291879612236395e-06, + "loss": 0.135, + "step": 14927 + }, + { + "epoch": 2.107879130189212, + "grad_norm": 2.873609055645654, + "learning_rate": 4.290627979574123e-06, + "loss": 0.1155, + "step": 14928 + }, + { + "epoch": 2.108020333239198, + "grad_norm": 3.3994236831957227, + "learning_rate": 4.2893764795951955e-06, + "loss": 0.1676, + "step": 14929 + }, + { + "epoch": 2.1081615362891837, + "grad_norm": 3.370001710807155, + "learning_rate": 4.2881251123286896e-06, + "loss": 0.1466, + "step": 14930 + }, + { + "epoch": 2.1083027393391696, + "grad_norm": 2.9610430277388473, + "learning_rate": 4.286873877803688e-06, + "loss": 0.1599, + "step": 14931 + }, + { + "epoch": 2.1084439423891554, + "grad_norm": 3.5469329957362827, + "learning_rate": 4.2856227760492665e-06, + "loss": 0.1857, + "step": 14932 + }, + { + "epoch": 2.1085851454391413, + "grad_norm": 2.967649905093386, + "learning_rate": 4.284371807094508e-06, + "loss": 0.1793, + "step": 14933 + }, + { + "epoch": 2.108726348489127, + "grad_norm": 3.2228400753042754, + "learning_rate": 4.28312097096848e-06, + "loss": 0.148, + "step": 14934 + }, + { + "epoch": 2.108867551539113, + "grad_norm": 3.2894830449048285, + "learning_rate": 4.281870267700251e-06, + "loss": 0.1468, + "step": 14935 + }, + { + "epoch": 2.109008754589099, + "grad_norm": 3.161925060626662, + "learning_rate": 4.280619697318888e-06, + "loss": 0.1611, + "step": 14936 + }, + { + "epoch": 2.109149957639085, + "grad_norm": 2.6948737421878426, + "learning_rate": 4.279369259853454e-06, + "loss": 0.1271, + "step": 14937 + }, + { + "epoch": 2.1092911606890707, + "grad_norm": 3.1466285862507744, + "learning_rate": 4.278118955333007e-06, + "loss": 0.1292, + "step": 14938 + }, + { + "epoch": 2.1094323637390566, + "grad_norm": 2.705543119220257, + "learning_rate": 4.276868783786605e-06, + "loss": 0.1409, + "step": 14939 + }, + { + "epoch": 2.1095735667890425, + "grad_norm": 2.3343913097812687, + "learning_rate": 4.275618745243301e-06, + "loss": 0.1384, + "step": 14940 + }, + { + "epoch": 2.1097147698390284, + "grad_norm": 3.334124086375242, + "learning_rate": 4.274368839732145e-06, + "loss": 0.1806, + "step": 14941 + }, + { + "epoch": 2.1098559728890143, + "grad_norm": 3.0754218680604644, + "learning_rate": 4.273119067282184e-06, + "loss": 0.1915, + "step": 14942 + }, + { + "epoch": 2.109997175939, + "grad_norm": 3.9666483931710657, + "learning_rate": 4.271869427922463e-06, + "loss": 0.1803, + "step": 14943 + }, + { + "epoch": 2.110138378988986, + "grad_norm": 3.050769715090813, + "learning_rate": 4.270619921682019e-06, + "loss": 0.1349, + "step": 14944 + }, + { + "epoch": 2.110279582038972, + "grad_norm": 3.7490523746071527, + "learning_rate": 4.269370548589897e-06, + "loss": 0.1692, + "step": 14945 + }, + { + "epoch": 2.110420785088958, + "grad_norm": 3.9353258016788835, + "learning_rate": 4.268121308675132e-06, + "loss": 0.2086, + "step": 14946 + }, + { + "epoch": 2.1105619881389437, + "grad_norm": 3.6957392876328434, + "learning_rate": 4.26687220196675e-06, + "loss": 0.1764, + "step": 14947 + }, + { + "epoch": 2.1107031911889296, + "grad_norm": 2.8877215962754694, + "learning_rate": 4.265623228493781e-06, + "loss": 0.1579, + "step": 14948 + }, + { + "epoch": 2.1108443942389155, + "grad_norm": 3.1526232535011043, + "learning_rate": 4.2643743882852486e-06, + "loss": 0.1621, + "step": 14949 + }, + { + "epoch": 2.1109855972889013, + "grad_norm": 4.676465827580886, + "learning_rate": 4.2631256813701815e-06, + "loss": 0.2213, + "step": 14950 + }, + { + "epoch": 2.111126800338887, + "grad_norm": 3.859406800222345, + "learning_rate": 4.261877107777596e-06, + "loss": 0.195, + "step": 14951 + }, + { + "epoch": 2.111268003388873, + "grad_norm": 3.1723621501857258, + "learning_rate": 4.260628667536508e-06, + "loss": 0.1452, + "step": 14952 + }, + { + "epoch": 2.111409206438859, + "grad_norm": 3.3518591906624216, + "learning_rate": 4.2593803606759324e-06, + "loss": 0.1888, + "step": 14953 + }, + { + "epoch": 2.111550409488845, + "grad_norm": 2.8849255309611395, + "learning_rate": 4.258132187224877e-06, + "loss": 0.1738, + "step": 14954 + }, + { + "epoch": 2.1116916125388308, + "grad_norm": 2.5035119889991355, + "learning_rate": 4.256884147212351e-06, + "loss": 0.1498, + "step": 14955 + }, + { + "epoch": 2.1118328155888166, + "grad_norm": 3.3491211750099006, + "learning_rate": 4.255636240667356e-06, + "loss": 0.1464, + "step": 14956 + }, + { + "epoch": 2.1119740186388025, + "grad_norm": 2.7697017372059203, + "learning_rate": 4.254388467618894e-06, + "loss": 0.1625, + "step": 14957 + }, + { + "epoch": 2.1121152216887884, + "grad_norm": 3.3454954291310934, + "learning_rate": 4.253140828095964e-06, + "loss": 0.1806, + "step": 14958 + }, + { + "epoch": 2.1122564247387743, + "grad_norm": 2.4364290655683827, + "learning_rate": 4.251893322127558e-06, + "loss": 0.11, + "step": 14959 + }, + { + "epoch": 2.11239762778876, + "grad_norm": 2.5987351444910862, + "learning_rate": 4.2506459497426685e-06, + "loss": 0.0881, + "step": 14960 + }, + { + "epoch": 2.112538830838746, + "grad_norm": 3.450966224988919, + "learning_rate": 4.2493987109702814e-06, + "loss": 0.2145, + "step": 14961 + }, + { + "epoch": 2.112680033888732, + "grad_norm": 3.84106334736533, + "learning_rate": 4.2481516058393876e-06, + "loss": 0.202, + "step": 14962 + }, + { + "epoch": 2.112821236938718, + "grad_norm": 3.1324661551584696, + "learning_rate": 4.2469046343789715e-06, + "loss": 0.1658, + "step": 14963 + }, + { + "epoch": 2.1129624399887037, + "grad_norm": 3.216044549219269, + "learning_rate": 4.2456577966180025e-06, + "loss": 0.1666, + "step": 14964 + }, + { + "epoch": 2.1131036430386896, + "grad_norm": 2.805378137647697, + "learning_rate": 4.244411092585461e-06, + "loss": 0.1733, + "step": 14965 + }, + { + "epoch": 2.1132448460886755, + "grad_norm": 3.1503686632829333, + "learning_rate": 4.243164522310317e-06, + "loss": 0.1577, + "step": 14966 + }, + { + "epoch": 2.1133860491386613, + "grad_norm": 3.94323185859925, + "learning_rate": 4.241918085821547e-06, + "loss": 0.2141, + "step": 14967 + }, + { + "epoch": 2.1135272521886472, + "grad_norm": 2.9421215266790983, + "learning_rate": 4.240671783148114e-06, + "loss": 0.1542, + "step": 14968 + }, + { + "epoch": 2.113668455238633, + "grad_norm": 3.149453921887369, + "learning_rate": 4.23942561431898e-06, + "loss": 0.1679, + "step": 14969 + }, + { + "epoch": 2.113809658288619, + "grad_norm": 3.0518665916513803, + "learning_rate": 4.238179579363109e-06, + "loss": 0.1533, + "step": 14970 + }, + { + "epoch": 2.113950861338605, + "grad_norm": 2.8931169273462567, + "learning_rate": 4.236933678309455e-06, + "loss": 0.1619, + "step": 14971 + }, + { + "epoch": 2.1140920643885908, + "grad_norm": 3.252152318483666, + "learning_rate": 4.235687911186974e-06, + "loss": 0.2056, + "step": 14972 + }, + { + "epoch": 2.1142332674385766, + "grad_norm": 2.3990781208039205, + "learning_rate": 4.234442278024616e-06, + "loss": 0.1619, + "step": 14973 + }, + { + "epoch": 2.1143744704885625, + "grad_norm": 3.244987205817203, + "learning_rate": 4.2331967788513295e-06, + "loss": 0.1609, + "step": 14974 + }, + { + "epoch": 2.1145156735385484, + "grad_norm": 2.9794194675563523, + "learning_rate": 4.2319514136960605e-06, + "loss": 0.1379, + "step": 14975 + }, + { + "epoch": 2.1146568765885343, + "grad_norm": 3.0139823547475566, + "learning_rate": 4.230706182587748e-06, + "loss": 0.1268, + "step": 14976 + }, + { + "epoch": 2.11479807963852, + "grad_norm": 2.804843237428806, + "learning_rate": 4.229461085555333e-06, + "loss": 0.1767, + "step": 14977 + }, + { + "epoch": 2.114939282688506, + "grad_norm": 3.059740113459109, + "learning_rate": 4.228216122627747e-06, + "loss": 0.175, + "step": 14978 + }, + { + "epoch": 2.115080485738492, + "grad_norm": 2.9609526234077954, + "learning_rate": 4.226971293833929e-06, + "loss": 0.1496, + "step": 14979 + }, + { + "epoch": 2.115221688788478, + "grad_norm": 3.4798730569758223, + "learning_rate": 4.225726599202808e-06, + "loss": 0.1907, + "step": 14980 + }, + { + "epoch": 2.1153628918384637, + "grad_norm": 4.08775318922559, + "learning_rate": 4.224482038763305e-06, + "loss": 0.1791, + "step": 14981 + }, + { + "epoch": 2.1155040948884496, + "grad_norm": 2.531328177773969, + "learning_rate": 4.223237612544344e-06, + "loss": 0.1445, + "step": 14982 + }, + { + "epoch": 2.1156452979384355, + "grad_norm": 3.329116697563938, + "learning_rate": 4.221993320574842e-06, + "loss": 0.1793, + "step": 14983 + }, + { + "epoch": 2.1157865009884214, + "grad_norm": 4.046029421308103, + "learning_rate": 4.220749162883725e-06, + "loss": 0.1678, + "step": 14984 + }, + { + "epoch": 2.1159277040384072, + "grad_norm": 3.380780169913058, + "learning_rate": 4.219505139499901e-06, + "loss": 0.1956, + "step": 14985 + }, + { + "epoch": 2.116068907088393, + "grad_norm": 3.8856173836564283, + "learning_rate": 4.218261250452281e-06, + "loss": 0.1783, + "step": 14986 + }, + { + "epoch": 2.116210110138379, + "grad_norm": 3.0188358575181935, + "learning_rate": 4.217017495769772e-06, + "loss": 0.1295, + "step": 14987 + }, + { + "epoch": 2.116351313188365, + "grad_norm": 3.9108532376077645, + "learning_rate": 4.21577387548128e-06, + "loss": 0.2092, + "step": 14988 + }, + { + "epoch": 2.1164925162383508, + "grad_norm": 3.646000478123008, + "learning_rate": 4.214530389615704e-06, + "loss": 0.1845, + "step": 14989 + }, + { + "epoch": 2.1166337192883367, + "grad_norm": 2.9919507888537296, + "learning_rate": 4.213287038201943e-06, + "loss": 0.153, + "step": 14990 + }, + { + "epoch": 2.1167749223383225, + "grad_norm": 2.604114597330168, + "learning_rate": 4.212043821268893e-06, + "loss": 0.1773, + "step": 14991 + }, + { + "epoch": 2.1169161253883084, + "grad_norm": 3.0187264274322363, + "learning_rate": 4.210800738845445e-06, + "loss": 0.1622, + "step": 14992 + }, + { + "epoch": 2.1170573284382943, + "grad_norm": 3.8901259044929426, + "learning_rate": 4.209557790960488e-06, + "loss": 0.2034, + "step": 14993 + }, + { + "epoch": 2.11719853148828, + "grad_norm": 3.078774077890639, + "learning_rate": 4.208314977642907e-06, + "loss": 0.1576, + "step": 14994 + }, + { + "epoch": 2.117339734538266, + "grad_norm": 2.4090286591856875, + "learning_rate": 4.20707229892158e-06, + "loss": 0.1468, + "step": 14995 + }, + { + "epoch": 2.117480937588252, + "grad_norm": 2.645305681220911, + "learning_rate": 4.205829754825396e-06, + "loss": 0.1368, + "step": 14996 + }, + { + "epoch": 2.117622140638238, + "grad_norm": 3.686023318559023, + "learning_rate": 4.2045873453832296e-06, + "loss": 0.2151, + "step": 14997 + }, + { + "epoch": 2.1177633436882237, + "grad_norm": 3.4230883998239032, + "learning_rate": 4.203345070623947e-06, + "loss": 0.1252, + "step": 14998 + }, + { + "epoch": 2.1179045467382096, + "grad_norm": 3.877781277386026, + "learning_rate": 4.2021029305764205e-06, + "loss": 0.2358, + "step": 14999 + }, + { + "epoch": 2.1180457497881955, + "grad_norm": 2.5377684155300217, + "learning_rate": 4.200860925269519e-06, + "loss": 0.1364, + "step": 15000 + }, + { + "epoch": 2.1181869528381814, + "grad_norm": 3.8775433759596556, + "learning_rate": 4.1996190547321e-06, + "loss": 0.2193, + "step": 15001 + }, + { + "epoch": 2.1183281558881673, + "grad_norm": 3.375783433924981, + "learning_rate": 4.198377318993035e-06, + "loss": 0.1452, + "step": 15002 + }, + { + "epoch": 2.118469358938153, + "grad_norm": 2.897557966792658, + "learning_rate": 4.197135718081173e-06, + "loss": 0.1449, + "step": 15003 + }, + { + "epoch": 2.118610561988139, + "grad_norm": 2.8229348090093884, + "learning_rate": 4.1958942520253735e-06, + "loss": 0.1114, + "step": 15004 + }, + { + "epoch": 2.118751765038125, + "grad_norm": 2.7829848479897947, + "learning_rate": 4.194652920854483e-06, + "loss": 0.1139, + "step": 15005 + }, + { + "epoch": 2.118892968088111, + "grad_norm": 3.3053893218495665, + "learning_rate": 4.193411724597352e-06, + "loss": 0.1685, + "step": 15006 + }, + { + "epoch": 2.1190341711380967, + "grad_norm": 3.7341367839155617, + "learning_rate": 4.192170663282825e-06, + "loss": 0.2181, + "step": 15007 + }, + { + "epoch": 2.1191753741880826, + "grad_norm": 3.539189794135913, + "learning_rate": 4.1909297369397435e-06, + "loss": 0.15, + "step": 15008 + }, + { + "epoch": 2.1193165772380684, + "grad_norm": 3.040661257208769, + "learning_rate": 4.189688945596947e-06, + "loss": 0.1297, + "step": 15009 + }, + { + "epoch": 2.1194577802880543, + "grad_norm": 3.3813302582229157, + "learning_rate": 4.188448289283269e-06, + "loss": 0.1766, + "step": 15010 + }, + { + "epoch": 2.11959898333804, + "grad_norm": 2.9788250801288165, + "learning_rate": 4.1872077680275435e-06, + "loss": 0.1397, + "step": 15011 + }, + { + "epoch": 2.119740186388026, + "grad_norm": 3.9348054053650805, + "learning_rate": 4.185967381858599e-06, + "loss": 0.202, + "step": 15012 + }, + { + "epoch": 2.119881389438012, + "grad_norm": 2.7323535036660296, + "learning_rate": 4.184727130805258e-06, + "loss": 0.1083, + "step": 15013 + }, + { + "epoch": 2.120022592487998, + "grad_norm": 3.175895298213388, + "learning_rate": 4.183487014896354e-06, + "loss": 0.1768, + "step": 15014 + }, + { + "epoch": 2.1201637955379837, + "grad_norm": 2.567138502671909, + "learning_rate": 4.182247034160697e-06, + "loss": 0.1465, + "step": 15015 + }, + { + "epoch": 2.1203049985879696, + "grad_norm": 3.3725784453691423, + "learning_rate": 4.1810071886271065e-06, + "loss": 0.1824, + "step": 15016 + }, + { + "epoch": 2.1204462016379555, + "grad_norm": 3.465111551640221, + "learning_rate": 4.179767478324394e-06, + "loss": 0.1577, + "step": 15017 + }, + { + "epoch": 2.1205874046879414, + "grad_norm": 3.4411676313966617, + "learning_rate": 4.178527903281371e-06, + "loss": 0.1754, + "step": 15018 + }, + { + "epoch": 2.1207286077379273, + "grad_norm": 2.9355279210236565, + "learning_rate": 4.177288463526848e-06, + "loss": 0.1678, + "step": 15019 + }, + { + "epoch": 2.120869810787913, + "grad_norm": 3.1830502892892225, + "learning_rate": 4.176049159089626e-06, + "loss": 0.1833, + "step": 15020 + }, + { + "epoch": 2.121011013837899, + "grad_norm": 3.7100785031135572, + "learning_rate": 4.174809989998506e-06, + "loss": 0.2061, + "step": 15021 + }, + { + "epoch": 2.121152216887885, + "grad_norm": 3.020319444234034, + "learning_rate": 4.173570956282286e-06, + "loss": 0.1582, + "step": 15022 + }, + { + "epoch": 2.121293419937871, + "grad_norm": 3.2570410794452416, + "learning_rate": 4.172332057969762e-06, + "loss": 0.1614, + "step": 15023 + }, + { + "epoch": 2.1214346229878567, + "grad_norm": 3.421671625015669, + "learning_rate": 4.171093295089723e-06, + "loss": 0.1873, + "step": 15024 + }, + { + "epoch": 2.1215758260378426, + "grad_norm": 3.6725136520261064, + "learning_rate": 4.169854667670958e-06, + "loss": 0.2005, + "step": 15025 + }, + { + "epoch": 2.1217170290878284, + "grad_norm": 2.8740106213502954, + "learning_rate": 4.168616175742255e-06, + "loss": 0.1577, + "step": 15026 + }, + { + "epoch": 2.1218582321378143, + "grad_norm": 3.39007598518118, + "learning_rate": 4.167377819332392e-06, + "loss": 0.1669, + "step": 15027 + }, + { + "epoch": 2.1219994351878, + "grad_norm": 3.8893944417913446, + "learning_rate": 4.1661395984701495e-06, + "loss": 0.1527, + "step": 15028 + }, + { + "epoch": 2.122140638237786, + "grad_norm": 3.6523311713010664, + "learning_rate": 4.164901513184304e-06, + "loss": 0.1689, + "step": 15029 + }, + { + "epoch": 2.122281841287772, + "grad_norm": 3.2139326574200013, + "learning_rate": 4.1636635635036235e-06, + "loss": 0.157, + "step": 15030 + }, + { + "epoch": 2.122423044337758, + "grad_norm": 3.7698136708259082, + "learning_rate": 4.162425749456889e-06, + "loss": 0.1798, + "step": 15031 + }, + { + "epoch": 2.1225642473877437, + "grad_norm": 3.2163990018433624, + "learning_rate": 4.161188071072854e-06, + "loss": 0.136, + "step": 15032 + }, + { + "epoch": 2.1227054504377296, + "grad_norm": 3.1792484676247117, + "learning_rate": 4.159950528380287e-06, + "loss": 0.1538, + "step": 15033 + }, + { + "epoch": 2.1228466534877155, + "grad_norm": 3.905499515906006, + "learning_rate": 4.158713121407949e-06, + "loss": 0.1653, + "step": 15034 + }, + { + "epoch": 2.1229878565377014, + "grad_norm": 3.564224959407462, + "learning_rate": 4.15747585018459e-06, + "loss": 0.1676, + "step": 15035 + }, + { + "epoch": 2.1231290595876873, + "grad_norm": 3.748945262770402, + "learning_rate": 4.156238714738974e-06, + "loss": 0.1843, + "step": 15036 + }, + { + "epoch": 2.123270262637673, + "grad_norm": 2.570973310936118, + "learning_rate": 4.155001715099845e-06, + "loss": 0.1268, + "step": 15037 + }, + { + "epoch": 2.123411465687659, + "grad_norm": 3.7183523154949634, + "learning_rate": 4.153764851295954e-06, + "loss": 0.2035, + "step": 15038 + }, + { + "epoch": 2.123552668737645, + "grad_norm": 4.096832486503096, + "learning_rate": 4.152528123356042e-06, + "loss": 0.2187, + "step": 15039 + }, + { + "epoch": 2.123693871787631, + "grad_norm": 2.9425809624842056, + "learning_rate": 4.1512915313088505e-06, + "loss": 0.1863, + "step": 15040 + }, + { + "epoch": 2.1238350748376167, + "grad_norm": 3.197391533332398, + "learning_rate": 4.150055075183119e-06, + "loss": 0.1897, + "step": 15041 + }, + { + "epoch": 2.1239762778876026, + "grad_norm": 3.295176317437243, + "learning_rate": 4.148818755007581e-06, + "loss": 0.1715, + "step": 15042 + }, + { + "epoch": 2.1241174809375885, + "grad_norm": 4.422025202361986, + "learning_rate": 4.147582570810967e-06, + "loss": 0.2516, + "step": 15043 + }, + { + "epoch": 2.1242586839875743, + "grad_norm": 3.0766039904430182, + "learning_rate": 4.146346522622008e-06, + "loss": 0.1411, + "step": 15044 + }, + { + "epoch": 2.12439988703756, + "grad_norm": 3.5462547076011304, + "learning_rate": 4.145110610469427e-06, + "loss": 0.196, + "step": 15045 + }, + { + "epoch": 2.124541090087546, + "grad_norm": 3.2665062274682795, + "learning_rate": 4.143874834381947e-06, + "loss": 0.1662, + "step": 15046 + }, + { + "epoch": 2.124682293137532, + "grad_norm": 3.357493733909517, + "learning_rate": 4.142639194388284e-06, + "loss": 0.1582, + "step": 15047 + }, + { + "epoch": 2.124823496187518, + "grad_norm": 2.9172112252489337, + "learning_rate": 4.141403690517163e-06, + "loss": 0.1517, + "step": 15048 + }, + { + "epoch": 2.1249646992375038, + "grad_norm": 3.5797723630560627, + "learning_rate": 4.1401683227972865e-06, + "loss": 0.1704, + "step": 15049 + }, + { + "epoch": 2.1251059022874896, + "grad_norm": 3.0585148957457364, + "learning_rate": 4.138933091257368e-06, + "loss": 0.1597, + "step": 15050 + }, + { + "epoch": 2.1252471053374755, + "grad_norm": 3.0791653110915425, + "learning_rate": 4.137697995926112e-06, + "loss": 0.1569, + "step": 15051 + }, + { + "epoch": 2.1253883083874614, + "grad_norm": 3.7476913945567203, + "learning_rate": 4.13646303683222e-06, + "loss": 0.154, + "step": 15052 + }, + { + "epoch": 2.1255295114374473, + "grad_norm": 3.466153510014836, + "learning_rate": 4.1352282140043985e-06, + "loss": 0.1559, + "step": 15053 + }, + { + "epoch": 2.1256707144874327, + "grad_norm": 4.193919735679608, + "learning_rate": 4.1339935274713404e-06, + "loss": 0.2095, + "step": 15054 + }, + { + "epoch": 2.1258119175374186, + "grad_norm": 3.5933205070182455, + "learning_rate": 4.132758977261739e-06, + "loss": 0.1547, + "step": 15055 + }, + { + "epoch": 2.1259531205874045, + "grad_norm": 2.6516630399879015, + "learning_rate": 4.13152456340429e-06, + "loss": 0.1351, + "step": 15056 + }, + { + "epoch": 2.1260943236373904, + "grad_norm": 2.89272810062591, + "learning_rate": 4.130290285927667e-06, + "loss": 0.1706, + "step": 15057 + }, + { + "epoch": 2.1262355266873763, + "grad_norm": 3.3785242585665487, + "learning_rate": 4.129056144860567e-06, + "loss": 0.1747, + "step": 15058 + }, + { + "epoch": 2.126376729737362, + "grad_norm": 3.135433627434231, + "learning_rate": 4.127822140231668e-06, + "loss": 0.1703, + "step": 15059 + }, + { + "epoch": 2.126517932787348, + "grad_norm": 3.540149061212886, + "learning_rate": 4.126588272069645e-06, + "loss": 0.2135, + "step": 15060 + }, + { + "epoch": 2.126659135837334, + "grad_norm": 2.9357028330020496, + "learning_rate": 4.1253545404031735e-06, + "loss": 0.1514, + "step": 15061 + }, + { + "epoch": 2.12680033888732, + "grad_norm": 3.1794316391888873, + "learning_rate": 4.124120945260927e-06, + "loss": 0.1303, + "step": 15062 + }, + { + "epoch": 2.1269415419373057, + "grad_norm": 3.8592275483359657, + "learning_rate": 4.1228874866715706e-06, + "loss": 0.1838, + "step": 15063 + }, + { + "epoch": 2.1270827449872916, + "grad_norm": 3.052003553252904, + "learning_rate": 4.121654164663769e-06, + "loss": 0.156, + "step": 15064 + }, + { + "epoch": 2.1272239480372774, + "grad_norm": 2.7289109250999997, + "learning_rate": 4.120420979266192e-06, + "loss": 0.1304, + "step": 15065 + }, + { + "epoch": 2.1273651510872633, + "grad_norm": 3.4764045042754734, + "learning_rate": 4.119187930507489e-06, + "loss": 0.1698, + "step": 15066 + }, + { + "epoch": 2.127506354137249, + "grad_norm": 3.3630868009356494, + "learning_rate": 4.117955018416319e-06, + "loss": 0.1671, + "step": 15067 + }, + { + "epoch": 2.127647557187235, + "grad_norm": 3.5640522510184, + "learning_rate": 4.116722243021333e-06, + "loss": 0.1946, + "step": 15068 + }, + { + "epoch": 2.127788760237221, + "grad_norm": 4.219082572583667, + "learning_rate": 4.115489604351178e-06, + "loss": 0.1897, + "step": 15069 + }, + { + "epoch": 2.127929963287207, + "grad_norm": 3.3782169472236965, + "learning_rate": 4.114257102434508e-06, + "loss": 0.1728, + "step": 15070 + }, + { + "epoch": 2.1280711663371927, + "grad_norm": 3.0814984795076126, + "learning_rate": 4.1130247372999595e-06, + "loss": 0.1403, + "step": 15071 + }, + { + "epoch": 2.1282123693871786, + "grad_norm": 3.2247922281572863, + "learning_rate": 4.111792508976175e-06, + "loss": 0.1121, + "step": 15072 + }, + { + "epoch": 2.1283535724371645, + "grad_norm": 3.73470675850116, + "learning_rate": 4.110560417491792e-06, + "loss": 0.1547, + "step": 15073 + }, + { + "epoch": 2.1284947754871504, + "grad_norm": 3.441965473210724, + "learning_rate": 4.1093284628754355e-06, + "loss": 0.1731, + "step": 15074 + }, + { + "epoch": 2.1286359785371363, + "grad_norm": 3.1088202238575704, + "learning_rate": 4.108096645155745e-06, + "loss": 0.1529, + "step": 15075 + }, + { + "epoch": 2.128777181587122, + "grad_norm": 5.926867882436746, + "learning_rate": 4.106864964361343e-06, + "loss": 0.2199, + "step": 15076 + }, + { + "epoch": 2.128918384637108, + "grad_norm": 3.3538342038355853, + "learning_rate": 4.105633420520856e-06, + "loss": 0.1979, + "step": 15077 + }, + { + "epoch": 2.129059587687094, + "grad_norm": 3.33164812848948, + "learning_rate": 4.104402013662901e-06, + "loss": 0.1805, + "step": 15078 + }, + { + "epoch": 2.12920079073708, + "grad_norm": 2.591249641717634, + "learning_rate": 4.103170743816097e-06, + "loss": 0.1485, + "step": 15079 + }, + { + "epoch": 2.1293419937870657, + "grad_norm": 3.8147083144932705, + "learning_rate": 4.101939611009059e-06, + "loss": 0.1581, + "step": 15080 + }, + { + "epoch": 2.1294831968370516, + "grad_norm": 3.704038547663268, + "learning_rate": 4.100708615270395e-06, + "loss": 0.1505, + "step": 15081 + }, + { + "epoch": 2.1296243998870374, + "grad_norm": 2.9692070296532767, + "learning_rate": 4.09947775662872e-06, + "loss": 0.1225, + "step": 15082 + }, + { + "epoch": 2.1297656029370233, + "grad_norm": 3.6741542702938865, + "learning_rate": 4.098247035112631e-06, + "loss": 0.2271, + "step": 15083 + }, + { + "epoch": 2.129906805987009, + "grad_norm": 4.116231123347006, + "learning_rate": 4.097016450750733e-06, + "loss": 0.2155, + "step": 15084 + }, + { + "epoch": 2.130048009036995, + "grad_norm": 3.126971954090494, + "learning_rate": 4.095786003571621e-06, + "loss": 0.1604, + "step": 15085 + }, + { + "epoch": 2.130189212086981, + "grad_norm": 3.683596408656099, + "learning_rate": 4.094555693603891e-06, + "loss": 0.181, + "step": 15086 + }, + { + "epoch": 2.130330415136967, + "grad_norm": 3.5021821377923334, + "learning_rate": 4.093325520876139e-06, + "loss": 0.2267, + "step": 15087 + }, + { + "epoch": 2.1304716181869527, + "grad_norm": 2.081015399487398, + "learning_rate": 4.092095485416952e-06, + "loss": 0.1187, + "step": 15088 + }, + { + "epoch": 2.1306128212369386, + "grad_norm": 2.730292541348565, + "learning_rate": 4.090865587254913e-06, + "loss": 0.1296, + "step": 15089 + }, + { + "epoch": 2.1307540242869245, + "grad_norm": 2.832515701568385, + "learning_rate": 4.0896358264186095e-06, + "loss": 0.1298, + "step": 15090 + }, + { + "epoch": 2.1308952273369104, + "grad_norm": 3.4139305485327758, + "learning_rate": 4.088406202936614e-06, + "loss": 0.172, + "step": 15091 + }, + { + "epoch": 2.1310364303868963, + "grad_norm": 2.928108830981196, + "learning_rate": 4.087176716837502e-06, + "loss": 0.1889, + "step": 15092 + }, + { + "epoch": 2.131177633436882, + "grad_norm": 3.003778646342553, + "learning_rate": 4.085947368149853e-06, + "loss": 0.1254, + "step": 15093 + }, + { + "epoch": 2.131318836486868, + "grad_norm": 3.7382676155308556, + "learning_rate": 4.0847181569022335e-06, + "loss": 0.1865, + "step": 15094 + }, + { + "epoch": 2.131460039536854, + "grad_norm": 3.432698523725392, + "learning_rate": 4.083489083123209e-06, + "loss": 0.1899, + "step": 15095 + }, + { + "epoch": 2.13160124258684, + "grad_norm": 3.2717226901531236, + "learning_rate": 4.0822601468413425e-06, + "loss": 0.1667, + "step": 15096 + }, + { + "epoch": 2.1317424456368257, + "grad_norm": 3.7007068331408406, + "learning_rate": 4.081031348085195e-06, + "loss": 0.1668, + "step": 15097 + }, + { + "epoch": 2.1318836486868116, + "grad_norm": 2.92579617287353, + "learning_rate": 4.079802686883318e-06, + "loss": 0.1504, + "step": 15098 + }, + { + "epoch": 2.1320248517367975, + "grad_norm": 3.081008852157073, + "learning_rate": 4.078574163264278e-06, + "loss": 0.162, + "step": 15099 + }, + { + "epoch": 2.1321660547867833, + "grad_norm": 4.676494500511638, + "learning_rate": 4.077345777256614e-06, + "loss": 0.2159, + "step": 15100 + }, + { + "epoch": 2.132307257836769, + "grad_norm": 3.273177133325917, + "learning_rate": 4.076117528888876e-06, + "loss": 0.1853, + "step": 15101 + }, + { + "epoch": 2.132448460886755, + "grad_norm": 2.3973525284543373, + "learning_rate": 4.074889418189608e-06, + "loss": 0.1114, + "step": 15102 + }, + { + "epoch": 2.132589663936741, + "grad_norm": 3.228568631471149, + "learning_rate": 4.073661445187351e-06, + "loss": 0.1295, + "step": 15103 + }, + { + "epoch": 2.132730866986727, + "grad_norm": 2.969215036599727, + "learning_rate": 4.07243360991064e-06, + "loss": 0.1467, + "step": 15104 + }, + { + "epoch": 2.1328720700367128, + "grad_norm": 2.8815928869127094, + "learning_rate": 4.071205912388015e-06, + "loss": 0.1646, + "step": 15105 + }, + { + "epoch": 2.1330132730866986, + "grad_norm": 2.725606607562211, + "learning_rate": 4.069978352648004e-06, + "loss": 0.1484, + "step": 15106 + }, + { + "epoch": 2.1331544761366845, + "grad_norm": 2.593678166755236, + "learning_rate": 4.068750930719139e-06, + "loss": 0.1331, + "step": 15107 + }, + { + "epoch": 2.1332956791866704, + "grad_norm": 3.473783906379059, + "learning_rate": 4.067523646629938e-06, + "loss": 0.1954, + "step": 15108 + }, + { + "epoch": 2.1334368822366563, + "grad_norm": 3.260515877028411, + "learning_rate": 4.0662965004089195e-06, + "loss": 0.179, + "step": 15109 + }, + { + "epoch": 2.133578085286642, + "grad_norm": 3.326850664080281, + "learning_rate": 4.065069492084614e-06, + "loss": 0.175, + "step": 15110 + }, + { + "epoch": 2.133719288336628, + "grad_norm": 2.59067015179295, + "learning_rate": 4.063842621685529e-06, + "loss": 0.1451, + "step": 15111 + }, + { + "epoch": 2.133860491386614, + "grad_norm": 2.8373891751141764, + "learning_rate": 4.062615889240176e-06, + "loss": 0.1531, + "step": 15112 + }, + { + "epoch": 2.1340016944366, + "grad_norm": 3.029293007074109, + "learning_rate": 4.061389294777068e-06, + "loss": 0.1216, + "step": 15113 + }, + { + "epoch": 2.1341428974865857, + "grad_norm": 3.3179999426202853, + "learning_rate": 4.060162838324708e-06, + "loss": 0.1422, + "step": 15114 + }, + { + "epoch": 2.1342841005365716, + "grad_norm": 3.4714392997108714, + "learning_rate": 4.058936519911598e-06, + "loss": 0.163, + "step": 15115 + }, + { + "epoch": 2.1344253035865575, + "grad_norm": 2.4070650932072546, + "learning_rate": 4.057710339566238e-06, + "loss": 0.1165, + "step": 15116 + }, + { + "epoch": 2.1345665066365433, + "grad_norm": 3.466047058832093, + "learning_rate": 4.0564842973171225e-06, + "loss": 0.141, + "step": 15117 + }, + { + "epoch": 2.1347077096865292, + "grad_norm": 2.972186387799967, + "learning_rate": 4.055258393192746e-06, + "loss": 0.1781, + "step": 15118 + }, + { + "epoch": 2.134848912736515, + "grad_norm": 3.8897642449954346, + "learning_rate": 4.054032627221597e-06, + "loss": 0.2186, + "step": 15119 + }, + { + "epoch": 2.134990115786501, + "grad_norm": 3.2690317664813615, + "learning_rate": 4.052806999432161e-06, + "loss": 0.1855, + "step": 15120 + }, + { + "epoch": 2.135131318836487, + "grad_norm": 2.859182497181163, + "learning_rate": 4.05158150985292e-06, + "loss": 0.1622, + "step": 15121 + }, + { + "epoch": 2.1352725218864728, + "grad_norm": 3.8242214097206304, + "learning_rate": 4.050356158512357e-06, + "loss": 0.2007, + "step": 15122 + }, + { + "epoch": 2.1354137249364586, + "grad_norm": 2.6069501082366044, + "learning_rate": 4.049130945438949e-06, + "loss": 0.1251, + "step": 15123 + }, + { + "epoch": 2.1355549279864445, + "grad_norm": 2.7180317843372417, + "learning_rate": 4.047905870661172e-06, + "loss": 0.1544, + "step": 15124 + }, + { + "epoch": 2.1356961310364304, + "grad_norm": 2.9042577119280417, + "learning_rate": 4.046680934207488e-06, + "loss": 0.1635, + "step": 15125 + }, + { + "epoch": 2.1358373340864163, + "grad_norm": 3.170796151757244, + "learning_rate": 4.045456136106363e-06, + "loss": 0.137, + "step": 15126 + }, + { + "epoch": 2.135978537136402, + "grad_norm": 3.255644785142373, + "learning_rate": 4.044231476386271e-06, + "loss": 0.1428, + "step": 15127 + }, + { + "epoch": 2.136119740186388, + "grad_norm": 2.73315244051077, + "learning_rate": 4.043006955075667e-06, + "loss": 0.1417, + "step": 15128 + }, + { + "epoch": 2.136260943236374, + "grad_norm": 4.063230899008293, + "learning_rate": 4.041782572203009e-06, + "loss": 0.1303, + "step": 15129 + }, + { + "epoch": 2.13640214628636, + "grad_norm": 3.2473876566108912, + "learning_rate": 4.0405583277967506e-06, + "loss": 0.1535, + "step": 15130 + }, + { + "epoch": 2.1365433493363457, + "grad_norm": 3.7070008738291453, + "learning_rate": 4.0393342218853425e-06, + "loss": 0.1599, + "step": 15131 + }, + { + "epoch": 2.1366845523863316, + "grad_norm": 3.4082105076053537, + "learning_rate": 4.038110254497234e-06, + "loss": 0.1476, + "step": 15132 + }, + { + "epoch": 2.1368257554363175, + "grad_norm": 5.208319682110785, + "learning_rate": 4.0368864256608674e-06, + "loss": 0.2258, + "step": 15133 + }, + { + "epoch": 2.1369669584863034, + "grad_norm": 3.0085624690337975, + "learning_rate": 4.0356627354046854e-06, + "loss": 0.1475, + "step": 15134 + }, + { + "epoch": 2.1371081615362892, + "grad_norm": 2.415015866021174, + "learning_rate": 4.034439183757125e-06, + "loss": 0.1138, + "step": 15135 + }, + { + "epoch": 2.137249364586275, + "grad_norm": 3.1360550291141385, + "learning_rate": 4.033215770746622e-06, + "loss": 0.1698, + "step": 15136 + }, + { + "epoch": 2.137390567636261, + "grad_norm": 4.566812597135408, + "learning_rate": 4.0319924964016075e-06, + "loss": 0.2156, + "step": 15137 + }, + { + "epoch": 2.137531770686247, + "grad_norm": 6.247990108210894, + "learning_rate": 4.030769360750507e-06, + "loss": 0.283, + "step": 15138 + }, + { + "epoch": 2.1376729737362328, + "grad_norm": 3.511944283390823, + "learning_rate": 4.029546363821752e-06, + "loss": 0.1779, + "step": 15139 + }, + { + "epoch": 2.1378141767862187, + "grad_norm": 3.048618573623077, + "learning_rate": 4.028323505643762e-06, + "loss": 0.17, + "step": 15140 + }, + { + "epoch": 2.1379553798362045, + "grad_norm": 3.7790333489320553, + "learning_rate": 4.027100786244958e-06, + "loss": 0.1425, + "step": 15141 + }, + { + "epoch": 2.1380965828861904, + "grad_norm": 3.470877394090885, + "learning_rate": 4.025878205653747e-06, + "loss": 0.1668, + "step": 15142 + }, + { + "epoch": 2.1382377859361763, + "grad_norm": 3.960163049755031, + "learning_rate": 4.024655763898544e-06, + "loss": 0.2372, + "step": 15143 + }, + { + "epoch": 2.138378988986162, + "grad_norm": 4.182152833433484, + "learning_rate": 4.023433461007764e-06, + "loss": 0.145, + "step": 15144 + }, + { + "epoch": 2.138520192036148, + "grad_norm": 3.2500553320678467, + "learning_rate": 4.0222112970098095e-06, + "loss": 0.1865, + "step": 15145 + }, + { + "epoch": 2.138661395086134, + "grad_norm": 3.3012387812780988, + "learning_rate": 4.020989271933082e-06, + "loss": 0.154, + "step": 15146 + }, + { + "epoch": 2.13880259813612, + "grad_norm": 3.0022748563567525, + "learning_rate": 4.019767385805983e-06, + "loss": 0.164, + "step": 15147 + }, + { + "epoch": 2.1389438011861057, + "grad_norm": 3.159307467713296, + "learning_rate": 4.0185456386569066e-06, + "loss": 0.1743, + "step": 15148 + }, + { + "epoch": 2.1390850042360916, + "grad_norm": 2.376297777505026, + "learning_rate": 4.017324030514246e-06, + "loss": 0.1294, + "step": 15149 + }, + { + "epoch": 2.1392262072860775, + "grad_norm": 3.462669972751617, + "learning_rate": 4.016102561406392e-06, + "loss": 0.1799, + "step": 15150 + }, + { + "epoch": 2.1393674103360634, + "grad_norm": 2.645935121768809, + "learning_rate": 4.014881231361729e-06, + "loss": 0.165, + "step": 15151 + }, + { + "epoch": 2.1395086133860493, + "grad_norm": 3.791345115388742, + "learning_rate": 4.013660040408643e-06, + "loss": 0.2019, + "step": 15152 + }, + { + "epoch": 2.139649816436035, + "grad_norm": 2.508780759169283, + "learning_rate": 4.012438988575511e-06, + "loss": 0.1248, + "step": 15153 + }, + { + "epoch": 2.139791019486021, + "grad_norm": 3.35665896251655, + "learning_rate": 4.011218075890711e-06, + "loss": 0.1979, + "step": 15154 + }, + { + "epoch": 2.139932222536007, + "grad_norm": 2.929483331155179, + "learning_rate": 4.009997302382614e-06, + "loss": 0.1587, + "step": 15155 + }, + { + "epoch": 2.140073425585993, + "grad_norm": 3.1618003912768797, + "learning_rate": 4.008776668079596e-06, + "loss": 0.1694, + "step": 15156 + }, + { + "epoch": 2.1402146286359787, + "grad_norm": 3.0996051331933314, + "learning_rate": 4.007556173010021e-06, + "loss": 0.1603, + "step": 15157 + }, + { + "epoch": 2.1403558316859645, + "grad_norm": 2.7029980435098557, + "learning_rate": 4.006335817202256e-06, + "loss": 0.1402, + "step": 15158 + }, + { + "epoch": 2.1404970347359504, + "grad_norm": 3.3183063046034547, + "learning_rate": 4.005115600684655e-06, + "loss": 0.1495, + "step": 15159 + }, + { + "epoch": 2.1406382377859363, + "grad_norm": 3.2679324253842856, + "learning_rate": 4.003895523485575e-06, + "loss": 0.1622, + "step": 15160 + }, + { + "epoch": 2.140779440835922, + "grad_norm": 3.079385951513124, + "learning_rate": 4.002675585633375e-06, + "loss": 0.1909, + "step": 15161 + }, + { + "epoch": 2.140920643885908, + "grad_norm": 2.7531241135142106, + "learning_rate": 4.001455787156407e-06, + "loss": 0.1351, + "step": 15162 + }, + { + "epoch": 2.141061846935894, + "grad_norm": 3.187182563919982, + "learning_rate": 4.000236128083015e-06, + "loss": 0.1676, + "step": 15163 + }, + { + "epoch": 2.14120304998588, + "grad_norm": 3.0667966235965234, + "learning_rate": 3.999016608441544e-06, + "loss": 0.1932, + "step": 15164 + }, + { + "epoch": 2.1413442530358657, + "grad_norm": 3.1194157442053454, + "learning_rate": 3.997797228260335e-06, + "loss": 0.1498, + "step": 15165 + }, + { + "epoch": 2.1414854560858516, + "grad_norm": 3.7077525736991275, + "learning_rate": 3.996577987567727e-06, + "loss": 0.2051, + "step": 15166 + }, + { + "epoch": 2.1416266591358375, + "grad_norm": 3.055954528693823, + "learning_rate": 3.9953588863920535e-06, + "loss": 0.1474, + "step": 15167 + }, + { + "epoch": 2.1417678621858234, + "grad_norm": 2.7218662502817565, + "learning_rate": 3.994139924761646e-06, + "loss": 0.143, + "step": 15168 + }, + { + "epoch": 2.1419090652358093, + "grad_norm": 3.496117808398058, + "learning_rate": 3.992921102704834e-06, + "loss": 0.127, + "step": 15169 + }, + { + "epoch": 2.142050268285795, + "grad_norm": 3.6607248806627948, + "learning_rate": 3.991702420249941e-06, + "loss": 0.2107, + "step": 15170 + }, + { + "epoch": 2.142191471335781, + "grad_norm": 2.7851012709042884, + "learning_rate": 3.9904838774252885e-06, + "loss": 0.1676, + "step": 15171 + }, + { + "epoch": 2.142332674385767, + "grad_norm": 3.533113970563802, + "learning_rate": 3.989265474259192e-06, + "loss": 0.1713, + "step": 15172 + }, + { + "epoch": 2.142473877435753, + "grad_norm": 3.199656264636358, + "learning_rate": 3.988047210779973e-06, + "loss": 0.1719, + "step": 15173 + }, + { + "epoch": 2.1426150804857387, + "grad_norm": 3.0967609894905954, + "learning_rate": 3.986829087015941e-06, + "loss": 0.1173, + "step": 15174 + }, + { + "epoch": 2.1427562835357246, + "grad_norm": 3.269090969058446, + "learning_rate": 3.985611102995408e-06, + "loss": 0.1818, + "step": 15175 + }, + { + "epoch": 2.1428974865857104, + "grad_norm": 4.428602579776687, + "learning_rate": 3.984393258746672e-06, + "loss": 0.192, + "step": 15176 + }, + { + "epoch": 2.1430386896356963, + "grad_norm": 3.357909048416758, + "learning_rate": 3.983175554298035e-06, + "loss": 0.1816, + "step": 15177 + }, + { + "epoch": 2.143179892685682, + "grad_norm": 2.497142934246939, + "learning_rate": 3.981957989677803e-06, + "loss": 0.1563, + "step": 15178 + }, + { + "epoch": 2.143321095735668, + "grad_norm": 3.1414115409473906, + "learning_rate": 3.980740564914268e-06, + "loss": 0.1574, + "step": 15179 + }, + { + "epoch": 2.143462298785654, + "grad_norm": 3.267803826234112, + "learning_rate": 3.979523280035723e-06, + "loss": 0.1422, + "step": 15180 + }, + { + "epoch": 2.14360350183564, + "grad_norm": 3.4330432940750986, + "learning_rate": 3.978306135070457e-06, + "loss": 0.1632, + "step": 15181 + }, + { + "epoch": 2.1437447048856253, + "grad_norm": 3.4411052188654576, + "learning_rate": 3.977089130046756e-06, + "loss": 0.1554, + "step": 15182 + }, + { + "epoch": 2.143885907935611, + "grad_norm": 3.558109655085892, + "learning_rate": 3.975872264992903e-06, + "loss": 0.2131, + "step": 15183 + }, + { + "epoch": 2.144027110985597, + "grad_norm": 3.195918262159261, + "learning_rate": 3.974655539937176e-06, + "loss": 0.1716, + "step": 15184 + }, + { + "epoch": 2.144168314035583, + "grad_norm": 4.040756047591546, + "learning_rate": 3.973438954907852e-06, + "loss": 0.1915, + "step": 15185 + }, + { + "epoch": 2.144309517085569, + "grad_norm": 3.5318084546342217, + "learning_rate": 3.972222509933204e-06, + "loss": 0.1929, + "step": 15186 + }, + { + "epoch": 2.1444507201355547, + "grad_norm": 2.962908962613317, + "learning_rate": 3.971006205041503e-06, + "loss": 0.1578, + "step": 15187 + }, + { + "epoch": 2.1445919231855406, + "grad_norm": 3.1601363107077574, + "learning_rate": 3.969790040261013e-06, + "loss": 0.1551, + "step": 15188 + }, + { + "epoch": 2.1447331262355265, + "grad_norm": 3.238774562066638, + "learning_rate": 3.968574015619995e-06, + "loss": 0.1853, + "step": 15189 + }, + { + "epoch": 2.1448743292855124, + "grad_norm": 3.0739655052021053, + "learning_rate": 3.967358131146716e-06, + "loss": 0.1587, + "step": 15190 + }, + { + "epoch": 2.1450155323354982, + "grad_norm": 3.4547586948207205, + "learning_rate": 3.966142386869428e-06, + "loss": 0.1342, + "step": 15191 + }, + { + "epoch": 2.145156735385484, + "grad_norm": 2.7708926701923953, + "learning_rate": 3.96492678281639e-06, + "loss": 0.149, + "step": 15192 + }, + { + "epoch": 2.14529793843547, + "grad_norm": 3.1304062740708614, + "learning_rate": 3.963711319015842e-06, + "loss": 0.1659, + "step": 15193 + }, + { + "epoch": 2.145439141485456, + "grad_norm": 2.610024941630998, + "learning_rate": 3.962495995496037e-06, + "loss": 0.1337, + "step": 15194 + }, + { + "epoch": 2.1455803445354418, + "grad_norm": 2.6931190965156504, + "learning_rate": 3.961280812285213e-06, + "loss": 0.1158, + "step": 15195 + }, + { + "epoch": 2.1457215475854277, + "grad_norm": 3.741099711044749, + "learning_rate": 3.960065769411619e-06, + "loss": 0.2026, + "step": 15196 + }, + { + "epoch": 2.1458627506354135, + "grad_norm": 3.127409213301519, + "learning_rate": 3.958850866903488e-06, + "loss": 0.1582, + "step": 15197 + }, + { + "epoch": 2.1460039536853994, + "grad_norm": 3.489258447439934, + "learning_rate": 3.957636104789056e-06, + "loss": 0.1748, + "step": 15198 + }, + { + "epoch": 2.1461451567353853, + "grad_norm": 3.1813346589851115, + "learning_rate": 3.956421483096548e-06, + "loss": 0.1766, + "step": 15199 + }, + { + "epoch": 2.146286359785371, + "grad_norm": 3.5800884401977164, + "learning_rate": 3.955207001854197e-06, + "loss": 0.203, + "step": 15200 + }, + { + "epoch": 2.146427562835357, + "grad_norm": 3.4382386229566118, + "learning_rate": 3.953992661090224e-06, + "loss": 0.1817, + "step": 15201 + }, + { + "epoch": 2.146568765885343, + "grad_norm": 4.109308435708993, + "learning_rate": 3.952778460832851e-06, + "loss": 0.1811, + "step": 15202 + }, + { + "epoch": 2.146709968935329, + "grad_norm": 4.025631857455741, + "learning_rate": 3.951564401110295e-06, + "loss": 0.2229, + "step": 15203 + }, + { + "epoch": 2.1468511719853147, + "grad_norm": 3.630509849707339, + "learning_rate": 3.95035048195077e-06, + "loss": 0.1426, + "step": 15204 + }, + { + "epoch": 2.1469923750353006, + "grad_norm": 4.073689971394328, + "learning_rate": 3.949136703382487e-06, + "loss": 0.2113, + "step": 15205 + }, + { + "epoch": 2.1471335780852865, + "grad_norm": 2.829861950598597, + "learning_rate": 3.947923065433654e-06, + "loss": 0.145, + "step": 15206 + }, + { + "epoch": 2.1472747811352724, + "grad_norm": 3.5289035426515123, + "learning_rate": 3.9467095681324715e-06, + "loss": 0.1772, + "step": 15207 + }, + { + "epoch": 2.1474159841852583, + "grad_norm": 2.69034786456011, + "learning_rate": 3.945496211507148e-06, + "loss": 0.1617, + "step": 15208 + }, + { + "epoch": 2.147557187235244, + "grad_norm": 3.927892543113057, + "learning_rate": 3.944282995585882e-06, + "loss": 0.2087, + "step": 15209 + }, + { + "epoch": 2.14769839028523, + "grad_norm": 2.5936137684417844, + "learning_rate": 3.943069920396859e-06, + "loss": 0.1344, + "step": 15210 + }, + { + "epoch": 2.147839593335216, + "grad_norm": 4.122725034284877, + "learning_rate": 3.941856985968277e-06, + "loss": 0.217, + "step": 15211 + }, + { + "epoch": 2.147980796385202, + "grad_norm": 3.1809252842661024, + "learning_rate": 3.940644192328317e-06, + "loss": 0.1528, + "step": 15212 + }, + { + "epoch": 2.1481219994351877, + "grad_norm": 5.875969087426035, + "learning_rate": 3.939431539505173e-06, + "loss": 0.1455, + "step": 15213 + }, + { + "epoch": 2.1482632024851736, + "grad_norm": 3.6367583105917825, + "learning_rate": 3.938219027527023e-06, + "loss": 0.1589, + "step": 15214 + }, + { + "epoch": 2.1484044055351594, + "grad_norm": 3.235366826311923, + "learning_rate": 3.937006656422045e-06, + "loss": 0.1686, + "step": 15215 + }, + { + "epoch": 2.1485456085851453, + "grad_norm": 3.620322413932032, + "learning_rate": 3.935794426218415e-06, + "loss": 0.1757, + "step": 15216 + }, + { + "epoch": 2.148686811635131, + "grad_norm": 3.057936909899692, + "learning_rate": 3.934582336944303e-06, + "loss": 0.1995, + "step": 15217 + }, + { + "epoch": 2.148828014685117, + "grad_norm": 2.873163341300102, + "learning_rate": 3.933370388627878e-06, + "loss": 0.1603, + "step": 15218 + }, + { + "epoch": 2.148969217735103, + "grad_norm": 2.587974463879932, + "learning_rate": 3.932158581297304e-06, + "loss": 0.122, + "step": 15219 + }, + { + "epoch": 2.149110420785089, + "grad_norm": 2.6511872126179887, + "learning_rate": 3.930946914980744e-06, + "loss": 0.1325, + "step": 15220 + }, + { + "epoch": 2.1492516238350747, + "grad_norm": 4.194467001381478, + "learning_rate": 3.929735389706359e-06, + "loss": 0.2317, + "step": 15221 + }, + { + "epoch": 2.1493928268850606, + "grad_norm": 3.1830369730238304, + "learning_rate": 3.9285240055023e-06, + "loss": 0.1559, + "step": 15222 + }, + { + "epoch": 2.1495340299350465, + "grad_norm": 3.3265606598456063, + "learning_rate": 3.9273127623967214e-06, + "loss": 0.223, + "step": 15223 + }, + { + "epoch": 2.1496752329850324, + "grad_norm": 3.0650386940784142, + "learning_rate": 3.926101660417769e-06, + "loss": 0.1813, + "step": 15224 + }, + { + "epoch": 2.1498164360350183, + "grad_norm": 4.414065020603255, + "learning_rate": 3.924890699593593e-06, + "loss": 0.2608, + "step": 15225 + }, + { + "epoch": 2.149957639085004, + "grad_norm": 3.3103526540626755, + "learning_rate": 3.9236798799523375e-06, + "loss": 0.1755, + "step": 15226 + }, + { + "epoch": 2.15009884213499, + "grad_norm": 2.7203075192672816, + "learning_rate": 3.9224692015221345e-06, + "loss": 0.1546, + "step": 15227 + }, + { + "epoch": 2.150240045184976, + "grad_norm": 3.443101914034153, + "learning_rate": 3.921258664331122e-06, + "loss": 0.1638, + "step": 15228 + }, + { + "epoch": 2.150381248234962, + "grad_norm": 3.400283166139419, + "learning_rate": 3.9200482684074295e-06, + "loss": 0.1657, + "step": 15229 + }, + { + "epoch": 2.1505224512849477, + "grad_norm": 2.9367851158664, + "learning_rate": 3.9188380137791934e-06, + "loss": 0.1419, + "step": 15230 + }, + { + "epoch": 2.1506636543349336, + "grad_norm": 2.750512390439242, + "learning_rate": 3.917627900474535e-06, + "loss": 0.1196, + "step": 15231 + }, + { + "epoch": 2.1508048573849194, + "grad_norm": 3.112406590871209, + "learning_rate": 3.9164179285215785e-06, + "loss": 0.2019, + "step": 15232 + }, + { + "epoch": 2.1509460604349053, + "grad_norm": 3.121517301643916, + "learning_rate": 3.915208097948441e-06, + "loss": 0.1534, + "step": 15233 + }, + { + "epoch": 2.151087263484891, + "grad_norm": 3.1015066864255734, + "learning_rate": 3.91399840878324e-06, + "loss": 0.1738, + "step": 15234 + }, + { + "epoch": 2.151228466534877, + "grad_norm": 2.6167542035203013, + "learning_rate": 3.9127888610540875e-06, + "loss": 0.1466, + "step": 15235 + }, + { + "epoch": 2.151369669584863, + "grad_norm": 3.2346521186791093, + "learning_rate": 3.911579454789093e-06, + "loss": 0.1544, + "step": 15236 + }, + { + "epoch": 2.151510872634849, + "grad_norm": 2.487383517119653, + "learning_rate": 3.9103701900163625e-06, + "loss": 0.1348, + "step": 15237 + }, + { + "epoch": 2.1516520756848347, + "grad_norm": 3.5960891463619205, + "learning_rate": 3.909161066763999e-06, + "loss": 0.1931, + "step": 15238 + }, + { + "epoch": 2.1517932787348206, + "grad_norm": 3.1006186838104175, + "learning_rate": 3.9079520850601015e-06, + "loss": 0.1578, + "step": 15239 + }, + { + "epoch": 2.1519344817848065, + "grad_norm": 2.8495324753695854, + "learning_rate": 3.906743244932767e-06, + "loss": 0.1366, + "step": 15240 + }, + { + "epoch": 2.1520756848347924, + "grad_norm": 3.489278642149818, + "learning_rate": 3.905534546410085e-06, + "loss": 0.1586, + "step": 15241 + }, + { + "epoch": 2.1522168878847783, + "grad_norm": 3.331838978726516, + "learning_rate": 3.90432598952015e-06, + "loss": 0.1717, + "step": 15242 + }, + { + "epoch": 2.152358090934764, + "grad_norm": 3.4704768721589976, + "learning_rate": 3.903117574291051e-06, + "loss": 0.1476, + "step": 15243 + }, + { + "epoch": 2.15249929398475, + "grad_norm": 2.7336150874614953, + "learning_rate": 3.9019093007508635e-06, + "loss": 0.136, + "step": 15244 + }, + { + "epoch": 2.152640497034736, + "grad_norm": 2.7102379668933034, + "learning_rate": 3.900701168927669e-06, + "loss": 0.1342, + "step": 15245 + }, + { + "epoch": 2.152781700084722, + "grad_norm": 2.592063471231596, + "learning_rate": 3.899493178849544e-06, + "loss": 0.1407, + "step": 15246 + }, + { + "epoch": 2.1529229031347077, + "grad_norm": 4.422908590941699, + "learning_rate": 3.898285330544565e-06, + "loss": 0.2081, + "step": 15247 + }, + { + "epoch": 2.1530641061846936, + "grad_norm": 3.602474572692268, + "learning_rate": 3.8970776240407995e-06, + "loss": 0.2001, + "step": 15248 + }, + { + "epoch": 2.1532053092346795, + "grad_norm": 3.554511121885151, + "learning_rate": 3.895870059366315e-06, + "loss": 0.2069, + "step": 15249 + }, + { + "epoch": 2.1533465122846653, + "grad_norm": 3.240250043461691, + "learning_rate": 3.8946626365491735e-06, + "loss": 0.1669, + "step": 15250 + }, + { + "epoch": 2.153487715334651, + "grad_norm": 3.352868308007309, + "learning_rate": 3.893455355617436e-06, + "loss": 0.1782, + "step": 15251 + }, + { + "epoch": 2.153628918384637, + "grad_norm": 3.4416225348538165, + "learning_rate": 3.892248216599158e-06, + "loss": 0.1948, + "step": 15252 + }, + { + "epoch": 2.153770121434623, + "grad_norm": 3.5919661481983534, + "learning_rate": 3.891041219522394e-06, + "loss": 0.1624, + "step": 15253 + }, + { + "epoch": 2.153911324484609, + "grad_norm": 3.5036134787466207, + "learning_rate": 3.8898343644151945e-06, + "loss": 0.1934, + "step": 15254 + }, + { + "epoch": 2.1540525275345948, + "grad_norm": 3.130168988300437, + "learning_rate": 3.888627651305605e-06, + "loss": 0.129, + "step": 15255 + }, + { + "epoch": 2.1541937305845806, + "grad_norm": 2.866773811252313, + "learning_rate": 3.887421080221671e-06, + "loss": 0.1688, + "step": 15256 + }, + { + "epoch": 2.1543349336345665, + "grad_norm": 3.534645331581875, + "learning_rate": 3.88621465119143e-06, + "loss": 0.1344, + "step": 15257 + }, + { + "epoch": 2.1544761366845524, + "grad_norm": 2.987871963268388, + "learning_rate": 3.885008364242917e-06, + "loss": 0.1371, + "step": 15258 + }, + { + "epoch": 2.1546173397345383, + "grad_norm": 3.286371591947467, + "learning_rate": 3.8838022194041725e-06, + "loss": 0.1774, + "step": 15259 + }, + { + "epoch": 2.154758542784524, + "grad_norm": 2.534551743520572, + "learning_rate": 3.882596216703226e-06, + "loss": 0.1197, + "step": 15260 + }, + { + "epoch": 2.15489974583451, + "grad_norm": 2.9437899527965987, + "learning_rate": 3.881390356168098e-06, + "loss": 0.1562, + "step": 15261 + }, + { + "epoch": 2.155040948884496, + "grad_norm": 2.866136110078719, + "learning_rate": 3.880184637826816e-06, + "loss": 0.1356, + "step": 15262 + }, + { + "epoch": 2.155182151934482, + "grad_norm": 2.6240266093338542, + "learning_rate": 3.878979061707396e-06, + "loss": 0.1432, + "step": 15263 + }, + { + "epoch": 2.1553233549844677, + "grad_norm": 3.415561897251931, + "learning_rate": 3.877773627837863e-06, + "loss": 0.1843, + "step": 15264 + }, + { + "epoch": 2.1554645580344536, + "grad_norm": 2.8768257785004865, + "learning_rate": 3.876568336246228e-06, + "loss": 0.1365, + "step": 15265 + }, + { + "epoch": 2.1556057610844395, + "grad_norm": 3.3656178980087543, + "learning_rate": 3.875363186960499e-06, + "loss": 0.1969, + "step": 15266 + }, + { + "epoch": 2.1557469641344253, + "grad_norm": 3.2037309209292135, + "learning_rate": 3.874158180008685e-06, + "loss": 0.1757, + "step": 15267 + }, + { + "epoch": 2.1558881671844112, + "grad_norm": 3.4781164380295593, + "learning_rate": 3.872953315418793e-06, + "loss": 0.1873, + "step": 15268 + }, + { + "epoch": 2.156029370234397, + "grad_norm": 3.4690197393626985, + "learning_rate": 3.8717485932188124e-06, + "loss": 0.1767, + "step": 15269 + }, + { + "epoch": 2.156170573284383, + "grad_norm": 3.3157076970876047, + "learning_rate": 3.870544013436751e-06, + "loss": 0.2028, + "step": 15270 + }, + { + "epoch": 2.156311776334369, + "grad_norm": 3.3084173472157437, + "learning_rate": 3.8693395761006e-06, + "loss": 0.1312, + "step": 15271 + }, + { + "epoch": 2.1564529793843548, + "grad_norm": 3.918745350214336, + "learning_rate": 3.868135281238351e-06, + "loss": 0.1952, + "step": 15272 + }, + { + "epoch": 2.1565941824343406, + "grad_norm": 2.9830337598343197, + "learning_rate": 3.866931128877989e-06, + "loss": 0.1834, + "step": 15273 + }, + { + "epoch": 2.1567353854843265, + "grad_norm": 2.9262998541081875, + "learning_rate": 3.865727119047497e-06, + "loss": 0.1485, + "step": 15274 + }, + { + "epoch": 2.1568765885343124, + "grad_norm": 2.8374622433568923, + "learning_rate": 3.864523251774856e-06, + "loss": 0.1583, + "step": 15275 + }, + { + "epoch": 2.1570177915842983, + "grad_norm": 3.511293800393851, + "learning_rate": 3.863319527088048e-06, + "loss": 0.1712, + "step": 15276 + }, + { + "epoch": 2.157158994634284, + "grad_norm": 3.468901440733914, + "learning_rate": 3.862115945015048e-06, + "loss": 0.1701, + "step": 15277 + }, + { + "epoch": 2.15730019768427, + "grad_norm": 2.9560518900121924, + "learning_rate": 3.860912505583819e-06, + "loss": 0.1547, + "step": 15278 + }, + { + "epoch": 2.157441400734256, + "grad_norm": 3.9744047529219397, + "learning_rate": 3.859709208822332e-06, + "loss": 0.2261, + "step": 15279 + }, + { + "epoch": 2.157582603784242, + "grad_norm": 3.571880403574438, + "learning_rate": 3.858506054758547e-06, + "loss": 0.1599, + "step": 15280 + }, + { + "epoch": 2.1577238068342277, + "grad_norm": 3.0631027697441615, + "learning_rate": 3.857303043420434e-06, + "loss": 0.1688, + "step": 15281 + }, + { + "epoch": 2.1578650098842136, + "grad_norm": 2.8119924638164187, + "learning_rate": 3.856100174835945e-06, + "loss": 0.1784, + "step": 15282 + }, + { + "epoch": 2.1580062129341995, + "grad_norm": 3.0827524247279987, + "learning_rate": 3.854897449033034e-06, + "loss": 0.155, + "step": 15283 + }, + { + "epoch": 2.1581474159841854, + "grad_norm": 2.68446970016538, + "learning_rate": 3.853694866039652e-06, + "loss": 0.11, + "step": 15284 + }, + { + "epoch": 2.1582886190341712, + "grad_norm": 3.5419555907107028, + "learning_rate": 3.852492425883752e-06, + "loss": 0.1708, + "step": 15285 + }, + { + "epoch": 2.158429822084157, + "grad_norm": 2.953795648035667, + "learning_rate": 3.851290128593265e-06, + "loss": 0.1659, + "step": 15286 + }, + { + "epoch": 2.158571025134143, + "grad_norm": 2.944376672317145, + "learning_rate": 3.850087974196143e-06, + "loss": 0.1562, + "step": 15287 + }, + { + "epoch": 2.158712228184129, + "grad_norm": 3.2035503266163334, + "learning_rate": 3.848885962720321e-06, + "loss": 0.1421, + "step": 15288 + }, + { + "epoch": 2.1588534312341148, + "grad_norm": 3.63941778898501, + "learning_rate": 3.847684094193733e-06, + "loss": 0.2136, + "step": 15289 + }, + { + "epoch": 2.1589946342841007, + "grad_norm": 3.0149872853986106, + "learning_rate": 3.84648236864431e-06, + "loss": 0.1563, + "step": 15290 + }, + { + "epoch": 2.1591358373340865, + "grad_norm": 3.3275718954586653, + "learning_rate": 3.8452807860999785e-06, + "loss": 0.1974, + "step": 15291 + }, + { + "epoch": 2.1592770403840724, + "grad_norm": 3.2755945761329954, + "learning_rate": 3.8440793465886595e-06, + "loss": 0.2, + "step": 15292 + }, + { + "epoch": 2.1594182434340583, + "grad_norm": 3.33383384526692, + "learning_rate": 3.842878050138282e-06, + "loss": 0.128, + "step": 15293 + }, + { + "epoch": 2.159559446484044, + "grad_norm": 3.176719835032803, + "learning_rate": 3.841676896776764e-06, + "loss": 0.1643, + "step": 15294 + }, + { + "epoch": 2.15970064953403, + "grad_norm": 3.613338091187333, + "learning_rate": 3.840475886532011e-06, + "loss": 0.2051, + "step": 15295 + }, + { + "epoch": 2.159841852584016, + "grad_norm": 3.3625537353218165, + "learning_rate": 3.8392750194319385e-06, + "loss": 0.1376, + "step": 15296 + }, + { + "epoch": 2.159983055634002, + "grad_norm": 3.146469835139758, + "learning_rate": 3.838074295504454e-06, + "loss": 0.1657, + "step": 15297 + }, + { + "epoch": 2.1601242586839877, + "grad_norm": 2.6082015758698103, + "learning_rate": 3.836873714777458e-06, + "loss": 0.1632, + "step": 15298 + }, + { + "epoch": 2.1602654617339736, + "grad_norm": 4.540842152243143, + "learning_rate": 3.835673277278858e-06, + "loss": 0.1981, + "step": 15299 + }, + { + "epoch": 2.1604066647839595, + "grad_norm": 3.2490854969366683, + "learning_rate": 3.834472983036551e-06, + "loss": 0.167, + "step": 15300 + }, + { + "epoch": 2.1605478678339454, + "grad_norm": 3.9556932777909286, + "learning_rate": 3.8332728320784275e-06, + "loss": 0.194, + "step": 15301 + }, + { + "epoch": 2.1606890708839313, + "grad_norm": 2.700978814903802, + "learning_rate": 3.832072824432385e-06, + "loss": 0.1285, + "step": 15302 + }, + { + "epoch": 2.160830273933917, + "grad_norm": 3.4352314085462674, + "learning_rate": 3.830872960126297e-06, + "loss": 0.1658, + "step": 15303 + }, + { + "epoch": 2.160971476983903, + "grad_norm": 2.5588646615240687, + "learning_rate": 3.829673239188063e-06, + "loss": 0.1427, + "step": 15304 + }, + { + "epoch": 2.161112680033889, + "grad_norm": 3.5142425041654355, + "learning_rate": 3.828473661645558e-06, + "loss": 0.1978, + "step": 15305 + }, + { + "epoch": 2.161253883083875, + "grad_norm": 3.2926734622901854, + "learning_rate": 3.827274227526658e-06, + "loss": 0.1633, + "step": 15306 + }, + { + "epoch": 2.1613950861338607, + "grad_norm": 2.9461290159189804, + "learning_rate": 3.82607493685924e-06, + "loss": 0.1412, + "step": 15307 + }, + { + "epoch": 2.1615362891838465, + "grad_norm": 3.386064984708725, + "learning_rate": 3.824875789671175e-06, + "loss": 0.1566, + "step": 15308 + }, + { + "epoch": 2.1616774922338324, + "grad_norm": 2.862460929557685, + "learning_rate": 3.823676785990329e-06, + "loss": 0.1278, + "step": 15309 + }, + { + "epoch": 2.1618186952838183, + "grad_norm": 4.086992362211393, + "learning_rate": 3.822477925844564e-06, + "loss": 0.1909, + "step": 15310 + }, + { + "epoch": 2.161959898333804, + "grad_norm": 3.7664697822042954, + "learning_rate": 3.821279209261751e-06, + "loss": 0.1795, + "step": 15311 + }, + { + "epoch": 2.16210110138379, + "grad_norm": 3.3440607693996642, + "learning_rate": 3.820080636269737e-06, + "loss": 0.1616, + "step": 15312 + }, + { + "epoch": 2.162242304433776, + "grad_norm": 3.0048849576878016, + "learning_rate": 3.81888220689638e-06, + "loss": 0.1434, + "step": 15313 + }, + { + "epoch": 2.162383507483762, + "grad_norm": 2.896908253207604, + "learning_rate": 3.8176839211695314e-06, + "loss": 0.133, + "step": 15314 + }, + { + "epoch": 2.1625247105337477, + "grad_norm": 3.2537810170854105, + "learning_rate": 3.816485779117035e-06, + "loss": 0.1665, + "step": 15315 + }, + { + "epoch": 2.1626659135837336, + "grad_norm": 3.0621114896382533, + "learning_rate": 3.815287780766743e-06, + "loss": 0.1639, + "step": 15316 + }, + { + "epoch": 2.1628071166337195, + "grad_norm": 3.6768795872327686, + "learning_rate": 3.8140899261464915e-06, + "loss": 0.1923, + "step": 15317 + }, + { + "epoch": 2.1629483196837054, + "grad_norm": 2.7463785874010997, + "learning_rate": 3.8128922152841188e-06, + "loss": 0.1391, + "step": 15318 + }, + { + "epoch": 2.1630895227336913, + "grad_norm": 3.244986865456969, + "learning_rate": 3.811694648207461e-06, + "loss": 0.1863, + "step": 15319 + }, + { + "epoch": 2.163230725783677, + "grad_norm": 4.673323706078541, + "learning_rate": 3.8104972249443417e-06, + "loss": 0.218, + "step": 15320 + }, + { + "epoch": 2.163371928833663, + "grad_norm": 2.526117921874665, + "learning_rate": 3.8092999455225967e-06, + "loss": 0.1265, + "step": 15321 + }, + { + "epoch": 2.163513131883649, + "grad_norm": 3.296483664903737, + "learning_rate": 3.808102809970048e-06, + "loss": 0.1766, + "step": 15322 + }, + { + "epoch": 2.163654334933635, + "grad_norm": 3.789448211818931, + "learning_rate": 3.806905818314515e-06, + "loss": 0.1762, + "step": 15323 + }, + { + "epoch": 2.1637955379836207, + "grad_norm": 3.9197739271259597, + "learning_rate": 3.805708970583817e-06, + "loss": 0.1974, + "step": 15324 + }, + { + "epoch": 2.1639367410336066, + "grad_norm": 3.1000307937396285, + "learning_rate": 3.8045122668057668e-06, + "loss": 0.141, + "step": 15325 + }, + { + "epoch": 2.164077944083592, + "grad_norm": 2.657344620693299, + "learning_rate": 3.803315707008176e-06, + "loss": 0.15, + "step": 15326 + }, + { + "epoch": 2.164219147133578, + "grad_norm": 2.8089350673781137, + "learning_rate": 3.8021192912188485e-06, + "loss": 0.1478, + "step": 15327 + }, + { + "epoch": 2.1643603501835638, + "grad_norm": 3.7857709409979274, + "learning_rate": 3.800923019465599e-06, + "loss": 0.168, + "step": 15328 + }, + { + "epoch": 2.1645015532335496, + "grad_norm": 3.50589759910851, + "learning_rate": 3.7997268917762185e-06, + "loss": 0.1819, + "step": 15329 + }, + { + "epoch": 2.1646427562835355, + "grad_norm": 2.2082483455816955, + "learning_rate": 3.798530908178506e-06, + "loss": 0.0905, + "step": 15330 + }, + { + "epoch": 2.1647839593335214, + "grad_norm": 3.490753386117512, + "learning_rate": 3.7973350687002584e-06, + "loss": 0.1793, + "step": 15331 + }, + { + "epoch": 2.1649251623835073, + "grad_norm": 2.4821707090911875, + "learning_rate": 3.796139373369261e-06, + "loss": 0.1213, + "step": 15332 + }, + { + "epoch": 2.165066365433493, + "grad_norm": 3.0543426265133684, + "learning_rate": 3.7949438222133085e-06, + "loss": 0.1226, + "step": 15333 + }, + { + "epoch": 2.165207568483479, + "grad_norm": 3.287532178360304, + "learning_rate": 3.7937484152601824e-06, + "loss": 0.1563, + "step": 15334 + }, + { + "epoch": 2.165348771533465, + "grad_norm": 3.243183900110629, + "learning_rate": 3.7925531525376624e-06, + "loss": 0.1382, + "step": 15335 + }, + { + "epoch": 2.165489974583451, + "grad_norm": 3.2933214377181224, + "learning_rate": 3.79135803407353e-06, + "loss": 0.2206, + "step": 15336 + }, + { + "epoch": 2.1656311776334367, + "grad_norm": 3.0993382501902786, + "learning_rate": 3.7901630598955485e-06, + "loss": 0.1694, + "step": 15337 + }, + { + "epoch": 2.1657723806834226, + "grad_norm": 3.3899711113670254, + "learning_rate": 3.7889682300315e-06, + "loss": 0.1695, + "step": 15338 + }, + { + "epoch": 2.1659135837334085, + "grad_norm": 3.005882933121945, + "learning_rate": 3.787773544509147e-06, + "loss": 0.1497, + "step": 15339 + }, + { + "epoch": 2.1660547867833944, + "grad_norm": 4.0668197087918605, + "learning_rate": 3.7865790033562532e-06, + "loss": 0.2043, + "step": 15340 + }, + { + "epoch": 2.1661959898333802, + "grad_norm": 3.642277064534681, + "learning_rate": 3.7853846066005807e-06, + "loss": 0.2199, + "step": 15341 + }, + { + "epoch": 2.166337192883366, + "grad_norm": 3.371790047851958, + "learning_rate": 3.7841903542698855e-06, + "loss": 0.1819, + "step": 15342 + }, + { + "epoch": 2.166478395933352, + "grad_norm": 3.7317281704497307, + "learning_rate": 3.7829962463919236e-06, + "loss": 0.1712, + "step": 15343 + }, + { + "epoch": 2.166619598983338, + "grad_norm": 3.5231949009561214, + "learning_rate": 3.781802282994439e-06, + "loss": 0.1687, + "step": 15344 + }, + { + "epoch": 2.1667608020333238, + "grad_norm": 4.350786320114571, + "learning_rate": 3.780608464105191e-06, + "loss": 0.2178, + "step": 15345 + }, + { + "epoch": 2.1669020050833097, + "grad_norm": 3.5252677665927776, + "learning_rate": 3.7794147897519128e-06, + "loss": 0.1751, + "step": 15346 + }, + { + "epoch": 2.1670432081332955, + "grad_norm": 3.4246569735107126, + "learning_rate": 3.778221259962349e-06, + "loss": 0.1673, + "step": 15347 + }, + { + "epoch": 2.1671844111832814, + "grad_norm": 3.3325876889875126, + "learning_rate": 3.7770278747642364e-06, + "loss": 0.1741, + "step": 15348 + }, + { + "epoch": 2.1673256142332673, + "grad_norm": 2.907369636944353, + "learning_rate": 3.7758346341853045e-06, + "loss": 0.1463, + "step": 15349 + }, + { + "epoch": 2.167466817283253, + "grad_norm": 3.784229481613177, + "learning_rate": 3.7746415382532907e-06, + "loss": 0.1882, + "step": 15350 + }, + { + "epoch": 2.167608020333239, + "grad_norm": 3.8002700779615926, + "learning_rate": 3.7734485869959193e-06, + "loss": 0.1939, + "step": 15351 + }, + { + "epoch": 2.167749223383225, + "grad_norm": 3.2231402742419397, + "learning_rate": 3.7722557804409145e-06, + "loss": 0.1108, + "step": 15352 + }, + { + "epoch": 2.167890426433211, + "grad_norm": 2.6906017540948968, + "learning_rate": 3.7710631186159984e-06, + "loss": 0.1429, + "step": 15353 + }, + { + "epoch": 2.1680316294831967, + "grad_norm": 3.60123040054035, + "learning_rate": 3.7698706015488783e-06, + "loss": 0.1755, + "step": 15354 + }, + { + "epoch": 2.1681728325331826, + "grad_norm": 2.6837982470633093, + "learning_rate": 3.7686782292672797e-06, + "loss": 0.1457, + "step": 15355 + }, + { + "epoch": 2.1683140355831685, + "grad_norm": 3.21164042358376, + "learning_rate": 3.767486001798907e-06, + "loss": 0.1484, + "step": 15356 + }, + { + "epoch": 2.1684552386331544, + "grad_norm": 2.6316559880922816, + "learning_rate": 3.7662939191714678e-06, + "loss": 0.1393, + "step": 15357 + }, + { + "epoch": 2.1685964416831403, + "grad_norm": 3.7521457753440535, + "learning_rate": 3.7651019814126656e-06, + "loss": 0.1746, + "step": 15358 + }, + { + "epoch": 2.168737644733126, + "grad_norm": 3.6845159556381613, + "learning_rate": 3.7639101885502015e-06, + "loss": 0.1509, + "step": 15359 + }, + { + "epoch": 2.168878847783112, + "grad_norm": 2.8416102219929327, + "learning_rate": 3.7627185406117707e-06, + "loss": 0.1396, + "step": 15360 + }, + { + "epoch": 2.169020050833098, + "grad_norm": 3.3991676958603536, + "learning_rate": 3.7615270376250646e-06, + "loss": 0.2031, + "step": 15361 + }, + { + "epoch": 2.169161253883084, + "grad_norm": 3.0516482272611967, + "learning_rate": 3.7603356796177824e-06, + "loss": 0.2067, + "step": 15362 + }, + { + "epoch": 2.1693024569330697, + "grad_norm": 3.755994710291307, + "learning_rate": 3.7591444666176004e-06, + "loss": 0.2202, + "step": 15363 + }, + { + "epoch": 2.1694436599830556, + "grad_norm": 2.9050947108982568, + "learning_rate": 3.7579533986522065e-06, + "loss": 0.1587, + "step": 15364 + }, + { + "epoch": 2.1695848630330414, + "grad_norm": 3.0943909538038774, + "learning_rate": 3.7567624757492795e-06, + "loss": 0.1763, + "step": 15365 + }, + { + "epoch": 2.1697260660830273, + "grad_norm": 4.203254514419069, + "learning_rate": 3.755571697936493e-06, + "loss": 0.2048, + "step": 15366 + }, + { + "epoch": 2.169867269133013, + "grad_norm": 2.4446060555671894, + "learning_rate": 3.754381065241527e-06, + "loss": 0.139, + "step": 15367 + }, + { + "epoch": 2.170008472182999, + "grad_norm": 2.9877278217025465, + "learning_rate": 3.7531905776920474e-06, + "loss": 0.1426, + "step": 15368 + }, + { + "epoch": 2.170149675232985, + "grad_norm": 2.569160547657622, + "learning_rate": 3.7520002353157213e-06, + "loss": 0.1541, + "step": 15369 + }, + { + "epoch": 2.170290878282971, + "grad_norm": 3.3145568733824993, + "learning_rate": 3.7508100381402157e-06, + "loss": 0.1545, + "step": 15370 + }, + { + "epoch": 2.1704320813329567, + "grad_norm": 3.445364492838091, + "learning_rate": 3.7496199861931815e-06, + "loss": 0.141, + "step": 15371 + }, + { + "epoch": 2.1705732843829426, + "grad_norm": 2.8588574401648783, + "learning_rate": 3.7484300795022765e-06, + "loss": 0.1223, + "step": 15372 + }, + { + "epoch": 2.1707144874329285, + "grad_norm": 2.7249217650030375, + "learning_rate": 3.74724031809516e-06, + "loss": 0.1417, + "step": 15373 + }, + { + "epoch": 2.1708556904829144, + "grad_norm": 3.3091110139399564, + "learning_rate": 3.7460507019994775e-06, + "loss": 0.1378, + "step": 15374 + }, + { + "epoch": 2.1709968935329003, + "grad_norm": 3.712664975473104, + "learning_rate": 3.7448612312428765e-06, + "loss": 0.1754, + "step": 15375 + }, + { + "epoch": 2.171138096582886, + "grad_norm": 2.817819294199809, + "learning_rate": 3.743671905852999e-06, + "loss": 0.1415, + "step": 15376 + }, + { + "epoch": 2.171279299632872, + "grad_norm": 3.2452438491015685, + "learning_rate": 3.7424827258574837e-06, + "loss": 0.1871, + "step": 15377 + }, + { + "epoch": 2.171420502682858, + "grad_norm": 2.813564694746286, + "learning_rate": 3.741293691283968e-06, + "loss": 0.1259, + "step": 15378 + }, + { + "epoch": 2.171561705732844, + "grad_norm": 3.1138021737627026, + "learning_rate": 3.740104802160085e-06, + "loss": 0.1872, + "step": 15379 + }, + { + "epoch": 2.1717029087828297, + "grad_norm": 3.052937746372439, + "learning_rate": 3.738916058513462e-06, + "loss": 0.1705, + "step": 15380 + }, + { + "epoch": 2.1718441118328156, + "grad_norm": 2.925382643249554, + "learning_rate": 3.7377274603717262e-06, + "loss": 0.1757, + "step": 15381 + }, + { + "epoch": 2.1719853148828014, + "grad_norm": 4.173324737980824, + "learning_rate": 3.7365390077624985e-06, + "loss": 0.1868, + "step": 15382 + }, + { + "epoch": 2.1721265179327873, + "grad_norm": 3.317451027431448, + "learning_rate": 3.7353507007134005e-06, + "loss": 0.1904, + "step": 15383 + }, + { + "epoch": 2.172267720982773, + "grad_norm": 3.0507985019053985, + "learning_rate": 3.734162539252043e-06, + "loss": 0.133, + "step": 15384 + }, + { + "epoch": 2.172408924032759, + "grad_norm": 3.338489159534771, + "learning_rate": 3.7329745234060457e-06, + "loss": 0.1546, + "step": 15385 + }, + { + "epoch": 2.172550127082745, + "grad_norm": 2.5675335271428463, + "learning_rate": 3.7317866532030145e-06, + "loss": 0.1563, + "step": 15386 + }, + { + "epoch": 2.172691330132731, + "grad_norm": 2.2702537802327427, + "learning_rate": 3.730598928670557e-06, + "loss": 0.1227, + "step": 15387 + }, + { + "epoch": 2.1728325331827167, + "grad_norm": 2.6190434464765997, + "learning_rate": 3.7294113498362695e-06, + "loss": 0.1271, + "step": 15388 + }, + { + "epoch": 2.1729737362327026, + "grad_norm": 3.629753094343204, + "learning_rate": 3.7282239167277513e-06, + "loss": 0.1917, + "step": 15389 + }, + { + "epoch": 2.1731149392826885, + "grad_norm": 3.8668453061453554, + "learning_rate": 3.7270366293726033e-06, + "loss": 0.1975, + "step": 15390 + }, + { + "epoch": 2.1732561423326744, + "grad_norm": 3.0225970713047356, + "learning_rate": 3.7258494877984154e-06, + "loss": 0.1692, + "step": 15391 + }, + { + "epoch": 2.1733973453826603, + "grad_norm": 3.327321908791253, + "learning_rate": 3.7246624920327755e-06, + "loss": 0.1612, + "step": 15392 + }, + { + "epoch": 2.173538548432646, + "grad_norm": 3.0419769941703856, + "learning_rate": 3.7234756421032694e-06, + "loss": 0.1455, + "step": 15393 + }, + { + "epoch": 2.173679751482632, + "grad_norm": 3.0845212846518533, + "learning_rate": 3.722288938037478e-06, + "loss": 0.156, + "step": 15394 + }, + { + "epoch": 2.173820954532618, + "grad_norm": 2.9778173084114212, + "learning_rate": 3.721102379862982e-06, + "loss": 0.1469, + "step": 15395 + }, + { + "epoch": 2.173962157582604, + "grad_norm": 2.8932410477389476, + "learning_rate": 3.719915967607355e-06, + "loss": 0.1483, + "step": 15396 + }, + { + "epoch": 2.1741033606325897, + "grad_norm": 3.2071600955915005, + "learning_rate": 3.718729701298167e-06, + "loss": 0.1381, + "step": 15397 + }, + { + "epoch": 2.1742445636825756, + "grad_norm": 2.857276049412006, + "learning_rate": 3.71754358096299e-06, + "loss": 0.1323, + "step": 15398 + }, + { + "epoch": 2.1743857667325615, + "grad_norm": 2.4863503600481924, + "learning_rate": 3.716357606629386e-06, + "loss": 0.1333, + "step": 15399 + }, + { + "epoch": 2.1745269697825473, + "grad_norm": 4.1421598585326995, + "learning_rate": 3.7151717783249175e-06, + "loss": 0.1688, + "step": 15400 + }, + { + "epoch": 2.174668172832533, + "grad_norm": 2.8460378231879417, + "learning_rate": 3.71398609607714e-06, + "loss": 0.1227, + "step": 15401 + }, + { + "epoch": 2.174809375882519, + "grad_norm": 3.8582424314658006, + "learning_rate": 3.712800559913613e-06, + "loss": 0.1703, + "step": 15402 + }, + { + "epoch": 2.174950578932505, + "grad_norm": 3.2118177604130866, + "learning_rate": 3.7116151698618874e-06, + "loss": 0.1601, + "step": 15403 + }, + { + "epoch": 2.175091781982491, + "grad_norm": 4.1378949072083575, + "learning_rate": 3.7104299259495113e-06, + "loss": 0.2213, + "step": 15404 + }, + { + "epoch": 2.1752329850324768, + "grad_norm": 3.3155670257078507, + "learning_rate": 3.709244828204025e-06, + "loss": 0.1832, + "step": 15405 + }, + { + "epoch": 2.1753741880824626, + "grad_norm": 3.0999955812339794, + "learning_rate": 3.7080598766529686e-06, + "loss": 0.1466, + "step": 15406 + }, + { + "epoch": 2.1755153911324485, + "grad_norm": 3.5242071960954253, + "learning_rate": 3.7068750713238867e-06, + "loss": 0.1811, + "step": 15407 + }, + { + "epoch": 2.1756565941824344, + "grad_norm": 3.150394533370885, + "learning_rate": 3.7056904122443105e-06, + "loss": 0.1602, + "step": 15408 + }, + { + "epoch": 2.1757977972324203, + "grad_norm": 3.414828098630438, + "learning_rate": 3.7045058994417702e-06, + "loss": 0.1608, + "step": 15409 + }, + { + "epoch": 2.175939000282406, + "grad_norm": 3.4177649883738255, + "learning_rate": 3.7033215329437945e-06, + "loss": 0.1634, + "step": 15410 + }, + { + "epoch": 2.176080203332392, + "grad_norm": 4.0936006415291795, + "learning_rate": 3.702137312777907e-06, + "loss": 0.2131, + "step": 15411 + }, + { + "epoch": 2.176221406382378, + "grad_norm": 3.292819154971959, + "learning_rate": 3.7009532389716283e-06, + "loss": 0.1683, + "step": 15412 + }, + { + "epoch": 2.176362609432364, + "grad_norm": 2.566240218603671, + "learning_rate": 3.6997693115524767e-06, + "loss": 0.1377, + "step": 15413 + }, + { + "epoch": 2.1765038124823497, + "grad_norm": 2.9993635308870936, + "learning_rate": 3.6985855305479645e-06, + "loss": 0.1409, + "step": 15414 + }, + { + "epoch": 2.1766450155323356, + "grad_norm": 3.487226435877667, + "learning_rate": 3.697401895985604e-06, + "loss": 0.2029, + "step": 15415 + }, + { + "epoch": 2.1767862185823215, + "grad_norm": 3.082418296204254, + "learning_rate": 3.696218407892901e-06, + "loss": 0.157, + "step": 15416 + }, + { + "epoch": 2.1769274216323073, + "grad_norm": 2.9392330969402853, + "learning_rate": 3.69503506629736e-06, + "loss": 0.1356, + "step": 15417 + }, + { + "epoch": 2.1770686246822932, + "grad_norm": 3.1772935088259664, + "learning_rate": 3.6938518712264783e-06, + "loss": 0.1537, + "step": 15418 + }, + { + "epoch": 2.177209827732279, + "grad_norm": 2.7973819926962378, + "learning_rate": 3.692668822707758e-06, + "loss": 0.1101, + "step": 15419 + }, + { + "epoch": 2.177351030782265, + "grad_norm": 3.5444023652906864, + "learning_rate": 3.6914859207686916e-06, + "loss": 0.1875, + "step": 15420 + }, + { + "epoch": 2.177492233832251, + "grad_norm": 3.07895743309229, + "learning_rate": 3.6903031654367704e-06, + "loss": 0.1655, + "step": 15421 + }, + { + "epoch": 2.1776334368822368, + "grad_norm": 3.431575091072321, + "learning_rate": 3.689120556739475e-06, + "loss": 0.1765, + "step": 15422 + }, + { + "epoch": 2.1777746399322226, + "grad_norm": 3.0308792051961704, + "learning_rate": 3.68793809470429e-06, + "loss": 0.18, + "step": 15423 + }, + { + "epoch": 2.1779158429822085, + "grad_norm": 3.2060547388156104, + "learning_rate": 3.6867557793587005e-06, + "loss": 0.1604, + "step": 15424 + }, + { + "epoch": 2.1780570460321944, + "grad_norm": 2.7486338960706265, + "learning_rate": 3.6855736107301798e-06, + "loss": 0.1486, + "step": 15425 + }, + { + "epoch": 2.1781982490821803, + "grad_norm": 3.371587265788391, + "learning_rate": 3.6843915888462022e-06, + "loss": 0.203, + "step": 15426 + }, + { + "epoch": 2.178339452132166, + "grad_norm": 2.1483417185334965, + "learning_rate": 3.683209713734237e-06, + "loss": 0.1202, + "step": 15427 + }, + { + "epoch": 2.178480655182152, + "grad_norm": 2.8470306628142428, + "learning_rate": 3.682027985421749e-06, + "loss": 0.1494, + "step": 15428 + }, + { + "epoch": 2.178621858232138, + "grad_norm": 3.0798969842550004, + "learning_rate": 3.6808464039362025e-06, + "loss": 0.1611, + "step": 15429 + }, + { + "epoch": 2.178763061282124, + "grad_norm": 2.8322921348360186, + "learning_rate": 3.6796649693050568e-06, + "loss": 0.1306, + "step": 15430 + }, + { + "epoch": 2.1789042643321097, + "grad_norm": 3.625112437056275, + "learning_rate": 3.6784836815557667e-06, + "loss": 0.2418, + "step": 15431 + }, + { + "epoch": 2.1790454673820956, + "grad_norm": 3.0233691197743253, + "learning_rate": 3.677302540715787e-06, + "loss": 0.1293, + "step": 15432 + }, + { + "epoch": 2.1791866704320815, + "grad_norm": 3.874934332062811, + "learning_rate": 3.6761215468125645e-06, + "loss": 0.2121, + "step": 15433 + }, + { + "epoch": 2.1793278734820674, + "grad_norm": 4.180066645550741, + "learning_rate": 3.674940699873547e-06, + "loss": 0.1549, + "step": 15434 + }, + { + "epoch": 2.1794690765320532, + "grad_norm": 2.6413327577496966, + "learning_rate": 3.673759999926173e-06, + "loss": 0.152, + "step": 15435 + }, + { + "epoch": 2.179610279582039, + "grad_norm": 3.541495736770971, + "learning_rate": 3.672579446997887e-06, + "loss": 0.1903, + "step": 15436 + }, + { + "epoch": 2.179751482632025, + "grad_norm": 3.654485372123263, + "learning_rate": 3.6713990411161226e-06, + "loss": 0.1529, + "step": 15437 + }, + { + "epoch": 2.179892685682011, + "grad_norm": 2.8836447095673092, + "learning_rate": 3.6702187823083147e-06, + "loss": 0.1716, + "step": 15438 + }, + { + "epoch": 2.1800338887319968, + "grad_norm": 2.8071583988094218, + "learning_rate": 3.6690386706018845e-06, + "loss": 0.1875, + "step": 15439 + }, + { + "epoch": 2.1801750917819827, + "grad_norm": 2.967512565000615, + "learning_rate": 3.6678587060242586e-06, + "loss": 0.1543, + "step": 15440 + }, + { + "epoch": 2.1803162948319685, + "grad_norm": 2.9725164212249755, + "learning_rate": 3.666678888602866e-06, + "loss": 0.1588, + "step": 15441 + }, + { + "epoch": 2.1804574978819544, + "grad_norm": 3.066526127589556, + "learning_rate": 3.6654992183651193e-06, + "loss": 0.1544, + "step": 15442 + }, + { + "epoch": 2.1805987009319403, + "grad_norm": 2.6982121110481065, + "learning_rate": 3.6643196953384364e-06, + "loss": 0.1409, + "step": 15443 + }, + { + "epoch": 2.180739903981926, + "grad_norm": 3.3729030614087705, + "learning_rate": 3.663140319550227e-06, + "loss": 0.2017, + "step": 15444 + }, + { + "epoch": 2.180881107031912, + "grad_norm": 3.1231475995343825, + "learning_rate": 3.6619610910279002e-06, + "loss": 0.1847, + "step": 15445 + }, + { + "epoch": 2.181022310081898, + "grad_norm": 3.5244871861513847, + "learning_rate": 3.66078200979886e-06, + "loss": 0.2072, + "step": 15446 + }, + { + "epoch": 2.181163513131884, + "grad_norm": 3.7962253937521853, + "learning_rate": 3.659603075890509e-06, + "loss": 0.1711, + "step": 15447 + }, + { + "epoch": 2.1813047161818697, + "grad_norm": 2.752664722262145, + "learning_rate": 3.6584242893302436e-06, + "loss": 0.1511, + "step": 15448 + }, + { + "epoch": 2.1814459192318556, + "grad_norm": 3.0628206682511583, + "learning_rate": 3.657245650145459e-06, + "loss": 0.1665, + "step": 15449 + }, + { + "epoch": 2.1815871222818415, + "grad_norm": 3.2191298711439855, + "learning_rate": 3.6560671583635467e-06, + "loss": 0.1493, + "step": 15450 + }, + { + "epoch": 2.1817283253318274, + "grad_norm": 2.6842545405288023, + "learning_rate": 3.6548888140118943e-06, + "loss": 0.1125, + "step": 15451 + }, + { + "epoch": 2.1818695283818133, + "grad_norm": 3.534330896928706, + "learning_rate": 3.6537106171178817e-06, + "loss": 0.1697, + "step": 15452 + }, + { + "epoch": 2.182010731431799, + "grad_norm": 2.418358411102102, + "learning_rate": 3.652532567708896e-06, + "loss": 0.096, + "step": 15453 + }, + { + "epoch": 2.1821519344817846, + "grad_norm": 3.14295379651231, + "learning_rate": 3.651354665812313e-06, + "loss": 0.1307, + "step": 15454 + }, + { + "epoch": 2.1822931375317705, + "grad_norm": 2.7656813557578332, + "learning_rate": 3.6501769114555098e-06, + "loss": 0.1346, + "step": 15455 + }, + { + "epoch": 2.1824343405817563, + "grad_norm": 3.6881461191630525, + "learning_rate": 3.648999304665849e-06, + "loss": 0.1808, + "step": 15456 + }, + { + "epoch": 2.182575543631742, + "grad_norm": 3.534165451754848, + "learning_rate": 3.6478218454706983e-06, + "loss": 0.1695, + "step": 15457 + }, + { + "epoch": 2.182716746681728, + "grad_norm": 2.6771873401821042, + "learning_rate": 3.6466445338974276e-06, + "loss": 0.1483, + "step": 15458 + }, + { + "epoch": 2.182857949731714, + "grad_norm": 3.6080116822824833, + "learning_rate": 3.6454673699733944e-06, + "loss": 0.1412, + "step": 15459 + }, + { + "epoch": 2.1829991527817, + "grad_norm": 3.5758350250237045, + "learning_rate": 3.6442903537259556e-06, + "loss": 0.2014, + "step": 15460 + }, + { + "epoch": 2.1831403558316858, + "grad_norm": 3.158750203895158, + "learning_rate": 3.6431134851824636e-06, + "loss": 0.191, + "step": 15461 + }, + { + "epoch": 2.1832815588816716, + "grad_norm": 3.8110112606242406, + "learning_rate": 3.6419367643702684e-06, + "loss": 0.1701, + "step": 15462 + }, + { + "epoch": 2.1834227619316575, + "grad_norm": 3.2163903311923443, + "learning_rate": 3.640760191316718e-06, + "loss": 0.165, + "step": 15463 + }, + { + "epoch": 2.1835639649816434, + "grad_norm": 3.193217910165306, + "learning_rate": 3.6395837660491538e-06, + "loss": 0.1494, + "step": 15464 + }, + { + "epoch": 2.1837051680316293, + "grad_norm": 3.5142204541464577, + "learning_rate": 3.638407488594916e-06, + "loss": 0.1474, + "step": 15465 + }, + { + "epoch": 2.183846371081615, + "grad_norm": 3.1809566022361935, + "learning_rate": 3.637231358981341e-06, + "loss": 0.1577, + "step": 15466 + }, + { + "epoch": 2.183987574131601, + "grad_norm": 3.5099659995980295, + "learning_rate": 3.636055377235762e-06, + "loss": 0.1692, + "step": 15467 + }, + { + "epoch": 2.184128777181587, + "grad_norm": 3.2532870930739692, + "learning_rate": 3.6348795433855064e-06, + "loss": 0.1542, + "step": 15468 + }, + { + "epoch": 2.184269980231573, + "grad_norm": 3.2782038186335765, + "learning_rate": 3.6337038574578988e-06, + "loss": 0.1698, + "step": 15469 + }, + { + "epoch": 2.1844111832815587, + "grad_norm": 3.5817459850788276, + "learning_rate": 3.6325283194802675e-06, + "loss": 0.1871, + "step": 15470 + }, + { + "epoch": 2.1845523863315446, + "grad_norm": 2.6448900413850946, + "learning_rate": 3.6313529294799275e-06, + "loss": 0.16, + "step": 15471 + }, + { + "epoch": 2.1846935893815305, + "grad_norm": 2.5208165213065676, + "learning_rate": 3.6301776874841997e-06, + "loss": 0.1581, + "step": 15472 + }, + { + "epoch": 2.1848347924315163, + "grad_norm": 3.1225711057643695, + "learning_rate": 3.6290025935203864e-06, + "loss": 0.1898, + "step": 15473 + }, + { + "epoch": 2.1849759954815022, + "grad_norm": 3.403694052392181, + "learning_rate": 3.6278276476158025e-06, + "loss": 0.1458, + "step": 15474 + }, + { + "epoch": 2.185117198531488, + "grad_norm": 3.1226971520894713, + "learning_rate": 3.6266528497977495e-06, + "loss": 0.1704, + "step": 15475 + }, + { + "epoch": 2.185258401581474, + "grad_norm": 3.0105009340355338, + "learning_rate": 3.625478200093534e-06, + "loss": 0.1883, + "step": 15476 + }, + { + "epoch": 2.18539960463146, + "grad_norm": 3.2162702038479525, + "learning_rate": 3.6243036985304525e-06, + "loss": 0.1557, + "step": 15477 + }, + { + "epoch": 2.1855408076814458, + "grad_norm": 3.246406867101034, + "learning_rate": 3.6231293451357994e-06, + "loss": 0.1541, + "step": 15478 + }, + { + "epoch": 2.1856820107314316, + "grad_norm": 3.1570231730553378, + "learning_rate": 3.621955139936867e-06, + "loss": 0.1598, + "step": 15479 + }, + { + "epoch": 2.1858232137814175, + "grad_norm": 3.0622882854010336, + "learning_rate": 3.6207810829609414e-06, + "loss": 0.1443, + "step": 15480 + }, + { + "epoch": 2.1859644168314034, + "grad_norm": 3.2430220739027398, + "learning_rate": 3.6196071742353087e-06, + "loss": 0.1617, + "step": 15481 + }, + { + "epoch": 2.1861056198813893, + "grad_norm": 3.169693848274505, + "learning_rate": 3.6184334137872514e-06, + "loss": 0.1687, + "step": 15482 + }, + { + "epoch": 2.186246822931375, + "grad_norm": 2.869992721617655, + "learning_rate": 3.6172598016440443e-06, + "loss": 0.137, + "step": 15483 + }, + { + "epoch": 2.186388025981361, + "grad_norm": 3.3487323292351765, + "learning_rate": 3.6160863378329636e-06, + "loss": 0.1711, + "step": 15484 + }, + { + "epoch": 2.186529229031347, + "grad_norm": 2.8589989866442944, + "learning_rate": 3.614913022381279e-06, + "loss": 0.135, + "step": 15485 + }, + { + "epoch": 2.186670432081333, + "grad_norm": 2.3762411535808687, + "learning_rate": 3.613739855316257e-06, + "loss": 0.1128, + "step": 15486 + }, + { + "epoch": 2.1868116351313187, + "grad_norm": 3.4808161481006477, + "learning_rate": 3.6125668366651613e-06, + "loss": 0.1975, + "step": 15487 + }, + { + "epoch": 2.1869528381813046, + "grad_norm": 2.9210077402670014, + "learning_rate": 3.6113939664552567e-06, + "loss": 0.1385, + "step": 15488 + }, + { + "epoch": 2.1870940412312905, + "grad_norm": 3.288617646368304, + "learning_rate": 3.6102212447137995e-06, + "loss": 0.1307, + "step": 15489 + }, + { + "epoch": 2.1872352442812764, + "grad_norm": 3.410335665689771, + "learning_rate": 3.6090486714680396e-06, + "loss": 0.1823, + "step": 15490 + }, + { + "epoch": 2.1873764473312622, + "grad_norm": 3.660905365325739, + "learning_rate": 3.607876246745228e-06, + "loss": 0.202, + "step": 15491 + }, + { + "epoch": 2.187517650381248, + "grad_norm": 3.020272203245384, + "learning_rate": 3.606703970572607e-06, + "loss": 0.1671, + "step": 15492 + }, + { + "epoch": 2.187658853431234, + "grad_norm": 2.6932189650276976, + "learning_rate": 3.6055318429774287e-06, + "loss": 0.1351, + "step": 15493 + }, + { + "epoch": 2.18780005648122, + "grad_norm": 3.0913154482352074, + "learning_rate": 3.6043598639869293e-06, + "loss": 0.1663, + "step": 15494 + }, + { + "epoch": 2.1879412595312058, + "grad_norm": 2.7071655431665045, + "learning_rate": 3.6031880336283453e-06, + "loss": 0.1411, + "step": 15495 + }, + { + "epoch": 2.1880824625811917, + "grad_norm": 3.2039178833115938, + "learning_rate": 3.6020163519289077e-06, + "loss": 0.1524, + "step": 15496 + }, + { + "epoch": 2.1882236656311775, + "grad_norm": 3.205883948218298, + "learning_rate": 3.6008448189158472e-06, + "loss": 0.1371, + "step": 15497 + }, + { + "epoch": 2.1883648686811634, + "grad_norm": 2.8837696156068633, + "learning_rate": 3.5996734346163897e-06, + "loss": 0.1116, + "step": 15498 + }, + { + "epoch": 2.1885060717311493, + "grad_norm": 3.0605450916357606, + "learning_rate": 3.5985021990577575e-06, + "loss": 0.1363, + "step": 15499 + }, + { + "epoch": 2.188647274781135, + "grad_norm": 2.960532549716206, + "learning_rate": 3.5973311122671695e-06, + "loss": 0.1575, + "step": 15500 + }, + { + "epoch": 2.188788477831121, + "grad_norm": 2.556969891109087, + "learning_rate": 3.596160174271841e-06, + "loss": 0.1097, + "step": 15501 + }, + { + "epoch": 2.188929680881107, + "grad_norm": 2.393406266888482, + "learning_rate": 3.594989385098985e-06, + "loss": 0.0893, + "step": 15502 + }, + { + "epoch": 2.189070883931093, + "grad_norm": 2.87106077466674, + "learning_rate": 3.5938187447758098e-06, + "loss": 0.1418, + "step": 15503 + }, + { + "epoch": 2.1892120869810787, + "grad_norm": 3.498501298338676, + "learning_rate": 3.592648253329516e-06, + "loss": 0.1455, + "step": 15504 + }, + { + "epoch": 2.1893532900310646, + "grad_norm": 3.312113969650062, + "learning_rate": 3.5914779107873145e-06, + "loss": 0.1903, + "step": 15505 + }, + { + "epoch": 2.1894944930810505, + "grad_norm": 3.3138531253366783, + "learning_rate": 3.590307717176401e-06, + "loss": 0.1548, + "step": 15506 + }, + { + "epoch": 2.1896356961310364, + "grad_norm": 3.4241178725784516, + "learning_rate": 3.5891376725239648e-06, + "loss": 0.1396, + "step": 15507 + }, + { + "epoch": 2.1897768991810223, + "grad_norm": 3.8812288976148923, + "learning_rate": 3.587967776857201e-06, + "loss": 0.1762, + "step": 15508 + }, + { + "epoch": 2.189918102231008, + "grad_norm": 3.0153213031071555, + "learning_rate": 3.5867980302032925e-06, + "loss": 0.1231, + "step": 15509 + }, + { + "epoch": 2.190059305280994, + "grad_norm": 3.689432467746225, + "learning_rate": 3.585628432589432e-06, + "loss": 0.1773, + "step": 15510 + }, + { + "epoch": 2.19020050833098, + "grad_norm": 4.087967401312385, + "learning_rate": 3.5844589840427968e-06, + "loss": 0.1768, + "step": 15511 + }, + { + "epoch": 2.190341711380966, + "grad_norm": 2.7355034519757053, + "learning_rate": 3.583289684590564e-06, + "loss": 0.1159, + "step": 15512 + }, + { + "epoch": 2.1904829144309517, + "grad_norm": 3.32114138797245, + "learning_rate": 3.5821205342599075e-06, + "loss": 0.132, + "step": 15513 + }, + { + "epoch": 2.1906241174809375, + "grad_norm": 3.3275525807685544, + "learning_rate": 3.5809515330779985e-06, + "loss": 0.1565, + "step": 15514 + }, + { + "epoch": 2.1907653205309234, + "grad_norm": 3.401397357156667, + "learning_rate": 3.579782681072004e-06, + "loss": 0.1589, + "step": 15515 + }, + { + "epoch": 2.1909065235809093, + "grad_norm": 3.896007963400888, + "learning_rate": 3.5786139782690867e-06, + "loss": 0.1544, + "step": 15516 + }, + { + "epoch": 2.191047726630895, + "grad_norm": 3.5068807068076473, + "learning_rate": 3.5774454246964075e-06, + "loss": 0.15, + "step": 15517 + }, + { + "epoch": 2.191188929680881, + "grad_norm": 2.346000358708473, + "learning_rate": 3.5762770203811225e-06, + "loss": 0.0986, + "step": 15518 + }, + { + "epoch": 2.191330132730867, + "grad_norm": 4.108947430965978, + "learning_rate": 3.5751087653503856e-06, + "loss": 0.1797, + "step": 15519 + }, + { + "epoch": 2.191471335780853, + "grad_norm": 3.210085783801295, + "learning_rate": 3.5739406596313474e-06, + "loss": 0.1647, + "step": 15520 + }, + { + "epoch": 2.1916125388308387, + "grad_norm": 3.149979224664698, + "learning_rate": 3.5727727032511483e-06, + "loss": 0.1503, + "step": 15521 + }, + { + "epoch": 2.1917537418808246, + "grad_norm": 3.0772933012260206, + "learning_rate": 3.57160489623694e-06, + "loss": 0.1483, + "step": 15522 + }, + { + "epoch": 2.1918949449308105, + "grad_norm": 3.0396943831990257, + "learning_rate": 3.5704372386158602e-06, + "loss": 0.1241, + "step": 15523 + }, + { + "epoch": 2.1920361479807964, + "grad_norm": 3.1724040118368615, + "learning_rate": 3.56926973041504e-06, + "loss": 0.1607, + "step": 15524 + }, + { + "epoch": 2.1921773510307823, + "grad_norm": 3.56742572941602, + "learning_rate": 3.5681023716616136e-06, + "loss": 0.2228, + "step": 15525 + }, + { + "epoch": 2.192318554080768, + "grad_norm": 3.046897843931735, + "learning_rate": 3.5669351623827077e-06, + "loss": 0.1642, + "step": 15526 + }, + { + "epoch": 2.192459757130754, + "grad_norm": 2.889145676504674, + "learning_rate": 3.5657681026054523e-06, + "loss": 0.1663, + "step": 15527 + }, + { + "epoch": 2.19260096018074, + "grad_norm": 3.598844002476634, + "learning_rate": 3.564601192356968e-06, + "loss": 0.1782, + "step": 15528 + }, + { + "epoch": 2.192742163230726, + "grad_norm": 3.6396644539254535, + "learning_rate": 3.5634344316643734e-06, + "loss": 0.1486, + "step": 15529 + }, + { + "epoch": 2.1928833662807117, + "grad_norm": 3.5003476093235104, + "learning_rate": 3.5622678205547824e-06, + "loss": 0.1812, + "step": 15530 + }, + { + "epoch": 2.1930245693306976, + "grad_norm": 4.349272828361988, + "learning_rate": 3.5611013590553066e-06, + "loss": 0.2196, + "step": 15531 + }, + { + "epoch": 2.1931657723806834, + "grad_norm": 3.1974056349920827, + "learning_rate": 3.559935047193055e-06, + "loss": 0.1188, + "step": 15532 + }, + { + "epoch": 2.1933069754306693, + "grad_norm": 3.158794859379246, + "learning_rate": 3.558768884995132e-06, + "loss": 0.1648, + "step": 15533 + }, + { + "epoch": 2.193448178480655, + "grad_norm": 2.9644594413014334, + "learning_rate": 3.557602872488638e-06, + "loss": 0.1459, + "step": 15534 + }, + { + "epoch": 2.193589381530641, + "grad_norm": 3.203096089221971, + "learning_rate": 3.5564370097006706e-06, + "loss": 0.1641, + "step": 15535 + }, + { + "epoch": 2.193730584580627, + "grad_norm": 3.29050921376092, + "learning_rate": 3.5552712966583247e-06, + "loss": 0.1559, + "step": 15536 + }, + { + "epoch": 2.193871787630613, + "grad_norm": 2.928743308749797, + "learning_rate": 3.5541057333886906e-06, + "loss": 0.1482, + "step": 15537 + }, + { + "epoch": 2.1940129906805987, + "grad_norm": 2.9213264167337436, + "learning_rate": 3.5529403199188517e-06, + "loss": 0.174, + "step": 15538 + }, + { + "epoch": 2.1941541937305846, + "grad_norm": 4.008914285312878, + "learning_rate": 3.5517750562759025e-06, + "loss": 0.1611, + "step": 15539 + }, + { + "epoch": 2.1942953967805705, + "grad_norm": 2.977424809507221, + "learning_rate": 3.5506099424869133e-06, + "loss": 0.1398, + "step": 15540 + }, + { + "epoch": 2.1944365998305564, + "grad_norm": 2.7763184853018874, + "learning_rate": 3.5494449785789633e-06, + "loss": 0.1376, + "step": 15541 + }, + { + "epoch": 2.1945778028805423, + "grad_norm": 2.6082195398341126, + "learning_rate": 3.5482801645791266e-06, + "loss": 0.1344, + "step": 15542 + }, + { + "epoch": 2.194719005930528, + "grad_norm": 3.660159219676567, + "learning_rate": 3.5471155005144685e-06, + "loss": 0.1815, + "step": 15543 + }, + { + "epoch": 2.194860208980514, + "grad_norm": 3.7277000069190387, + "learning_rate": 3.545950986412063e-06, + "loss": 0.1983, + "step": 15544 + }, + { + "epoch": 2.1950014120305, + "grad_norm": 3.239905666572818, + "learning_rate": 3.5447866222989703e-06, + "loss": 0.1383, + "step": 15545 + }, + { + "epoch": 2.195142615080486, + "grad_norm": 3.696004265027511, + "learning_rate": 3.5436224082022476e-06, + "loss": 0.1737, + "step": 15546 + }, + { + "epoch": 2.1952838181304717, + "grad_norm": 3.6919058263885693, + "learning_rate": 3.5424583441489525e-06, + "loss": 0.1749, + "step": 15547 + }, + { + "epoch": 2.1954250211804576, + "grad_norm": 2.9146650316629095, + "learning_rate": 3.5412944301661356e-06, + "loss": 0.1354, + "step": 15548 + }, + { + "epoch": 2.1955662242304435, + "grad_norm": 2.8525384281847868, + "learning_rate": 3.5401306662808476e-06, + "loss": 0.1452, + "step": 15549 + }, + { + "epoch": 2.1957074272804293, + "grad_norm": 3.4674071214332702, + "learning_rate": 3.5389670525201335e-06, + "loss": 0.1594, + "step": 15550 + }, + { + "epoch": 2.195848630330415, + "grad_norm": 3.744334989956807, + "learning_rate": 3.537803588911034e-06, + "loss": 0.168, + "step": 15551 + }, + { + "epoch": 2.195989833380401, + "grad_norm": 2.628210090421226, + "learning_rate": 3.5366402754805885e-06, + "loss": 0.111, + "step": 15552 + }, + { + "epoch": 2.196131036430387, + "grad_norm": 3.6926593068564064, + "learning_rate": 3.5354771122558317e-06, + "loss": 0.1899, + "step": 15553 + }, + { + "epoch": 2.196272239480373, + "grad_norm": 3.050958870320738, + "learning_rate": 3.5343140992637946e-06, + "loss": 0.1469, + "step": 15554 + }, + { + "epoch": 2.1964134425303588, + "grad_norm": 4.066587017311011, + "learning_rate": 3.5331512365315035e-06, + "loss": 0.1865, + "step": 15555 + }, + { + "epoch": 2.1965546455803446, + "grad_norm": 3.174462163781929, + "learning_rate": 3.531988524085991e-06, + "loss": 0.1781, + "step": 15556 + }, + { + "epoch": 2.1966958486303305, + "grad_norm": 2.958540575444288, + "learning_rate": 3.5308259619542675e-06, + "loss": 0.1338, + "step": 15557 + }, + { + "epoch": 2.1968370516803164, + "grad_norm": 3.4296430864089653, + "learning_rate": 3.5296635501633558e-06, + "loss": 0.1531, + "step": 15558 + }, + { + "epoch": 2.1969782547303023, + "grad_norm": 3.420116708503005, + "learning_rate": 3.5285012887402693e-06, + "loss": 0.1668, + "step": 15559 + }, + { + "epoch": 2.197119457780288, + "grad_norm": 2.604031384113819, + "learning_rate": 3.5273391777120136e-06, + "loss": 0.1446, + "step": 15560 + }, + { + "epoch": 2.197260660830274, + "grad_norm": 3.5575521341827065, + "learning_rate": 3.5261772171056043e-06, + "loss": 0.165, + "step": 15561 + }, + { + "epoch": 2.19740186388026, + "grad_norm": 2.5490296077341927, + "learning_rate": 3.525015406948039e-06, + "loss": 0.1243, + "step": 15562 + }, + { + "epoch": 2.197543066930246, + "grad_norm": 2.9491939267605267, + "learning_rate": 3.5238537472663214e-06, + "loss": 0.1397, + "step": 15563 + }, + { + "epoch": 2.1976842699802317, + "grad_norm": 2.79821879202331, + "learning_rate": 3.522692238087445e-06, + "loss": 0.1281, + "step": 15564 + }, + { + "epoch": 2.1978254730302176, + "grad_norm": 4.1682155702094414, + "learning_rate": 3.5215308794384073e-06, + "loss": 0.221, + "step": 15565 + }, + { + "epoch": 2.1979666760802035, + "grad_norm": 3.7080448116804634, + "learning_rate": 3.5203696713461866e-06, + "loss": 0.1559, + "step": 15566 + }, + { + "epoch": 2.1981078791301893, + "grad_norm": 2.792999718789201, + "learning_rate": 3.5192086138377803e-06, + "loss": 0.1457, + "step": 15567 + }, + { + "epoch": 2.1982490821801752, + "grad_norm": 3.6272383634385177, + "learning_rate": 3.5180477069401664e-06, + "loss": 0.1744, + "step": 15568 + }, + { + "epoch": 2.198390285230161, + "grad_norm": 3.53276842630283, + "learning_rate": 3.516886950680326e-06, + "loss": 0.198, + "step": 15569 + }, + { + "epoch": 2.198531488280147, + "grad_norm": 2.527188342883634, + "learning_rate": 3.515726345085232e-06, + "loss": 0.1339, + "step": 15570 + }, + { + "epoch": 2.198672691330133, + "grad_norm": 2.9393858275727687, + "learning_rate": 3.514565890181857e-06, + "loss": 0.1429, + "step": 15571 + }, + { + "epoch": 2.1988138943801188, + "grad_norm": 3.6430317408716233, + "learning_rate": 3.5134055859971674e-06, + "loss": 0.1757, + "step": 15572 + }, + { + "epoch": 2.1989550974301046, + "grad_norm": 3.649663171762288, + "learning_rate": 3.5122454325581368e-06, + "loss": 0.1391, + "step": 15573 + }, + { + "epoch": 2.1990963004800905, + "grad_norm": 3.5075618484218074, + "learning_rate": 3.511085429891716e-06, + "loss": 0.1453, + "step": 15574 + }, + { + "epoch": 2.1992375035300764, + "grad_norm": 2.6316673569793068, + "learning_rate": 3.5099255780248696e-06, + "loss": 0.1384, + "step": 15575 + }, + { + "epoch": 2.1993787065800623, + "grad_norm": 3.39901094578447, + "learning_rate": 3.508765876984549e-06, + "loss": 0.1402, + "step": 15576 + }, + { + "epoch": 2.199519909630048, + "grad_norm": 3.047342225131142, + "learning_rate": 3.5076063267977055e-06, + "loss": 0.1509, + "step": 15577 + }, + { + "epoch": 2.199661112680034, + "grad_norm": 3.932402688371889, + "learning_rate": 3.5064469274912847e-06, + "loss": 0.2017, + "step": 15578 + }, + { + "epoch": 2.19980231573002, + "grad_norm": 2.6601033492229433, + "learning_rate": 3.5052876790922364e-06, + "loss": 0.144, + "step": 15579 + }, + { + "epoch": 2.199943518780006, + "grad_norm": 3.4448177441183034, + "learning_rate": 3.504128581627497e-06, + "loss": 0.1826, + "step": 15580 + }, + { + "epoch": 2.2000847218299917, + "grad_norm": 3.2706589267061683, + "learning_rate": 3.5029696351240038e-06, + "loss": 0.1807, + "step": 15581 + }, + { + "epoch": 2.2002259248799776, + "grad_norm": 2.742753398924691, + "learning_rate": 3.5018108396086945e-06, + "loss": 0.174, + "step": 15582 + }, + { + "epoch": 2.2003671279299635, + "grad_norm": 2.953193861342629, + "learning_rate": 3.5006521951084873e-06, + "loss": 0.1661, + "step": 15583 + }, + { + "epoch": 2.2005083309799494, + "grad_norm": 2.526300017828238, + "learning_rate": 3.4994937016503206e-06, + "loss": 0.1097, + "step": 15584 + }, + { + "epoch": 2.2006495340299352, + "grad_norm": 4.370387192762125, + "learning_rate": 3.4983353592611124e-06, + "loss": 0.2159, + "step": 15585 + }, + { + "epoch": 2.200790737079921, + "grad_norm": 2.9884731553413197, + "learning_rate": 3.4971771679677826e-06, + "loss": 0.1593, + "step": 15586 + }, + { + "epoch": 2.200931940129907, + "grad_norm": 3.4606977705397877, + "learning_rate": 3.4960191277972466e-06, + "loss": 0.1884, + "step": 15587 + }, + { + "epoch": 2.201073143179893, + "grad_norm": 2.9784144373373094, + "learning_rate": 3.494861238776418e-06, + "loss": 0.1246, + "step": 15588 + }, + { + "epoch": 2.2012143462298788, + "grad_norm": 3.3163326437687464, + "learning_rate": 3.493703500932205e-06, + "loss": 0.1451, + "step": 15589 + }, + { + "epoch": 2.2013555492798647, + "grad_norm": 2.5299314026716004, + "learning_rate": 3.492545914291512e-06, + "loss": 0.1484, + "step": 15590 + }, + { + "epoch": 2.2014967523298505, + "grad_norm": 3.720526483263536, + "learning_rate": 3.4913884788812426e-06, + "loss": 0.2066, + "step": 15591 + }, + { + "epoch": 2.2016379553798364, + "grad_norm": 2.8546504844420375, + "learning_rate": 3.490231194728293e-06, + "loss": 0.1573, + "step": 15592 + }, + { + "epoch": 2.2017791584298223, + "grad_norm": 3.145963293252708, + "learning_rate": 3.4890740618595597e-06, + "loss": 0.1495, + "step": 15593 + }, + { + "epoch": 2.201920361479808, + "grad_norm": 2.6492356628309106, + "learning_rate": 3.4879170803019336e-06, + "loss": 0.1589, + "step": 15594 + }, + { + "epoch": 2.202061564529794, + "grad_norm": 3.4177223058894612, + "learning_rate": 3.4867602500822984e-06, + "loss": 0.1905, + "step": 15595 + }, + { + "epoch": 2.20220276757978, + "grad_norm": 3.2071947879860376, + "learning_rate": 3.485603571227545e-06, + "loss": 0.146, + "step": 15596 + }, + { + "epoch": 2.202343970629766, + "grad_norm": 2.604289033682239, + "learning_rate": 3.4844470437645515e-06, + "loss": 0.1496, + "step": 15597 + }, + { + "epoch": 2.2024851736797517, + "grad_norm": 3.3015462211953333, + "learning_rate": 3.483290667720196e-06, + "loss": 0.1804, + "step": 15598 + }, + { + "epoch": 2.202626376729737, + "grad_norm": 3.141592440690824, + "learning_rate": 3.4821344431213533e-06, + "loss": 0.1309, + "step": 15599 + }, + { + "epoch": 2.202767579779723, + "grad_norm": 3.2791581681809157, + "learning_rate": 3.480978369994885e-06, + "loss": 0.186, + "step": 15600 + }, + { + "epoch": 2.202908782829709, + "grad_norm": 3.794432429743621, + "learning_rate": 3.4798224483676667e-06, + "loss": 0.2188, + "step": 15601 + }, + { + "epoch": 2.203049985879695, + "grad_norm": 2.818365810982154, + "learning_rate": 3.478666678266559e-06, + "loss": 0.158, + "step": 15602 + }, + { + "epoch": 2.2031911889296807, + "grad_norm": 2.877744586637356, + "learning_rate": 3.4775110597184226e-06, + "loss": 0.1725, + "step": 15603 + }, + { + "epoch": 2.2033323919796666, + "grad_norm": 3.206000978097044, + "learning_rate": 3.4763555927501113e-06, + "loss": 0.153, + "step": 15604 + }, + { + "epoch": 2.2034735950296525, + "grad_norm": 3.1125095190367604, + "learning_rate": 3.4752002773884784e-06, + "loss": 0.1737, + "step": 15605 + }, + { + "epoch": 2.2036147980796383, + "grad_norm": 3.0557712796713203, + "learning_rate": 3.4740451136603737e-06, + "loss": 0.1476, + "step": 15606 + }, + { + "epoch": 2.203756001129624, + "grad_norm": 3.5228812255998383, + "learning_rate": 3.472890101592642e-06, + "loss": 0.1831, + "step": 15607 + }, + { + "epoch": 2.20389720417961, + "grad_norm": 2.4463382648254317, + "learning_rate": 3.4717352412121254e-06, + "loss": 0.1241, + "step": 15608 + }, + { + "epoch": 2.204038407229596, + "grad_norm": 2.3047849335814425, + "learning_rate": 3.4705805325456632e-06, + "loss": 0.1304, + "step": 15609 + }, + { + "epoch": 2.204179610279582, + "grad_norm": 2.7671663549180487, + "learning_rate": 3.4694259756200888e-06, + "loss": 0.1214, + "step": 15610 + }, + { + "epoch": 2.2043208133295678, + "grad_norm": 3.154662128019658, + "learning_rate": 3.468271570462235e-06, + "loss": 0.1813, + "step": 15611 + }, + { + "epoch": 2.2044620163795536, + "grad_norm": 3.921755315844086, + "learning_rate": 3.467117317098925e-06, + "loss": 0.1957, + "step": 15612 + }, + { + "epoch": 2.2046032194295395, + "grad_norm": 3.4731848085552617, + "learning_rate": 3.465963215556991e-06, + "loss": 0.13, + "step": 15613 + }, + { + "epoch": 2.2047444224795254, + "grad_norm": 3.136391809862563, + "learning_rate": 3.4648092658632506e-06, + "loss": 0.1492, + "step": 15614 + }, + { + "epoch": 2.2048856255295113, + "grad_norm": 3.7541042780481337, + "learning_rate": 3.463655468044519e-06, + "loss": 0.178, + "step": 15615 + }, + { + "epoch": 2.205026828579497, + "grad_norm": 2.9272664229772407, + "learning_rate": 3.4625018221276165e-06, + "loss": 0.1399, + "step": 15616 + }, + { + "epoch": 2.205168031629483, + "grad_norm": 2.870549763141004, + "learning_rate": 3.46134832813934e-06, + "loss": 0.1595, + "step": 15617 + }, + { + "epoch": 2.205309234679469, + "grad_norm": 2.544576975705254, + "learning_rate": 3.4601949861065086e-06, + "loss": 0.1311, + "step": 15618 + }, + { + "epoch": 2.205450437729455, + "grad_norm": 3.069497534194778, + "learning_rate": 3.459041796055922e-06, + "loss": 0.1586, + "step": 15619 + }, + { + "epoch": 2.2055916407794407, + "grad_norm": 3.1570581807545515, + "learning_rate": 3.4578887580143793e-06, + "loss": 0.1427, + "step": 15620 + }, + { + "epoch": 2.2057328438294266, + "grad_norm": 3.286948367213691, + "learning_rate": 3.4567358720086753e-06, + "loss": 0.1467, + "step": 15621 + }, + { + "epoch": 2.2058740468794125, + "grad_norm": 2.9849117498271327, + "learning_rate": 3.4555831380656044e-06, + "loss": 0.1221, + "step": 15622 + }, + { + "epoch": 2.2060152499293983, + "grad_norm": 3.879846768865645, + "learning_rate": 3.4544305562119562e-06, + "loss": 0.1821, + "step": 15623 + }, + { + "epoch": 2.2061564529793842, + "grad_norm": 3.679068931812458, + "learning_rate": 3.453278126474513e-06, + "loss": 0.1598, + "step": 15624 + }, + { + "epoch": 2.20629765602937, + "grad_norm": 3.1669463318673126, + "learning_rate": 3.45212584888006e-06, + "loss": 0.1704, + "step": 15625 + }, + { + "epoch": 2.206438859079356, + "grad_norm": 3.573749137795458, + "learning_rate": 3.4509737234553752e-06, + "loss": 0.2042, + "step": 15626 + }, + { + "epoch": 2.206580062129342, + "grad_norm": 4.138196376238599, + "learning_rate": 3.4498217502272313e-06, + "loss": 0.2017, + "step": 15627 + }, + { + "epoch": 2.2067212651793278, + "grad_norm": 3.1905621942805835, + "learning_rate": 3.4486699292224022e-06, + "loss": 0.1498, + "step": 15628 + }, + { + "epoch": 2.2068624682293136, + "grad_norm": 3.091015484893001, + "learning_rate": 3.4475182604676505e-06, + "loss": 0.1624, + "step": 15629 + }, + { + "epoch": 2.2070036712792995, + "grad_norm": 3.788176280245027, + "learning_rate": 3.4463667439897486e-06, + "loss": 0.1888, + "step": 15630 + }, + { + "epoch": 2.2071448743292854, + "grad_norm": 4.032984696661167, + "learning_rate": 3.445215379815453e-06, + "loss": 0.2018, + "step": 15631 + }, + { + "epoch": 2.2072860773792713, + "grad_norm": 3.1215908446261396, + "learning_rate": 3.4440641679715204e-06, + "loss": 0.1527, + "step": 15632 + }, + { + "epoch": 2.207427280429257, + "grad_norm": 3.2332655314644745, + "learning_rate": 3.44291310848471e-06, + "loss": 0.1586, + "step": 15633 + }, + { + "epoch": 2.207568483479243, + "grad_norm": 3.312279772390816, + "learning_rate": 3.4417622013817597e-06, + "loss": 0.1738, + "step": 15634 + }, + { + "epoch": 2.207709686529229, + "grad_norm": 2.832093298271664, + "learning_rate": 3.4406114466894267e-06, + "loss": 0.1421, + "step": 15635 + }, + { + "epoch": 2.207850889579215, + "grad_norm": 3.9741969377328177, + "learning_rate": 3.4394608444344514e-06, + "loss": 0.1664, + "step": 15636 + }, + { + "epoch": 2.2079920926292007, + "grad_norm": 3.07307251897038, + "learning_rate": 3.4383103946435727e-06, + "loss": 0.1267, + "step": 15637 + }, + { + "epoch": 2.2081332956791866, + "grad_norm": 3.050982191181699, + "learning_rate": 3.437160097343526e-06, + "loss": 0.1488, + "step": 15638 + }, + { + "epoch": 2.2082744987291725, + "grad_norm": 2.5997733705230006, + "learning_rate": 3.4360099525610457e-06, + "loss": 0.1345, + "step": 15639 + }, + { + "epoch": 2.2084157017791584, + "grad_norm": 3.001367584192087, + "learning_rate": 3.4348599603228584e-06, + "loss": 0.1721, + "step": 15640 + }, + { + "epoch": 2.2085569048291442, + "grad_norm": 3.3351313907572355, + "learning_rate": 3.43371012065569e-06, + "loss": 0.1912, + "step": 15641 + }, + { + "epoch": 2.20869810787913, + "grad_norm": 3.029817811029279, + "learning_rate": 3.432560433586264e-06, + "loss": 0.1428, + "step": 15642 + }, + { + "epoch": 2.208839310929116, + "grad_norm": 3.6562105950354757, + "learning_rate": 3.4314108991412962e-06, + "loss": 0.1633, + "step": 15643 + }, + { + "epoch": 2.208980513979102, + "grad_norm": 3.092036722756669, + "learning_rate": 3.4302615173475017e-06, + "loss": 0.1451, + "step": 15644 + }, + { + "epoch": 2.2091217170290878, + "grad_norm": 3.9595077309896407, + "learning_rate": 3.429112288231594e-06, + "loss": 0.2064, + "step": 15645 + }, + { + "epoch": 2.2092629200790737, + "grad_norm": 4.028040810081268, + "learning_rate": 3.4279632118202744e-06, + "loss": 0.1862, + "step": 15646 + }, + { + "epoch": 2.2094041231290595, + "grad_norm": 3.350279285656922, + "learning_rate": 3.4268142881402556e-06, + "loss": 0.1603, + "step": 15647 + }, + { + "epoch": 2.2095453261790454, + "grad_norm": 2.9739180107893053, + "learning_rate": 3.4256655172182328e-06, + "loss": 0.162, + "step": 15648 + }, + { + "epoch": 2.2096865292290313, + "grad_norm": 3.262406257037821, + "learning_rate": 3.424516899080905e-06, + "loss": 0.195, + "step": 15649 + }, + { + "epoch": 2.209827732279017, + "grad_norm": 3.388567344602636, + "learning_rate": 3.423368433754969e-06, + "loss": 0.1289, + "step": 15650 + }, + { + "epoch": 2.209968935329003, + "grad_norm": 3.0903830180643848, + "learning_rate": 3.4222201212671026e-06, + "loss": 0.152, + "step": 15651 + }, + { + "epoch": 2.210110138378989, + "grad_norm": 2.858756922274672, + "learning_rate": 3.4210719616440025e-06, + "loss": 0.1717, + "step": 15652 + }, + { + "epoch": 2.210251341428975, + "grad_norm": 3.6556604304406863, + "learning_rate": 3.4199239549123497e-06, + "loss": 0.1778, + "step": 15653 + }, + { + "epoch": 2.2103925444789607, + "grad_norm": 3.724922685326456, + "learning_rate": 3.418776101098823e-06, + "loss": 0.181, + "step": 15654 + }, + { + "epoch": 2.2105337475289466, + "grad_norm": 3.537408361320555, + "learning_rate": 3.4176284002300963e-06, + "loss": 0.1876, + "step": 15655 + }, + { + "epoch": 2.2106749505789325, + "grad_norm": 3.3643299438794307, + "learning_rate": 3.416480852332844e-06, + "loss": 0.1657, + "step": 15656 + }, + { + "epoch": 2.2108161536289184, + "grad_norm": 2.5821043413242806, + "learning_rate": 3.415333457433733e-06, + "loss": 0.1603, + "step": 15657 + }, + { + "epoch": 2.2109573566789043, + "grad_norm": 2.656925866407468, + "learning_rate": 3.4141862155594283e-06, + "loss": 0.1511, + "step": 15658 + }, + { + "epoch": 2.21109855972889, + "grad_norm": 3.268806224645829, + "learning_rate": 3.4130391267365926e-06, + "loss": 0.1627, + "step": 15659 + }, + { + "epoch": 2.211239762778876, + "grad_norm": 3.2957430994450516, + "learning_rate": 3.411892190991882e-06, + "loss": 0.1641, + "step": 15660 + }, + { + "epoch": 2.211380965828862, + "grad_norm": 2.800099091748301, + "learning_rate": 3.410745408351952e-06, + "loss": 0.1089, + "step": 15661 + }, + { + "epoch": 2.211522168878848, + "grad_norm": 2.611094992840617, + "learning_rate": 3.4095987788434538e-06, + "loss": 0.1506, + "step": 15662 + }, + { + "epoch": 2.2116633719288337, + "grad_norm": 3.6397298474767545, + "learning_rate": 3.4084523024930295e-06, + "loss": 0.1734, + "step": 15663 + }, + { + "epoch": 2.2118045749788195, + "grad_norm": 3.214676487361643, + "learning_rate": 3.407305979327331e-06, + "loss": 0.1821, + "step": 15664 + }, + { + "epoch": 2.2119457780288054, + "grad_norm": 3.45248651732651, + "learning_rate": 3.4061598093729942e-06, + "loss": 0.1696, + "step": 15665 + }, + { + "epoch": 2.2120869810787913, + "grad_norm": 3.562286855656202, + "learning_rate": 3.405013792656655e-06, + "loss": 0.2142, + "step": 15666 + }, + { + "epoch": 2.212228184128777, + "grad_norm": 2.952491991553645, + "learning_rate": 3.4038679292049516e-06, + "loss": 0.1368, + "step": 15667 + }, + { + "epoch": 2.212369387178763, + "grad_norm": 2.8960036063708365, + "learning_rate": 3.402722219044505e-06, + "loss": 0.1605, + "step": 15668 + }, + { + "epoch": 2.212510590228749, + "grad_norm": 2.7518029862601128, + "learning_rate": 3.401576662201942e-06, + "loss": 0.1374, + "step": 15669 + }, + { + "epoch": 2.212651793278735, + "grad_norm": 3.542001261614874, + "learning_rate": 3.40043125870389e-06, + "loss": 0.1816, + "step": 15670 + }, + { + "epoch": 2.2127929963287207, + "grad_norm": 3.278195091539961, + "learning_rate": 3.3992860085769665e-06, + "loss": 0.1882, + "step": 15671 + }, + { + "epoch": 2.2129341993787066, + "grad_norm": 3.670846777473275, + "learning_rate": 3.3981409118477847e-06, + "loss": 0.1549, + "step": 15672 + }, + { + "epoch": 2.2130754024286925, + "grad_norm": 3.041137694025137, + "learning_rate": 3.3969959685429575e-06, + "loss": 0.1461, + "step": 15673 + }, + { + "epoch": 2.2132166054786784, + "grad_norm": 2.596643663606384, + "learning_rate": 3.3958511786890923e-06, + "loss": 0.1394, + "step": 15674 + }, + { + "epoch": 2.2133578085286643, + "grad_norm": 3.5255164401121224, + "learning_rate": 3.3947065423127933e-06, + "loss": 0.1966, + "step": 15675 + }, + { + "epoch": 2.21349901157865, + "grad_norm": 2.8280480584871106, + "learning_rate": 3.3935620594406614e-06, + "loss": 0.1318, + "step": 15676 + }, + { + "epoch": 2.213640214628636, + "grad_norm": 3.4198892023183824, + "learning_rate": 3.3924177300992956e-06, + "loss": 0.1669, + "step": 15677 + }, + { + "epoch": 2.213781417678622, + "grad_norm": 3.9889423897436664, + "learning_rate": 3.3912735543152864e-06, + "loss": 0.2316, + "step": 15678 + }, + { + "epoch": 2.213922620728608, + "grad_norm": 3.2195569890920184, + "learning_rate": 3.3901295321152273e-06, + "loss": 0.1557, + "step": 15679 + }, + { + "epoch": 2.2140638237785937, + "grad_norm": 3.7673095821594615, + "learning_rate": 3.3889856635257024e-06, + "loss": 0.2016, + "step": 15680 + }, + { + "epoch": 2.2142050268285796, + "grad_norm": 3.095199991575782, + "learning_rate": 3.3878419485732915e-06, + "loss": 0.1684, + "step": 15681 + }, + { + "epoch": 2.2143462298785654, + "grad_norm": 2.6545902731109505, + "learning_rate": 3.3866983872845826e-06, + "loss": 0.1361, + "step": 15682 + }, + { + "epoch": 2.2144874329285513, + "grad_norm": 2.308066488363653, + "learning_rate": 3.3855549796861476e-06, + "loss": 0.1236, + "step": 15683 + }, + { + "epoch": 2.214628635978537, + "grad_norm": 3.6116324993092492, + "learning_rate": 3.3844117258045604e-06, + "loss": 0.1873, + "step": 15684 + }, + { + "epoch": 2.214769839028523, + "grad_norm": 3.0545957303844244, + "learning_rate": 3.383268625666385e-06, + "loss": 0.1434, + "step": 15685 + }, + { + "epoch": 2.214911042078509, + "grad_norm": 2.9313223858955197, + "learning_rate": 3.382125679298185e-06, + "loss": 0.128, + "step": 15686 + }, + { + "epoch": 2.215052245128495, + "grad_norm": 3.3385319862182197, + "learning_rate": 3.3809828867265305e-06, + "loss": 0.1878, + "step": 15687 + }, + { + "epoch": 2.2151934481784807, + "grad_norm": 3.245513740761717, + "learning_rate": 3.3798402479779747e-06, + "loss": 0.1969, + "step": 15688 + }, + { + "epoch": 2.2153346512284666, + "grad_norm": 2.8316793996429515, + "learning_rate": 3.378697763079073e-06, + "loss": 0.1149, + "step": 15689 + }, + { + "epoch": 2.2154758542784525, + "grad_norm": 3.1631492443029723, + "learning_rate": 3.3775554320563753e-06, + "loss": 0.1541, + "step": 15690 + }, + { + "epoch": 2.2156170573284384, + "grad_norm": 3.693464537336146, + "learning_rate": 3.3764132549364292e-06, + "loss": 0.1553, + "step": 15691 + }, + { + "epoch": 2.2157582603784243, + "grad_norm": 4.036733371291441, + "learning_rate": 3.375271231745779e-06, + "loss": 0.2444, + "step": 15692 + }, + { + "epoch": 2.21589946342841, + "grad_norm": 3.6903047302979597, + "learning_rate": 3.3741293625109637e-06, + "loss": 0.1982, + "step": 15693 + }, + { + "epoch": 2.216040666478396, + "grad_norm": 3.1248599482853967, + "learning_rate": 3.372987647258521e-06, + "loss": 0.1411, + "step": 15694 + }, + { + "epoch": 2.216181869528382, + "grad_norm": 3.3021161750669927, + "learning_rate": 3.3718460860149837e-06, + "loss": 0.1436, + "step": 15695 + }, + { + "epoch": 2.216323072578368, + "grad_norm": 2.9327442605610896, + "learning_rate": 3.370704678806881e-06, + "loss": 0.1363, + "step": 15696 + }, + { + "epoch": 2.2164642756283537, + "grad_norm": 3.2576245442388636, + "learning_rate": 3.369563425660738e-06, + "loss": 0.1326, + "step": 15697 + }, + { + "epoch": 2.2166054786783396, + "grad_norm": 2.608601636073404, + "learning_rate": 3.368422326603075e-06, + "loss": 0.1618, + "step": 15698 + }, + { + "epoch": 2.2167466817283255, + "grad_norm": 2.9267272403692055, + "learning_rate": 3.3672813816604155e-06, + "loss": 0.1838, + "step": 15699 + }, + { + "epoch": 2.2168878847783113, + "grad_norm": 2.6470409405898794, + "learning_rate": 3.366140590859276e-06, + "loss": 0.1233, + "step": 15700 + }, + { + "epoch": 2.217029087828297, + "grad_norm": 3.188742773804492, + "learning_rate": 3.3649999542261604e-06, + "loss": 0.1515, + "step": 15701 + }, + { + "epoch": 2.217170290878283, + "grad_norm": 2.4595490228561974, + "learning_rate": 3.3638594717875807e-06, + "loss": 0.1654, + "step": 15702 + }, + { + "epoch": 2.217311493928269, + "grad_norm": 3.3076970065592373, + "learning_rate": 3.362719143570038e-06, + "loss": 0.1737, + "step": 15703 + }, + { + "epoch": 2.217452696978255, + "grad_norm": 3.338391469243303, + "learning_rate": 3.361578969600039e-06, + "loss": 0.1788, + "step": 15704 + }, + { + "epoch": 2.2175939000282407, + "grad_norm": 2.8281440340202875, + "learning_rate": 3.360438949904078e-06, + "loss": 0.1507, + "step": 15705 + }, + { + "epoch": 2.2177351030782266, + "grad_norm": 4.294600393660219, + "learning_rate": 3.359299084508648e-06, + "loss": 0.2204, + "step": 15706 + }, + { + "epoch": 2.2178763061282125, + "grad_norm": 3.4297215318704386, + "learning_rate": 3.3581593734402397e-06, + "loss": 0.1463, + "step": 15707 + }, + { + "epoch": 2.2180175091781984, + "grad_norm": 2.6719295412169664, + "learning_rate": 3.3570198167253386e-06, + "loss": 0.1609, + "step": 15708 + }, + { + "epoch": 2.2181587122281843, + "grad_norm": 2.660703693790063, + "learning_rate": 3.355880414390429e-06, + "loss": 0.1419, + "step": 15709 + }, + { + "epoch": 2.21829991527817, + "grad_norm": 3.646976458465951, + "learning_rate": 3.354741166461989e-06, + "loss": 0.1764, + "step": 15710 + }, + { + "epoch": 2.218441118328156, + "grad_norm": 3.9364138475962283, + "learning_rate": 3.353602072966494e-06, + "loss": 0.2237, + "step": 15711 + }, + { + "epoch": 2.218582321378142, + "grad_norm": 3.4177395692243637, + "learning_rate": 3.352463133930417e-06, + "loss": 0.1595, + "step": 15712 + }, + { + "epoch": 2.218723524428128, + "grad_norm": 2.3873253761224067, + "learning_rate": 3.351324349380226e-06, + "loss": 0.1001, + "step": 15713 + }, + { + "epoch": 2.2188647274781137, + "grad_norm": 2.9148826388815126, + "learning_rate": 3.350185719342385e-06, + "loss": 0.1376, + "step": 15714 + }, + { + "epoch": 2.2190059305280996, + "grad_norm": 2.405205692890868, + "learning_rate": 3.349047243843353e-06, + "loss": 0.1486, + "step": 15715 + }, + { + "epoch": 2.2191471335780855, + "grad_norm": 2.616417958798766, + "learning_rate": 3.347908922909594e-06, + "loss": 0.1229, + "step": 15716 + }, + { + "epoch": 2.2192883366280713, + "grad_norm": 2.885408835520429, + "learning_rate": 3.346770756567562e-06, + "loss": 0.1106, + "step": 15717 + }, + { + "epoch": 2.2194295396780572, + "grad_norm": 3.181057840772467, + "learning_rate": 3.345632744843702e-06, + "loss": 0.1203, + "step": 15718 + }, + { + "epoch": 2.219570742728043, + "grad_norm": 3.2419250683903758, + "learning_rate": 3.344494887764462e-06, + "loss": 0.1388, + "step": 15719 + }, + { + "epoch": 2.219711945778029, + "grad_norm": 3.821736947692024, + "learning_rate": 3.343357185356284e-06, + "loss": 0.1585, + "step": 15720 + }, + { + "epoch": 2.219853148828015, + "grad_norm": 3.3239355777948334, + "learning_rate": 3.342219637645614e-06, + "loss": 0.1676, + "step": 15721 + }, + { + "epoch": 2.2199943518780008, + "grad_norm": 3.6430035692325418, + "learning_rate": 3.3410822446588833e-06, + "loss": 0.2172, + "step": 15722 + }, + { + "epoch": 2.2201355549279866, + "grad_norm": 3.403442294379391, + "learning_rate": 3.339945006422526e-06, + "loss": 0.1393, + "step": 15723 + }, + { + "epoch": 2.2202767579779725, + "grad_norm": 2.913081381127837, + "learning_rate": 3.33880792296297e-06, + "loss": 0.1571, + "step": 15724 + }, + { + "epoch": 2.2204179610279584, + "grad_norm": 3.49595504738506, + "learning_rate": 3.337670994306641e-06, + "loss": 0.1607, + "step": 15725 + }, + { + "epoch": 2.220559164077944, + "grad_norm": 2.8436860530913535, + "learning_rate": 3.3365342204799613e-06, + "loss": 0.1621, + "step": 15726 + }, + { + "epoch": 2.2207003671279297, + "grad_norm": 3.4043610527765504, + "learning_rate": 3.3353976015093492e-06, + "loss": 0.1527, + "step": 15727 + }, + { + "epoch": 2.2208415701779156, + "grad_norm": 3.2610050998026456, + "learning_rate": 3.3342611374212176e-06, + "loss": 0.1661, + "step": 15728 + }, + { + "epoch": 2.2209827732279015, + "grad_norm": 3.401444687883845, + "learning_rate": 3.333124828241978e-06, + "loss": 0.1811, + "step": 15729 + }, + { + "epoch": 2.2211239762778874, + "grad_norm": 3.466748811619931, + "learning_rate": 3.33198867399804e-06, + "loss": 0.1527, + "step": 15730 + }, + { + "epoch": 2.2212651793278733, + "grad_norm": 2.641741944783832, + "learning_rate": 3.3308526747158045e-06, + "loss": 0.1417, + "step": 15731 + }, + { + "epoch": 2.221406382377859, + "grad_norm": 3.371473758765488, + "learning_rate": 3.3297168304216686e-06, + "loss": 0.1445, + "step": 15732 + }, + { + "epoch": 2.221547585427845, + "grad_norm": 2.6942970632325043, + "learning_rate": 3.3285811411420376e-06, + "loss": 0.1019, + "step": 15733 + }, + { + "epoch": 2.221688788477831, + "grad_norm": 3.3082701614976147, + "learning_rate": 3.3274456069033024e-06, + "loss": 0.1399, + "step": 15734 + }, + { + "epoch": 2.221829991527817, + "grad_norm": 4.015160064827532, + "learning_rate": 3.326310227731846e-06, + "loss": 0.2104, + "step": 15735 + }, + { + "epoch": 2.2219711945778027, + "grad_norm": 3.613621652353988, + "learning_rate": 3.3251750036540585e-06, + "loss": 0.1947, + "step": 15736 + }, + { + "epoch": 2.2221123976277886, + "grad_norm": 3.3326432832376613, + "learning_rate": 3.324039934696317e-06, + "loss": 0.1408, + "step": 15737 + }, + { + "epoch": 2.2222536006777744, + "grad_norm": 2.876790017961229, + "learning_rate": 3.322905020885009e-06, + "loss": 0.136, + "step": 15738 + }, + { + "epoch": 2.2223948037277603, + "grad_norm": 3.518112132181415, + "learning_rate": 3.321770262246503e-06, + "loss": 0.2031, + "step": 15739 + }, + { + "epoch": 2.222536006777746, + "grad_norm": 2.7449825368553658, + "learning_rate": 3.3206356588071733e-06, + "loss": 0.1441, + "step": 15740 + }, + { + "epoch": 2.222677209827732, + "grad_norm": 3.243134391336527, + "learning_rate": 3.3195012105933856e-06, + "loss": 0.1371, + "step": 15741 + }, + { + "epoch": 2.222818412877718, + "grad_norm": 3.973782071016101, + "learning_rate": 3.3183669176315046e-06, + "loss": 0.2039, + "step": 15742 + }, + { + "epoch": 2.222959615927704, + "grad_norm": 2.801072500632294, + "learning_rate": 3.317232779947891e-06, + "loss": 0.1478, + "step": 15743 + }, + { + "epoch": 2.2231008189776897, + "grad_norm": 3.1966330183294853, + "learning_rate": 3.3160987975689017e-06, + "loss": 0.1631, + "step": 15744 + }, + { + "epoch": 2.2232420220276756, + "grad_norm": 3.53213152390162, + "learning_rate": 3.3149649705208908e-06, + "loss": 0.1783, + "step": 15745 + }, + { + "epoch": 2.2233832250776615, + "grad_norm": 3.245032997211984, + "learning_rate": 3.3138312988302055e-06, + "loss": 0.1614, + "step": 15746 + }, + { + "epoch": 2.2235244281276474, + "grad_norm": 2.481262988513125, + "learning_rate": 3.312697782523193e-06, + "loss": 0.1006, + "step": 15747 + }, + { + "epoch": 2.2236656311776333, + "grad_norm": 3.0742634060552145, + "learning_rate": 3.3115644216261977e-06, + "loss": 0.1925, + "step": 15748 + }, + { + "epoch": 2.223806834227619, + "grad_norm": 2.656242117122283, + "learning_rate": 3.3104312161655516e-06, + "loss": 0.1176, + "step": 15749 + }, + { + "epoch": 2.223948037277605, + "grad_norm": 3.1514652220339525, + "learning_rate": 3.309298166167599e-06, + "loss": 0.136, + "step": 15750 + }, + { + "epoch": 2.224089240327591, + "grad_norm": 2.934619275953598, + "learning_rate": 3.308165271658672e-06, + "loss": 0.1342, + "step": 15751 + }, + { + "epoch": 2.224230443377577, + "grad_norm": 3.9173100613234175, + "learning_rate": 3.30703253266509e-06, + "loss": 0.1944, + "step": 15752 + }, + { + "epoch": 2.2243716464275627, + "grad_norm": 3.423566457899086, + "learning_rate": 3.3058999492131805e-06, + "loss": 0.1702, + "step": 15753 + }, + { + "epoch": 2.2245128494775486, + "grad_norm": 2.827232034685807, + "learning_rate": 3.304767521329263e-06, + "loss": 0.1436, + "step": 15754 + }, + { + "epoch": 2.2246540525275345, + "grad_norm": 3.3143109130086934, + "learning_rate": 3.3036352490396596e-06, + "loss": 0.1806, + "step": 15755 + }, + { + "epoch": 2.2247952555775203, + "grad_norm": 3.9504949170327253, + "learning_rate": 3.302503132370681e-06, + "loss": 0.1892, + "step": 15756 + }, + { + "epoch": 2.224936458627506, + "grad_norm": 3.2278424428399073, + "learning_rate": 3.3013711713486383e-06, + "loss": 0.165, + "step": 15757 + }, + { + "epoch": 2.225077661677492, + "grad_norm": 4.162476806025594, + "learning_rate": 3.3002393659998357e-06, + "loss": 0.1947, + "step": 15758 + }, + { + "epoch": 2.225218864727478, + "grad_norm": 2.5375440743812026, + "learning_rate": 3.299107716350577e-06, + "loss": 0.112, + "step": 15759 + }, + { + "epoch": 2.225360067777464, + "grad_norm": 3.8191121919592335, + "learning_rate": 3.2979762224271616e-06, + "loss": 0.1647, + "step": 15760 + }, + { + "epoch": 2.2255012708274498, + "grad_norm": 3.639624214346567, + "learning_rate": 3.2968448842558833e-06, + "loss": 0.1761, + "step": 15761 + }, + { + "epoch": 2.2256424738774356, + "grad_norm": 2.5112926778753173, + "learning_rate": 3.295713701863036e-06, + "loss": 0.1379, + "step": 15762 + }, + { + "epoch": 2.2257836769274215, + "grad_norm": 3.1773061138532532, + "learning_rate": 3.2945826752749068e-06, + "loss": 0.1446, + "step": 15763 + }, + { + "epoch": 2.2259248799774074, + "grad_norm": 3.282223576742086, + "learning_rate": 3.2934518045177798e-06, + "loss": 0.1809, + "step": 15764 + }, + { + "epoch": 2.2260660830273933, + "grad_norm": 3.1246810208531692, + "learning_rate": 3.2923210896179368e-06, + "loss": 0.1915, + "step": 15765 + }, + { + "epoch": 2.226207286077379, + "grad_norm": 2.6996383947306413, + "learning_rate": 3.291190530601651e-06, + "loss": 0.1199, + "step": 15766 + }, + { + "epoch": 2.226348489127365, + "grad_norm": 2.560053471344069, + "learning_rate": 3.2900601274952038e-06, + "loss": 0.1192, + "step": 15767 + }, + { + "epoch": 2.226489692177351, + "grad_norm": 2.848529329842376, + "learning_rate": 3.288929880324865e-06, + "loss": 0.1539, + "step": 15768 + }, + { + "epoch": 2.226630895227337, + "grad_norm": 3.370693737409179, + "learning_rate": 3.2877997891168944e-06, + "loss": 0.1623, + "step": 15769 + }, + { + "epoch": 2.2267720982773227, + "grad_norm": 3.2796455126552164, + "learning_rate": 3.2866698538975572e-06, + "loss": 0.157, + "step": 15770 + }, + { + "epoch": 2.2269133013273086, + "grad_norm": 3.553660648103173, + "learning_rate": 3.285540074693113e-06, + "loss": 0.2096, + "step": 15771 + }, + { + "epoch": 2.2270545043772945, + "grad_norm": 3.614546115684095, + "learning_rate": 3.284410451529816e-06, + "loss": 0.1524, + "step": 15772 + }, + { + "epoch": 2.2271957074272803, + "grad_norm": 3.574656513702258, + "learning_rate": 3.283280984433922e-06, + "loss": 0.1674, + "step": 15773 + }, + { + "epoch": 2.2273369104772662, + "grad_norm": 3.1688333827297575, + "learning_rate": 3.2821516734316772e-06, + "loss": 0.1192, + "step": 15774 + }, + { + "epoch": 2.227478113527252, + "grad_norm": 2.9402583307325436, + "learning_rate": 3.281022518549326e-06, + "loss": 0.1741, + "step": 15775 + }, + { + "epoch": 2.227619316577238, + "grad_norm": 2.6930249940212367, + "learning_rate": 3.279893519813111e-06, + "loss": 0.1265, + "step": 15776 + }, + { + "epoch": 2.227760519627224, + "grad_norm": 2.531940110547849, + "learning_rate": 3.2787646772492678e-06, + "loss": 0.1459, + "step": 15777 + }, + { + "epoch": 2.2279017226772098, + "grad_norm": 2.9759601258806985, + "learning_rate": 3.2776359908840306e-06, + "loss": 0.1577, + "step": 15778 + }, + { + "epoch": 2.2280429257271956, + "grad_norm": 2.446051303653981, + "learning_rate": 3.27650746074363e-06, + "loss": 0.1298, + "step": 15779 + }, + { + "epoch": 2.2281841287771815, + "grad_norm": 2.9814563690726206, + "learning_rate": 3.275379086854292e-06, + "loss": 0.1364, + "step": 15780 + }, + { + "epoch": 2.2283253318271674, + "grad_norm": 4.507686672899418, + "learning_rate": 3.2742508692422405e-06, + "loss": 0.2231, + "step": 15781 + }, + { + "epoch": 2.2284665348771533, + "grad_norm": 3.2229993667142156, + "learning_rate": 3.2731228079336928e-06, + "loss": 0.1572, + "step": 15782 + }, + { + "epoch": 2.228607737927139, + "grad_norm": 3.874126947162224, + "learning_rate": 3.271994902954867e-06, + "loss": 0.1598, + "step": 15783 + }, + { + "epoch": 2.228748940977125, + "grad_norm": 3.1945813095760403, + "learning_rate": 3.2708671543319712e-06, + "loss": 0.1678, + "step": 15784 + }, + { + "epoch": 2.228890144027111, + "grad_norm": 2.9808193684312334, + "learning_rate": 3.2697395620912233e-06, + "loss": 0.1405, + "step": 15785 + }, + { + "epoch": 2.229031347077097, + "grad_norm": 3.4930336564464652, + "learning_rate": 3.2686121262588165e-06, + "loss": 0.1851, + "step": 15786 + }, + { + "epoch": 2.2291725501270827, + "grad_norm": 2.576363944572552, + "learning_rate": 3.267484846860959e-06, + "loss": 0.1348, + "step": 15787 + }, + { + "epoch": 2.2293137531770686, + "grad_norm": 3.299302836018291, + "learning_rate": 3.2663577239238443e-06, + "loss": 0.1475, + "step": 15788 + }, + { + "epoch": 2.2294549562270545, + "grad_norm": 3.802476557226168, + "learning_rate": 3.2652307574736652e-06, + "loss": 0.1494, + "step": 15789 + }, + { + "epoch": 2.2295961592770404, + "grad_norm": 3.0891643024726365, + "learning_rate": 3.264103947536619e-06, + "loss": 0.162, + "step": 15790 + }, + { + "epoch": 2.2297373623270262, + "grad_norm": 3.220751587552492, + "learning_rate": 3.262977294138887e-06, + "loss": 0.1764, + "step": 15791 + }, + { + "epoch": 2.229878565377012, + "grad_norm": 3.600283631454464, + "learning_rate": 3.2618507973066536e-06, + "loss": 0.2034, + "step": 15792 + }, + { + "epoch": 2.230019768426998, + "grad_norm": 3.832411048974628, + "learning_rate": 3.2607244570660966e-06, + "loss": 0.1606, + "step": 15793 + }, + { + "epoch": 2.230160971476984, + "grad_norm": 3.452655645634177, + "learning_rate": 3.259598273443394e-06, + "loss": 0.1431, + "step": 15794 + }, + { + "epoch": 2.2303021745269698, + "grad_norm": 3.3750096297459704, + "learning_rate": 3.2584722464647167e-06, + "loss": 0.1548, + "step": 15795 + }, + { + "epoch": 2.2304433775769557, + "grad_norm": 3.876893663678782, + "learning_rate": 3.257346376156233e-06, + "loss": 0.2217, + "step": 15796 + }, + { + "epoch": 2.2305845806269415, + "grad_norm": 3.516298596902472, + "learning_rate": 3.2562206625441073e-06, + "loss": 0.1586, + "step": 15797 + }, + { + "epoch": 2.2307257836769274, + "grad_norm": 5.100071104683038, + "learning_rate": 3.2550951056545e-06, + "loss": 0.2107, + "step": 15798 + }, + { + "epoch": 2.2308669867269133, + "grad_norm": 3.0635435297438267, + "learning_rate": 3.253969705513571e-06, + "loss": 0.1619, + "step": 15799 + }, + { + "epoch": 2.231008189776899, + "grad_norm": 3.290102134201919, + "learning_rate": 3.252844462147472e-06, + "loss": 0.1503, + "step": 15800 + }, + { + "epoch": 2.231149392826885, + "grad_norm": 3.2213203841735387, + "learning_rate": 3.2517193755823506e-06, + "loss": 0.1785, + "step": 15801 + }, + { + "epoch": 2.231290595876871, + "grad_norm": 3.4559718305140263, + "learning_rate": 3.2505944458443626e-06, + "loss": 0.1921, + "step": 15802 + }, + { + "epoch": 2.231431798926857, + "grad_norm": 3.043782857459016, + "learning_rate": 3.249469672959642e-06, + "loss": 0.1291, + "step": 15803 + }, + { + "epoch": 2.2315730019768427, + "grad_norm": 3.476721674804354, + "learning_rate": 3.248345056954332e-06, + "loss": 0.1599, + "step": 15804 + }, + { + "epoch": 2.2317142050268286, + "grad_norm": 3.541982370811625, + "learning_rate": 3.2472205978545657e-06, + "loss": 0.2124, + "step": 15805 + }, + { + "epoch": 2.2318554080768145, + "grad_norm": 2.2358291536121007, + "learning_rate": 3.2460962956864727e-06, + "loss": 0.1296, + "step": 15806 + }, + { + "epoch": 2.2319966111268004, + "grad_norm": 3.1897054814495296, + "learning_rate": 3.24497215047619e-06, + "loss": 0.1411, + "step": 15807 + }, + { + "epoch": 2.2321378141767863, + "grad_norm": 3.792611424106291, + "learning_rate": 3.243848162249835e-06, + "loss": 0.1887, + "step": 15808 + }, + { + "epoch": 2.232279017226772, + "grad_norm": 3.3794472879664452, + "learning_rate": 3.2427243310335333e-06, + "loss": 0.1672, + "step": 15809 + }, + { + "epoch": 2.232420220276758, + "grad_norm": 3.806036255943824, + "learning_rate": 3.2416006568533987e-06, + "loss": 0.1769, + "step": 15810 + }, + { + "epoch": 2.232561423326744, + "grad_norm": 2.6768019808929924, + "learning_rate": 3.240477139735546e-06, + "loss": 0.1371, + "step": 15811 + }, + { + "epoch": 2.23270262637673, + "grad_norm": 2.689263077890952, + "learning_rate": 3.239353779706086e-06, + "loss": 0.1479, + "step": 15812 + }, + { + "epoch": 2.2328438294267157, + "grad_norm": 3.0010048157067715, + "learning_rate": 3.238230576791124e-06, + "loss": 0.141, + "step": 15813 + }, + { + "epoch": 2.2329850324767015, + "grad_norm": 3.008188105246033, + "learning_rate": 3.2371075310167634e-06, + "loss": 0.1622, + "step": 15814 + }, + { + "epoch": 2.2331262355266874, + "grad_norm": 3.830703862528595, + "learning_rate": 3.2359846424091025e-06, + "loss": 0.1755, + "step": 15815 + }, + { + "epoch": 2.2332674385766733, + "grad_norm": 2.394735671310006, + "learning_rate": 3.234861910994238e-06, + "loss": 0.1174, + "step": 15816 + }, + { + "epoch": 2.233408641626659, + "grad_norm": 3.199872236203357, + "learning_rate": 3.2337393367982604e-06, + "loss": 0.1567, + "step": 15817 + }, + { + "epoch": 2.233549844676645, + "grad_norm": 3.119621940053606, + "learning_rate": 3.2326169198472555e-06, + "loss": 0.136, + "step": 15818 + }, + { + "epoch": 2.233691047726631, + "grad_norm": 3.541852655813336, + "learning_rate": 3.2314946601673182e-06, + "loss": 0.2019, + "step": 15819 + }, + { + "epoch": 2.233832250776617, + "grad_norm": 2.822158488349907, + "learning_rate": 3.230372557784518e-06, + "loss": 0.1494, + "step": 15820 + }, + { + "epoch": 2.2339734538266027, + "grad_norm": 3.0618241337314602, + "learning_rate": 3.229250612724936e-06, + "loss": 0.1732, + "step": 15821 + }, + { + "epoch": 2.2341146568765886, + "grad_norm": 2.9978500780495603, + "learning_rate": 3.2281288250146447e-06, + "loss": 0.1627, + "step": 15822 + }, + { + "epoch": 2.2342558599265745, + "grad_norm": 2.718509759523067, + "learning_rate": 3.2270071946797133e-06, + "loss": 0.1194, + "step": 15823 + }, + { + "epoch": 2.2343970629765604, + "grad_norm": 2.903898698654555, + "learning_rate": 3.2258857217462115e-06, + "loss": 0.1357, + "step": 15824 + }, + { + "epoch": 2.2345382660265463, + "grad_norm": 3.209175156048876, + "learning_rate": 3.2247644062402006e-06, + "loss": 0.1678, + "step": 15825 + }, + { + "epoch": 2.234679469076532, + "grad_norm": 3.0851691796309133, + "learning_rate": 3.22364324818774e-06, + "loss": 0.1597, + "step": 15826 + }, + { + "epoch": 2.234820672126518, + "grad_norm": 3.1755785771990475, + "learning_rate": 3.222522247614882e-06, + "loss": 0.1214, + "step": 15827 + }, + { + "epoch": 2.234961875176504, + "grad_norm": 2.978499115420236, + "learning_rate": 3.2214014045476815e-06, + "loss": 0.1444, + "step": 15828 + }, + { + "epoch": 2.23510307822649, + "grad_norm": 2.722058577816949, + "learning_rate": 3.2202807190121845e-06, + "loss": 0.1455, + "step": 15829 + }, + { + "epoch": 2.2352442812764757, + "grad_norm": 2.9257897412642007, + "learning_rate": 3.219160191034435e-06, + "loss": 0.1611, + "step": 15830 + }, + { + "epoch": 2.2353854843264616, + "grad_norm": 3.1998483477656205, + "learning_rate": 3.218039820640475e-06, + "loss": 0.1486, + "step": 15831 + }, + { + "epoch": 2.2355266873764474, + "grad_norm": 2.6025966837140926, + "learning_rate": 3.2169196078563403e-06, + "loss": 0.1406, + "step": 15832 + }, + { + "epoch": 2.2356678904264333, + "grad_norm": 2.8673524021998356, + "learning_rate": 3.2157995527080643e-06, + "loss": 0.1319, + "step": 15833 + }, + { + "epoch": 2.235809093476419, + "grad_norm": 3.07294829306463, + "learning_rate": 3.2146796552216773e-06, + "loss": 0.1709, + "step": 15834 + }, + { + "epoch": 2.235950296526405, + "grad_norm": 3.84976571884867, + "learning_rate": 3.213559915423201e-06, + "loss": 0.1822, + "step": 15835 + }, + { + "epoch": 2.236091499576391, + "grad_norm": 2.662954528506825, + "learning_rate": 3.212440333338668e-06, + "loss": 0.1354, + "step": 15836 + }, + { + "epoch": 2.236232702626377, + "grad_norm": 2.758285163170542, + "learning_rate": 3.2113209089940877e-06, + "loss": 0.1187, + "step": 15837 + }, + { + "epoch": 2.2363739056763627, + "grad_norm": 3.097287271170392, + "learning_rate": 3.210201642415477e-06, + "loss": 0.1322, + "step": 15838 + }, + { + "epoch": 2.2365151087263486, + "grad_norm": 3.8483336559767096, + "learning_rate": 3.209082533628848e-06, + "loss": 0.1818, + "step": 15839 + }, + { + "epoch": 2.2366563117763345, + "grad_norm": 3.5069573421109856, + "learning_rate": 3.2079635826602053e-06, + "loss": 0.1837, + "step": 15840 + }, + { + "epoch": 2.2367975148263204, + "grad_norm": 3.392051636355483, + "learning_rate": 3.2068447895355583e-06, + "loss": 0.1193, + "step": 15841 + }, + { + "epoch": 2.2369387178763063, + "grad_norm": 3.1196356479350125, + "learning_rate": 3.205726154280905e-06, + "loss": 0.1255, + "step": 15842 + }, + { + "epoch": 2.237079920926292, + "grad_norm": 3.3370370270753327, + "learning_rate": 3.2046076769222424e-06, + "loss": 0.1579, + "step": 15843 + }, + { + "epoch": 2.237221123976278, + "grad_norm": 3.3159125755142957, + "learning_rate": 3.203489357485562e-06, + "loss": 0.1346, + "step": 15844 + }, + { + "epoch": 2.237362327026264, + "grad_norm": 3.2045150794590387, + "learning_rate": 3.2023711959968564e-06, + "loss": 0.145, + "step": 15845 + }, + { + "epoch": 2.23750353007625, + "grad_norm": 3.749461074359461, + "learning_rate": 3.201253192482102e-06, + "loss": 0.1758, + "step": 15846 + }, + { + "epoch": 2.2376447331262357, + "grad_norm": 2.781934547777391, + "learning_rate": 3.2001353469672915e-06, + "loss": 0.1207, + "step": 15847 + }, + { + "epoch": 2.2377859361762216, + "grad_norm": 3.1923845545123077, + "learning_rate": 3.1990176594783983e-06, + "loss": 0.1899, + "step": 15848 + }, + { + "epoch": 2.2379271392262075, + "grad_norm": 3.2078944156858484, + "learning_rate": 3.197900130041398e-06, + "loss": 0.139, + "step": 15849 + }, + { + "epoch": 2.2380683422761933, + "grad_norm": 4.0143361773549, + "learning_rate": 3.196782758682261e-06, + "loss": 0.1908, + "step": 15850 + }, + { + "epoch": 2.238209545326179, + "grad_norm": 2.3972979350786545, + "learning_rate": 3.1956655454269546e-06, + "loss": 0.1161, + "step": 15851 + }, + { + "epoch": 2.238350748376165, + "grad_norm": 2.8943112271287945, + "learning_rate": 3.194548490301439e-06, + "loss": 0.1606, + "step": 15852 + }, + { + "epoch": 2.238491951426151, + "grad_norm": 3.334731102435365, + "learning_rate": 3.193431593331684e-06, + "loss": 0.17, + "step": 15853 + }, + { + "epoch": 2.238633154476137, + "grad_norm": 2.7965698448827427, + "learning_rate": 3.1923148545436357e-06, + "loss": 0.13, + "step": 15854 + }, + { + "epoch": 2.2387743575261227, + "grad_norm": 3.9895113855189606, + "learning_rate": 3.1911982739632497e-06, + "loss": 0.2183, + "step": 15855 + }, + { + "epoch": 2.2389155605761086, + "grad_norm": 3.210887499823668, + "learning_rate": 3.1900818516164766e-06, + "loss": 0.192, + "step": 15856 + }, + { + "epoch": 2.2390567636260945, + "grad_norm": 3.257334984010536, + "learning_rate": 3.1889655875292593e-06, + "loss": 0.1381, + "step": 15857 + }, + { + "epoch": 2.2391979666760804, + "grad_norm": 2.9906469517929506, + "learning_rate": 3.1878494817275374e-06, + "loss": 0.11, + "step": 15858 + }, + { + "epoch": 2.2393391697260663, + "grad_norm": 3.811559738745294, + "learning_rate": 3.186733534237255e-06, + "loss": 0.1829, + "step": 15859 + }, + { + "epoch": 2.239480372776052, + "grad_norm": 3.8492309941615224, + "learning_rate": 3.185617745084343e-06, + "loss": 0.1815, + "step": 15860 + }, + { + "epoch": 2.239621575826038, + "grad_norm": 3.461277293738979, + "learning_rate": 3.184502114294734e-06, + "loss": 0.127, + "step": 15861 + }, + { + "epoch": 2.239762778876024, + "grad_norm": 3.1402572340250834, + "learning_rate": 3.1833866418943503e-06, + "loss": 0.1462, + "step": 15862 + }, + { + "epoch": 2.23990398192601, + "grad_norm": 3.2871019226399145, + "learning_rate": 3.182271327909113e-06, + "loss": 0.1333, + "step": 15863 + }, + { + "epoch": 2.2400451849759957, + "grad_norm": 3.4959349004449773, + "learning_rate": 3.1811561723649496e-06, + "loss": 0.1747, + "step": 15864 + }, + { + "epoch": 2.2401863880259816, + "grad_norm": 4.445077663584838, + "learning_rate": 3.1800411752877714e-06, + "loss": 0.1761, + "step": 15865 + }, + { + "epoch": 2.2403275910759675, + "grad_norm": 3.0816792748174855, + "learning_rate": 3.1789263367034918e-06, + "loss": 0.1652, + "step": 15866 + }, + { + "epoch": 2.2404687941259533, + "grad_norm": 4.434447591311851, + "learning_rate": 3.177811656638018e-06, + "loss": 0.2141, + "step": 15867 + }, + { + "epoch": 2.2406099971759392, + "grad_norm": 4.966880514867716, + "learning_rate": 3.1766971351172547e-06, + "loss": 0.1809, + "step": 15868 + }, + { + "epoch": 2.240751200225925, + "grad_norm": 3.423051853674513, + "learning_rate": 3.1755827721670996e-06, + "loss": 0.1367, + "step": 15869 + }, + { + "epoch": 2.240892403275911, + "grad_norm": 3.1112951251115755, + "learning_rate": 3.174468567813461e-06, + "loss": 0.1604, + "step": 15870 + }, + { + "epoch": 2.2410336063258964, + "grad_norm": 3.233901127699001, + "learning_rate": 3.1733545220822215e-06, + "loss": 0.1897, + "step": 15871 + }, + { + "epoch": 2.2411748093758823, + "grad_norm": 2.9739009789151507, + "learning_rate": 3.172240634999275e-06, + "loss": 0.1371, + "step": 15872 + }, + { + "epoch": 2.241316012425868, + "grad_norm": 3.064408607564881, + "learning_rate": 3.171126906590507e-06, + "loss": 0.1674, + "step": 15873 + }, + { + "epoch": 2.241457215475854, + "grad_norm": 3.3950684455346467, + "learning_rate": 3.170013336881801e-06, + "loss": 0.155, + "step": 15874 + }, + { + "epoch": 2.24159841852584, + "grad_norm": 3.809174389804828, + "learning_rate": 3.1688999258990318e-06, + "loss": 0.164, + "step": 15875 + }, + { + "epoch": 2.241739621575826, + "grad_norm": 2.74010217457674, + "learning_rate": 3.167786673668082e-06, + "loss": 0.1476, + "step": 15876 + }, + { + "epoch": 2.2418808246258117, + "grad_norm": 3.215284281306833, + "learning_rate": 3.1666735802148185e-06, + "loss": 0.1566, + "step": 15877 + }, + { + "epoch": 2.2420220276757976, + "grad_norm": 3.7626124595794437, + "learning_rate": 3.1655606455651134e-06, + "loss": 0.1963, + "step": 15878 + }, + { + "epoch": 2.2421632307257835, + "grad_norm": 3.3043046952199657, + "learning_rate": 3.1644478697448245e-06, + "loss": 0.1443, + "step": 15879 + }, + { + "epoch": 2.2423044337757694, + "grad_norm": 3.152287193813122, + "learning_rate": 3.163335252779811e-06, + "loss": 0.17, + "step": 15880 + }, + { + "epoch": 2.2424456368257553, + "grad_norm": 3.0468035283569805, + "learning_rate": 3.1622227946959374e-06, + "loss": 0.1416, + "step": 15881 + }, + { + "epoch": 2.242586839875741, + "grad_norm": 3.1232788711366166, + "learning_rate": 3.161110495519053e-06, + "loss": 0.1328, + "step": 15882 + }, + { + "epoch": 2.242728042925727, + "grad_norm": 2.7699712923709914, + "learning_rate": 3.159998355275008e-06, + "loss": 0.1397, + "step": 15883 + }, + { + "epoch": 2.242869245975713, + "grad_norm": 2.9541531176428526, + "learning_rate": 3.1588863739896457e-06, + "loss": 0.1604, + "step": 15884 + }, + { + "epoch": 2.243010449025699, + "grad_norm": 5.173710067010855, + "learning_rate": 3.1577745516888103e-06, + "loss": 0.2172, + "step": 15885 + }, + { + "epoch": 2.2431516520756847, + "grad_norm": 3.1815845762067734, + "learning_rate": 3.1566628883983395e-06, + "loss": 0.1577, + "step": 15886 + }, + { + "epoch": 2.2432928551256706, + "grad_norm": 3.302840927747817, + "learning_rate": 3.1555513841440686e-06, + "loss": 0.1287, + "step": 15887 + }, + { + "epoch": 2.2434340581756564, + "grad_norm": 3.012746810947023, + "learning_rate": 3.154440038951827e-06, + "loss": 0.1243, + "step": 15888 + }, + { + "epoch": 2.2435752612256423, + "grad_norm": 3.041528129081475, + "learning_rate": 3.1533288528474416e-06, + "loss": 0.1277, + "step": 15889 + }, + { + "epoch": 2.243716464275628, + "grad_norm": 3.0470392290795747, + "learning_rate": 3.152217825856738e-06, + "loss": 0.1717, + "step": 15890 + }, + { + "epoch": 2.243857667325614, + "grad_norm": 3.2505469831367506, + "learning_rate": 3.1511069580055343e-06, + "loss": 0.1781, + "step": 15891 + }, + { + "epoch": 2.2439988703756, + "grad_norm": 3.3793950873713072, + "learning_rate": 3.1499962493196446e-06, + "loss": 0.1635, + "step": 15892 + }, + { + "epoch": 2.244140073425586, + "grad_norm": 3.7653232216057524, + "learning_rate": 3.1488856998248864e-06, + "loss": 0.1676, + "step": 15893 + }, + { + "epoch": 2.2442812764755717, + "grad_norm": 2.8224492854927288, + "learning_rate": 3.1477753095470654e-06, + "loss": 0.1236, + "step": 15894 + }, + { + "epoch": 2.2444224795255576, + "grad_norm": 3.2063279978494497, + "learning_rate": 3.14666507851199e-06, + "loss": 0.1603, + "step": 15895 + }, + { + "epoch": 2.2445636825755435, + "grad_norm": 2.424023335894876, + "learning_rate": 3.1455550067454555e-06, + "loss": 0.1352, + "step": 15896 + }, + { + "epoch": 2.2447048856255294, + "grad_norm": 3.7022919323429315, + "learning_rate": 3.1444450942732594e-06, + "loss": 0.1466, + "step": 15897 + }, + { + "epoch": 2.2448460886755153, + "grad_norm": 3.4593736347396633, + "learning_rate": 3.143335341121202e-06, + "loss": 0.1435, + "step": 15898 + }, + { + "epoch": 2.244987291725501, + "grad_norm": 3.0199088302280175, + "learning_rate": 3.142225747315071e-06, + "loss": 0.1321, + "step": 15899 + }, + { + "epoch": 2.245128494775487, + "grad_norm": 3.184749326140049, + "learning_rate": 3.1411163128806497e-06, + "loss": 0.1628, + "step": 15900 + }, + { + "epoch": 2.245269697825473, + "grad_norm": 3.295091311281202, + "learning_rate": 3.1400070378437253e-06, + "loss": 0.1658, + "step": 15901 + }, + { + "epoch": 2.245410900875459, + "grad_norm": 4.093782775114767, + "learning_rate": 3.138897922230074e-06, + "loss": 0.1932, + "step": 15902 + }, + { + "epoch": 2.2455521039254447, + "grad_norm": 2.405948643867988, + "learning_rate": 3.1377889660654712e-06, + "loss": 0.1042, + "step": 15903 + }, + { + "epoch": 2.2456933069754306, + "grad_norm": 3.016171103988904, + "learning_rate": 3.13668016937569e-06, + "loss": 0.1745, + "step": 15904 + }, + { + "epoch": 2.2458345100254165, + "grad_norm": 4.092768364385749, + "learning_rate": 3.1355715321864978e-06, + "loss": 0.1805, + "step": 15905 + }, + { + "epoch": 2.2459757130754023, + "grad_norm": 3.941052728568223, + "learning_rate": 3.1344630545236576e-06, + "loss": 0.1824, + "step": 15906 + }, + { + "epoch": 2.246116916125388, + "grad_norm": 3.3009733451825936, + "learning_rate": 3.1333547364129324e-06, + "loss": 0.1525, + "step": 15907 + }, + { + "epoch": 2.246258119175374, + "grad_norm": 3.6683328966442095, + "learning_rate": 3.132246577880077e-06, + "loss": 0.1555, + "step": 15908 + }, + { + "epoch": 2.24639932222536, + "grad_norm": 2.71014464828207, + "learning_rate": 3.131138578950842e-06, + "loss": 0.1253, + "step": 15909 + }, + { + "epoch": 2.246540525275346, + "grad_norm": 3.0675011168195123, + "learning_rate": 3.1300307396509833e-06, + "loss": 0.1323, + "step": 15910 + }, + { + "epoch": 2.2466817283253318, + "grad_norm": 4.337292705587969, + "learning_rate": 3.1289230600062427e-06, + "loss": 0.1847, + "step": 15911 + }, + { + "epoch": 2.2468229313753176, + "grad_norm": 3.284005147548238, + "learning_rate": 3.1278155400423673e-06, + "loss": 0.1471, + "step": 15912 + }, + { + "epoch": 2.2469641344253035, + "grad_norm": 3.425738761719479, + "learning_rate": 3.1267081797850862e-06, + "loss": 0.1798, + "step": 15913 + }, + { + "epoch": 2.2471053374752894, + "grad_norm": 3.465761075890443, + "learning_rate": 3.125600979260136e-06, + "loss": 0.179, + "step": 15914 + }, + { + "epoch": 2.2472465405252753, + "grad_norm": 3.289471314331126, + "learning_rate": 3.1244939384932537e-06, + "loss": 0.1636, + "step": 15915 + }, + { + "epoch": 2.247387743575261, + "grad_norm": 2.852613123414985, + "learning_rate": 3.123387057510162e-06, + "loss": 0.1335, + "step": 15916 + }, + { + "epoch": 2.247528946625247, + "grad_norm": 2.9120573353869497, + "learning_rate": 3.122280336336587e-06, + "loss": 0.1565, + "step": 15917 + }, + { + "epoch": 2.247670149675233, + "grad_norm": 3.239016036494344, + "learning_rate": 3.121173774998245e-06, + "loss": 0.1511, + "step": 15918 + }, + { + "epoch": 2.247811352725219, + "grad_norm": 3.6861611062135835, + "learning_rate": 3.1200673735208555e-06, + "loss": 0.1654, + "step": 15919 + }, + { + "epoch": 2.2479525557752047, + "grad_norm": 3.604016945906634, + "learning_rate": 3.118961131930127e-06, + "loss": 0.1529, + "step": 15920 + }, + { + "epoch": 2.2480937588251906, + "grad_norm": 3.418576965347991, + "learning_rate": 3.1178550502517725e-06, + "loss": 0.1689, + "step": 15921 + }, + { + "epoch": 2.2482349618751765, + "grad_norm": 3.1445663175799012, + "learning_rate": 3.1167491285114928e-06, + "loss": 0.1435, + "step": 15922 + }, + { + "epoch": 2.2483761649251623, + "grad_norm": 3.5229814517803804, + "learning_rate": 3.1156433667349907e-06, + "loss": 0.1661, + "step": 15923 + }, + { + "epoch": 2.2485173679751482, + "grad_norm": 3.6752757964804696, + "learning_rate": 3.1145377649479635e-06, + "loss": 0.1609, + "step": 15924 + }, + { + "epoch": 2.248658571025134, + "grad_norm": 2.680679589476101, + "learning_rate": 3.1134323231761064e-06, + "loss": 0.148, + "step": 15925 + }, + { + "epoch": 2.24879977407512, + "grad_norm": 2.8394880152347746, + "learning_rate": 3.1123270414451035e-06, + "loss": 0.154, + "step": 15926 + }, + { + "epoch": 2.248940977125106, + "grad_norm": 2.5030502874232106, + "learning_rate": 3.1112219197806492e-06, + "loss": 0.1417, + "step": 15927 + }, + { + "epoch": 2.2490821801750918, + "grad_norm": 3.408623693516801, + "learning_rate": 3.110116958208422e-06, + "loss": 0.1409, + "step": 15928 + }, + { + "epoch": 2.2492233832250776, + "grad_norm": 2.725913603722745, + "learning_rate": 3.1090121567541052e-06, + "loss": 0.1295, + "step": 15929 + }, + { + "epoch": 2.2493645862750635, + "grad_norm": 3.1812211474243535, + "learning_rate": 3.107907515443367e-06, + "loss": 0.2012, + "step": 15930 + }, + { + "epoch": 2.2495057893250494, + "grad_norm": 2.7095059986685874, + "learning_rate": 3.1068030343018773e-06, + "loss": 0.1511, + "step": 15931 + }, + { + "epoch": 2.2496469923750353, + "grad_norm": 3.730795415951922, + "learning_rate": 3.1056987133553118e-06, + "loss": 0.1667, + "step": 15932 + }, + { + "epoch": 2.249788195425021, + "grad_norm": 2.9546025696526916, + "learning_rate": 3.1045945526293307e-06, + "loss": 0.143, + "step": 15933 + }, + { + "epoch": 2.249929398475007, + "grad_norm": 2.9821731714340096, + "learning_rate": 3.103490552149595e-06, + "loss": 0.1519, + "step": 15934 + }, + { + "epoch": 2.250070601524993, + "grad_norm": 2.7749955406618474, + "learning_rate": 3.1023867119417595e-06, + "loss": 0.1658, + "step": 15935 + }, + { + "epoch": 2.250211804574979, + "grad_norm": 2.719883160818092, + "learning_rate": 3.1012830320314793e-06, + "loss": 0.1338, + "step": 15936 + }, + { + "epoch": 2.2503530076249647, + "grad_norm": 2.877843803498858, + "learning_rate": 3.1001795124444003e-06, + "loss": 0.1459, + "step": 15937 + }, + { + "epoch": 2.2504942106749506, + "grad_norm": 3.1247153215527987, + "learning_rate": 3.0990761532061707e-06, + "loss": 0.1487, + "step": 15938 + }, + { + "epoch": 2.2506354137249365, + "grad_norm": 3.5857319961021603, + "learning_rate": 3.097972954342431e-06, + "loss": 0.1488, + "step": 15939 + }, + { + "epoch": 2.2507766167749224, + "grad_norm": 2.777287054092524, + "learning_rate": 3.0968699158788185e-06, + "loss": 0.1348, + "step": 15940 + }, + { + "epoch": 2.2509178198249082, + "grad_norm": 2.5823167660859796, + "learning_rate": 3.095767037840969e-06, + "loss": 0.1207, + "step": 15941 + }, + { + "epoch": 2.251059022874894, + "grad_norm": 2.662811348252803, + "learning_rate": 3.0946643202545113e-06, + "loss": 0.1344, + "step": 15942 + }, + { + "epoch": 2.25120022592488, + "grad_norm": 3.0392760124487426, + "learning_rate": 3.0935617631450686e-06, + "loss": 0.1548, + "step": 15943 + }, + { + "epoch": 2.251341428974866, + "grad_norm": 3.3129323799772803, + "learning_rate": 3.092459366538272e-06, + "loss": 0.1369, + "step": 15944 + }, + { + "epoch": 2.2514826320248518, + "grad_norm": 4.3771175647428295, + "learning_rate": 3.0913571304597367e-06, + "loss": 0.2273, + "step": 15945 + }, + { + "epoch": 2.2516238350748377, + "grad_norm": 3.6558673535206707, + "learning_rate": 3.090255054935081e-06, + "loss": 0.1701, + "step": 15946 + }, + { + "epoch": 2.2517650381248235, + "grad_norm": 3.3488803535084783, + "learning_rate": 3.08915313998991e-06, + "loss": 0.197, + "step": 15947 + }, + { + "epoch": 2.2519062411748094, + "grad_norm": 3.0106710633937874, + "learning_rate": 3.0880513856498363e-06, + "loss": 0.1539, + "step": 15948 + }, + { + "epoch": 2.2520474442247953, + "grad_norm": 2.9668269133519654, + "learning_rate": 3.08694979194046e-06, + "loss": 0.1347, + "step": 15949 + }, + { + "epoch": 2.252188647274781, + "grad_norm": 2.6103102428897524, + "learning_rate": 3.0858483588873878e-06, + "loss": 0.1256, + "step": 15950 + }, + { + "epoch": 2.252329850324767, + "grad_norm": 2.6959023154672925, + "learning_rate": 3.0847470865162143e-06, + "loss": 0.1128, + "step": 15951 + }, + { + "epoch": 2.252471053374753, + "grad_norm": 4.2879187031012975, + "learning_rate": 3.0836459748525316e-06, + "loss": 0.1874, + "step": 15952 + }, + { + "epoch": 2.252612256424739, + "grad_norm": 2.710821606168807, + "learning_rate": 3.082545023921929e-06, + "loss": 0.1333, + "step": 15953 + }, + { + "epoch": 2.2527534594747247, + "grad_norm": 2.8568121927839476, + "learning_rate": 3.081444233749994e-06, + "loss": 0.1517, + "step": 15954 + }, + { + "epoch": 2.2528946625247106, + "grad_norm": 3.3319661943869834, + "learning_rate": 3.080343604362306e-06, + "loss": 0.1604, + "step": 15955 + }, + { + "epoch": 2.2530358655746965, + "grad_norm": 2.71091420696974, + "learning_rate": 3.0792431357844444e-06, + "loss": 0.1147, + "step": 15956 + }, + { + "epoch": 2.2531770686246824, + "grad_norm": 3.0946893320311526, + "learning_rate": 3.0781428280419833e-06, + "loss": 0.1484, + "step": 15957 + }, + { + "epoch": 2.2533182716746682, + "grad_norm": 3.6209029125809593, + "learning_rate": 3.0770426811604946e-06, + "loss": 0.1724, + "step": 15958 + }, + { + "epoch": 2.253459474724654, + "grad_norm": 2.8521644815799223, + "learning_rate": 3.0759426951655437e-06, + "loss": 0.1521, + "step": 15959 + }, + { + "epoch": 2.25360067777464, + "grad_norm": 2.9331253847008862, + "learning_rate": 3.0748428700826938e-06, + "loss": 0.161, + "step": 15960 + }, + { + "epoch": 2.253741880824626, + "grad_norm": 3.1128675442047142, + "learning_rate": 3.073743205937502e-06, + "loss": 0.1423, + "step": 15961 + }, + { + "epoch": 2.253883083874612, + "grad_norm": 3.4116175772612287, + "learning_rate": 3.07264370275553e-06, + "loss": 0.2127, + "step": 15962 + }, + { + "epoch": 2.2540242869245977, + "grad_norm": 3.1708613284402842, + "learning_rate": 3.0715443605623296e-06, + "loss": 0.1744, + "step": 15963 + }, + { + "epoch": 2.2541654899745835, + "grad_norm": 2.8340557324158064, + "learning_rate": 3.0704451793834433e-06, + "loss": 0.1197, + "step": 15964 + }, + { + "epoch": 2.2543066930245694, + "grad_norm": 2.5715013298222256, + "learning_rate": 3.0693461592444184e-06, + "loss": 0.1084, + "step": 15965 + }, + { + "epoch": 2.2544478960745553, + "grad_norm": 3.155852206362035, + "learning_rate": 3.0682473001707925e-06, + "loss": 0.0962, + "step": 15966 + }, + { + "epoch": 2.254589099124541, + "grad_norm": 2.854515415717756, + "learning_rate": 3.067148602188108e-06, + "loss": 0.1431, + "step": 15967 + }, + { + "epoch": 2.254730302174527, + "grad_norm": 3.3971794135987157, + "learning_rate": 3.0660500653218973e-06, + "loss": 0.1616, + "step": 15968 + }, + { + "epoch": 2.254871505224513, + "grad_norm": 4.052321120595911, + "learning_rate": 3.0649516895976883e-06, + "loss": 0.1879, + "step": 15969 + }, + { + "epoch": 2.255012708274499, + "grad_norm": 3.6848999717705944, + "learning_rate": 3.0638534750410065e-06, + "loss": 0.2059, + "step": 15970 + }, + { + "epoch": 2.2551539113244847, + "grad_norm": 3.84793696262174, + "learning_rate": 3.0627554216773736e-06, + "loss": 0.1728, + "step": 15971 + }, + { + "epoch": 2.2552951143744706, + "grad_norm": 3.3204669781264378, + "learning_rate": 3.0616575295323105e-06, + "loss": 0.148, + "step": 15972 + }, + { + "epoch": 2.2554363174244565, + "grad_norm": 3.2646171201971397, + "learning_rate": 3.0605597986313284e-06, + "loss": 0.1766, + "step": 15973 + }, + { + "epoch": 2.2555775204744424, + "grad_norm": 3.613147587267159, + "learning_rate": 3.059462228999941e-06, + "loss": 0.2013, + "step": 15974 + }, + { + "epoch": 2.2557187235244283, + "grad_norm": 3.076365644579705, + "learning_rate": 3.0583648206636542e-06, + "loss": 0.1365, + "step": 15975 + }, + { + "epoch": 2.255859926574414, + "grad_norm": 2.7974674810364033, + "learning_rate": 3.0572675736479696e-06, + "loss": 0.1454, + "step": 15976 + }, + { + "epoch": 2.2560011296244, + "grad_norm": 2.910538353965002, + "learning_rate": 3.0561704879783894e-06, + "loss": 0.1601, + "step": 15977 + }, + { + "epoch": 2.256142332674386, + "grad_norm": 2.6287449440057298, + "learning_rate": 3.055073563680404e-06, + "loss": 0.1075, + "step": 15978 + }, + { + "epoch": 2.256283535724372, + "grad_norm": 3.4962777920009436, + "learning_rate": 3.0539768007795134e-06, + "loss": 0.1837, + "step": 15979 + }, + { + "epoch": 2.2564247387743577, + "grad_norm": 3.42997305081233, + "learning_rate": 3.0528801993012056e-06, + "loss": 0.1689, + "step": 15980 + }, + { + "epoch": 2.2565659418243436, + "grad_norm": 2.904836379274577, + "learning_rate": 3.051783759270959e-06, + "loss": 0.1229, + "step": 15981 + }, + { + "epoch": 2.2567071448743294, + "grad_norm": 2.7457922712241647, + "learning_rate": 3.050687480714256e-06, + "loss": 0.1234, + "step": 15982 + }, + { + "epoch": 2.2568483479243153, + "grad_norm": 2.9450437037687798, + "learning_rate": 3.0495913636565735e-06, + "loss": 0.1468, + "step": 15983 + }, + { + "epoch": 2.256989550974301, + "grad_norm": 2.9528265666902103, + "learning_rate": 3.0484954081233877e-06, + "loss": 0.1237, + "step": 15984 + }, + { + "epoch": 2.257130754024287, + "grad_norm": 3.4168451118246352, + "learning_rate": 3.047399614140166e-06, + "loss": 0.1464, + "step": 15985 + }, + { + "epoch": 2.257271957074273, + "grad_norm": 3.022705208641973, + "learning_rate": 3.046303981732376e-06, + "loss": 0.1638, + "step": 15986 + }, + { + "epoch": 2.257413160124259, + "grad_norm": 3.036554577486767, + "learning_rate": 3.045208510925478e-06, + "loss": 0.1434, + "step": 15987 + }, + { + "epoch": 2.2575543631742447, + "grad_norm": 3.3519187953596234, + "learning_rate": 3.0441132017449305e-06, + "loss": 0.1558, + "step": 15988 + }, + { + "epoch": 2.2576955662242306, + "grad_norm": 2.9712221247839974, + "learning_rate": 3.043018054216188e-06, + "loss": 0.0979, + "step": 15989 + }, + { + "epoch": 2.2578367692742165, + "grad_norm": 3.447719653517833, + "learning_rate": 3.0419230683647018e-06, + "loss": 0.2329, + "step": 15990 + }, + { + "epoch": 2.2579779723242024, + "grad_norm": 2.791136456275009, + "learning_rate": 3.0408282442159177e-06, + "loss": 0.1322, + "step": 15991 + }, + { + "epoch": 2.2581191753741883, + "grad_norm": 3.0448515643531775, + "learning_rate": 3.03973358179528e-06, + "loss": 0.1283, + "step": 15992 + }, + { + "epoch": 2.258260378424174, + "grad_norm": 2.6388983330895877, + "learning_rate": 3.0386390811282283e-06, + "loss": 0.1278, + "step": 15993 + }, + { + "epoch": 2.25840158147416, + "grad_norm": 3.2585495685342636, + "learning_rate": 3.0375447422401982e-06, + "loss": 0.1451, + "step": 15994 + }, + { + "epoch": 2.2585427845241455, + "grad_norm": 3.434081807522444, + "learning_rate": 3.036450565156618e-06, + "loss": 0.1619, + "step": 15995 + }, + { + "epoch": 2.2586839875741314, + "grad_norm": 3.10284723456881, + "learning_rate": 3.0353565499029223e-06, + "loss": 0.1309, + "step": 15996 + }, + { + "epoch": 2.2588251906241172, + "grad_norm": 2.8422981321280574, + "learning_rate": 3.034262696504536e-06, + "loss": 0.1551, + "step": 15997 + }, + { + "epoch": 2.258966393674103, + "grad_norm": 3.3110039711558574, + "learning_rate": 3.0331690049868733e-06, + "loss": 0.1623, + "step": 15998 + }, + { + "epoch": 2.259107596724089, + "grad_norm": 3.51784801673797, + "learning_rate": 3.0320754753753544e-06, + "loss": 0.1737, + "step": 15999 + }, + { + "epoch": 2.259248799774075, + "grad_norm": 3.1596735455445706, + "learning_rate": 3.0309821076953893e-06, + "loss": 0.1686, + "step": 16000 + }, + { + "epoch": 2.2593900028240608, + "grad_norm": 4.713380853169764, + "learning_rate": 3.0298889019723933e-06, + "loss": 0.2414, + "step": 16001 + }, + { + "epoch": 2.2595312058740467, + "grad_norm": 3.5702066039163656, + "learning_rate": 3.028795858231768e-06, + "loss": 0.1509, + "step": 16002 + }, + { + "epoch": 2.2596724089240325, + "grad_norm": 3.3120204009703107, + "learning_rate": 3.0277029764989173e-06, + "loss": 0.1672, + "step": 16003 + }, + { + "epoch": 2.2598136119740184, + "grad_norm": 3.4051309893706168, + "learning_rate": 3.026610256799238e-06, + "loss": 0.135, + "step": 16004 + }, + { + "epoch": 2.2599548150240043, + "grad_norm": 3.90973031788945, + "learning_rate": 3.0255176991581246e-06, + "loss": 0.2069, + "step": 16005 + }, + { + "epoch": 2.26009601807399, + "grad_norm": 2.217134446012374, + "learning_rate": 3.0244253036009684e-06, + "loss": 0.0909, + "step": 16006 + }, + { + "epoch": 2.260237221123976, + "grad_norm": 2.6364313137531474, + "learning_rate": 3.023333070153155e-06, + "loss": 0.141, + "step": 16007 + }, + { + "epoch": 2.260378424173962, + "grad_norm": 4.030734919948177, + "learning_rate": 3.022240998840068e-06, + "loss": 0.2134, + "step": 16008 + }, + { + "epoch": 2.260519627223948, + "grad_norm": 2.8905871539988444, + "learning_rate": 3.0211490896870876e-06, + "loss": 0.1122, + "step": 16009 + }, + { + "epoch": 2.2606608302739337, + "grad_norm": 3.388308306586784, + "learning_rate": 3.0200573427195877e-06, + "loss": 0.1787, + "step": 16010 + }, + { + "epoch": 2.2608020333239196, + "grad_norm": 3.0451652989550575, + "learning_rate": 3.0189657579629405e-06, + "loss": 0.1239, + "step": 16011 + }, + { + "epoch": 2.2609432363739055, + "grad_norm": 3.8794496482929537, + "learning_rate": 3.017874335442512e-06, + "loss": 0.165, + "step": 16012 + }, + { + "epoch": 2.2610844394238914, + "grad_norm": 3.5655350910800716, + "learning_rate": 3.0167830751836712e-06, + "loss": 0.1798, + "step": 16013 + }, + { + "epoch": 2.2612256424738773, + "grad_norm": 3.1560702071672377, + "learning_rate": 3.0156919772117788e-06, + "loss": 0.1423, + "step": 16014 + }, + { + "epoch": 2.261366845523863, + "grad_norm": 3.542024086845946, + "learning_rate": 3.0146010415521865e-06, + "loss": 0.2055, + "step": 16015 + }, + { + "epoch": 2.261508048573849, + "grad_norm": 3.6945103169119884, + "learning_rate": 3.0135102682302477e-06, + "loss": 0.1634, + "step": 16016 + }, + { + "epoch": 2.261649251623835, + "grad_norm": 3.3382928728700856, + "learning_rate": 3.0124196572713104e-06, + "loss": 0.1598, + "step": 16017 + }, + { + "epoch": 2.261790454673821, + "grad_norm": 2.912432573371749, + "learning_rate": 3.011329208700726e-06, + "loss": 0.127, + "step": 16018 + }, + { + "epoch": 2.2619316577238067, + "grad_norm": 2.41699042314935, + "learning_rate": 3.010238922543833e-06, + "loss": 0.1123, + "step": 16019 + }, + { + "epoch": 2.2620728607737925, + "grad_norm": 3.4726001818702765, + "learning_rate": 3.0091487988259684e-06, + "loss": 0.1711, + "step": 16020 + }, + { + "epoch": 2.2622140638237784, + "grad_norm": 3.3626638423332387, + "learning_rate": 3.008058837572466e-06, + "loss": 0.1754, + "step": 16021 + }, + { + "epoch": 2.2623552668737643, + "grad_norm": 3.5004705995564014, + "learning_rate": 3.006969038808658e-06, + "loss": 0.1453, + "step": 16022 + }, + { + "epoch": 2.26249646992375, + "grad_norm": 2.452390999853178, + "learning_rate": 3.005879402559868e-06, + "loss": 0.1088, + "step": 16023 + }, + { + "epoch": 2.262637672973736, + "grad_norm": 2.573946230773676, + "learning_rate": 3.0047899288514213e-06, + "loss": 0.1136, + "step": 16024 + }, + { + "epoch": 2.262778876023722, + "grad_norm": 3.9620008065560968, + "learning_rate": 3.0037006177086347e-06, + "loss": 0.1945, + "step": 16025 + }, + { + "epoch": 2.262920079073708, + "grad_norm": 3.2743056978481144, + "learning_rate": 3.0026114691568255e-06, + "loss": 0.1994, + "step": 16026 + }, + { + "epoch": 2.2630612821236937, + "grad_norm": 2.4687385589606774, + "learning_rate": 3.001522483221302e-06, + "loss": 0.1078, + "step": 16027 + }, + { + "epoch": 2.2632024851736796, + "grad_norm": 3.5934979011028947, + "learning_rate": 3.000433659927375e-06, + "loss": 0.2042, + "step": 16028 + }, + { + "epoch": 2.2633436882236655, + "grad_norm": 3.1195366067675887, + "learning_rate": 2.999344999300343e-06, + "loss": 0.131, + "step": 16029 + }, + { + "epoch": 2.2634848912736514, + "grad_norm": 3.1727138783561717, + "learning_rate": 2.998256501365514e-06, + "loss": 0.1542, + "step": 16030 + }, + { + "epoch": 2.2636260943236373, + "grad_norm": 3.2748814209678923, + "learning_rate": 2.9971681661481823e-06, + "loss": 0.181, + "step": 16031 + }, + { + "epoch": 2.263767297373623, + "grad_norm": 2.935185447841041, + "learning_rate": 2.9960799936736353e-06, + "loss": 0.1354, + "step": 16032 + }, + { + "epoch": 2.263908500423609, + "grad_norm": 2.6563256465441665, + "learning_rate": 2.994991983967165e-06, + "loss": 0.1323, + "step": 16033 + }, + { + "epoch": 2.264049703473595, + "grad_norm": 2.458422506112852, + "learning_rate": 2.993904137054051e-06, + "loss": 0.1259, + "step": 16034 + }, + { + "epoch": 2.264190906523581, + "grad_norm": 2.5679818980366176, + "learning_rate": 2.9928164529595836e-06, + "loss": 0.1203, + "step": 16035 + }, + { + "epoch": 2.2643321095735667, + "grad_norm": 3.593043971331139, + "learning_rate": 2.9917289317090357e-06, + "loss": 0.1544, + "step": 16036 + }, + { + "epoch": 2.2644733126235526, + "grad_norm": 3.851631795913417, + "learning_rate": 2.9906415733276808e-06, + "loss": 0.1807, + "step": 16037 + }, + { + "epoch": 2.2646145156735384, + "grad_norm": 3.6202498583359075, + "learning_rate": 2.9895543778407875e-06, + "loss": 0.1829, + "step": 16038 + }, + { + "epoch": 2.2647557187235243, + "grad_norm": 2.6222753276268254, + "learning_rate": 2.988467345273628e-06, + "loss": 0.1373, + "step": 16039 + }, + { + "epoch": 2.26489692177351, + "grad_norm": 2.809072727836093, + "learning_rate": 2.9873804756514513e-06, + "loss": 0.15, + "step": 16040 + }, + { + "epoch": 2.265038124823496, + "grad_norm": 4.489722921360118, + "learning_rate": 2.9862937689995276e-06, + "loss": 0.2504, + "step": 16041 + }, + { + "epoch": 2.265179327873482, + "grad_norm": 3.5371003871130546, + "learning_rate": 2.9852072253431073e-06, + "loss": 0.1892, + "step": 16042 + }, + { + "epoch": 2.265320530923468, + "grad_norm": 3.2655261010044736, + "learning_rate": 2.984120844707442e-06, + "loss": 0.177, + "step": 16043 + }, + { + "epoch": 2.2654617339734537, + "grad_norm": 3.8855359691683047, + "learning_rate": 2.983034627117779e-06, + "loss": 0.1842, + "step": 16044 + }, + { + "epoch": 2.2656029370234396, + "grad_norm": 3.367519937580669, + "learning_rate": 2.9819485725993603e-06, + "loss": 0.1632, + "step": 16045 + }, + { + "epoch": 2.2657441400734255, + "grad_norm": 2.7435414540425658, + "learning_rate": 2.9808626811774222e-06, + "loss": 0.1455, + "step": 16046 + }, + { + "epoch": 2.2658853431234114, + "grad_norm": 3.297177136591904, + "learning_rate": 2.979776952877208e-06, + "loss": 0.1707, + "step": 16047 + }, + { + "epoch": 2.2660265461733973, + "grad_norm": 3.0626618114999964, + "learning_rate": 2.9786913877239486e-06, + "loss": 0.1593, + "step": 16048 + }, + { + "epoch": 2.266167749223383, + "grad_norm": 2.857778522701081, + "learning_rate": 2.977605985742866e-06, + "loss": 0.1298, + "step": 16049 + }, + { + "epoch": 2.266308952273369, + "grad_norm": 2.8512928199918735, + "learning_rate": 2.976520746959187e-06, + "loss": 0.1599, + "step": 16050 + }, + { + "epoch": 2.266450155323355, + "grad_norm": 3.317713400738987, + "learning_rate": 2.9754356713981337e-06, + "loss": 0.1367, + "step": 16051 + }, + { + "epoch": 2.266591358373341, + "grad_norm": 2.956077434625852, + "learning_rate": 2.9743507590849176e-06, + "loss": 0.1595, + "step": 16052 + }, + { + "epoch": 2.2667325614233267, + "grad_norm": 4.050857510296772, + "learning_rate": 2.9732660100447586e-06, + "loss": 0.2144, + "step": 16053 + }, + { + "epoch": 2.2668737644733126, + "grad_norm": 2.8097234141020127, + "learning_rate": 2.9721814243028635e-06, + "loss": 0.112, + "step": 16054 + }, + { + "epoch": 2.2670149675232985, + "grad_norm": 3.3288259867311454, + "learning_rate": 2.9710970018844378e-06, + "loss": 0.1917, + "step": 16055 + }, + { + "epoch": 2.2671561705732843, + "grad_norm": 3.0252208567989007, + "learning_rate": 2.970012742814684e-06, + "loss": 0.1426, + "step": 16056 + }, + { + "epoch": 2.26729737362327, + "grad_norm": 3.074976023278469, + "learning_rate": 2.968928647118793e-06, + "loss": 0.1334, + "step": 16057 + }, + { + "epoch": 2.267438576673256, + "grad_norm": 3.6919051641301737, + "learning_rate": 2.967844714821966e-06, + "loss": 0.1564, + "step": 16058 + }, + { + "epoch": 2.267579779723242, + "grad_norm": 3.6800699964147117, + "learning_rate": 2.9667609459493907e-06, + "loss": 0.185, + "step": 16059 + }, + { + "epoch": 2.267720982773228, + "grad_norm": 3.608491671446746, + "learning_rate": 2.965677340526254e-06, + "loss": 0.1512, + "step": 16060 + }, + { + "epoch": 2.2678621858232137, + "grad_norm": 3.454797445263629, + "learning_rate": 2.964593898577738e-06, + "loss": 0.1614, + "step": 16061 + }, + { + "epoch": 2.2680033888731996, + "grad_norm": 3.6555995891092756, + "learning_rate": 2.963510620129021e-06, + "loss": 0.1706, + "step": 16062 + }, + { + "epoch": 2.2681445919231855, + "grad_norm": 3.2079649262013783, + "learning_rate": 2.962427505205279e-06, + "loss": 0.1596, + "step": 16063 + }, + { + "epoch": 2.2682857949731714, + "grad_norm": 2.8725066713181024, + "learning_rate": 2.961344553831679e-06, + "loss": 0.1209, + "step": 16064 + }, + { + "epoch": 2.2684269980231573, + "grad_norm": 3.3632100792599777, + "learning_rate": 2.9602617660333988e-06, + "loss": 0.1564, + "step": 16065 + }, + { + "epoch": 2.268568201073143, + "grad_norm": 2.73124856029468, + "learning_rate": 2.959179141835591e-06, + "loss": 0.1284, + "step": 16066 + }, + { + "epoch": 2.268709404123129, + "grad_norm": 3.188947229162904, + "learning_rate": 2.9580966812634194e-06, + "loss": 0.1451, + "step": 16067 + }, + { + "epoch": 2.268850607173115, + "grad_norm": 2.9520718210608226, + "learning_rate": 2.9570143843420394e-06, + "loss": 0.1447, + "step": 16068 + }, + { + "epoch": 2.268991810223101, + "grad_norm": 2.6766851072878044, + "learning_rate": 2.9559322510966004e-06, + "loss": 0.1382, + "step": 16069 + }, + { + "epoch": 2.2691330132730867, + "grad_norm": 2.731130354208133, + "learning_rate": 2.9548502815522573e-06, + "loss": 0.1553, + "step": 16070 + }, + { + "epoch": 2.2692742163230726, + "grad_norm": 3.1233032121382065, + "learning_rate": 2.953768475734151e-06, + "loss": 0.145, + "step": 16071 + }, + { + "epoch": 2.2694154193730585, + "grad_norm": 2.895813151024847, + "learning_rate": 2.952686833667423e-06, + "loss": 0.1397, + "step": 16072 + }, + { + "epoch": 2.2695566224230443, + "grad_norm": 3.668831065355843, + "learning_rate": 2.9516053553772116e-06, + "loss": 0.1747, + "step": 16073 + }, + { + "epoch": 2.2696978254730302, + "grad_norm": 3.283275066842753, + "learning_rate": 2.9505240408886417e-06, + "loss": 0.155, + "step": 16074 + }, + { + "epoch": 2.269839028523016, + "grad_norm": 3.4158401549923516, + "learning_rate": 2.9494428902268524e-06, + "loss": 0.1539, + "step": 16075 + }, + { + "epoch": 2.269980231573002, + "grad_norm": 2.951928272704684, + "learning_rate": 2.948361903416965e-06, + "loss": 0.1324, + "step": 16076 + }, + { + "epoch": 2.270121434622988, + "grad_norm": 3.580675371716236, + "learning_rate": 2.947281080484101e-06, + "loss": 0.1703, + "step": 16077 + }, + { + "epoch": 2.2702626376729738, + "grad_norm": 3.046173921325846, + "learning_rate": 2.9462004214533803e-06, + "loss": 0.1514, + "step": 16078 + }, + { + "epoch": 2.2704038407229596, + "grad_norm": 3.975863160832691, + "learning_rate": 2.945119926349914e-06, + "loss": 0.207, + "step": 16079 + }, + { + "epoch": 2.2705450437729455, + "grad_norm": 3.450429182877458, + "learning_rate": 2.944039595198814e-06, + "loss": 0.1848, + "step": 16080 + }, + { + "epoch": 2.2706862468229314, + "grad_norm": 3.085378082509213, + "learning_rate": 2.942959428025185e-06, + "loss": 0.128, + "step": 16081 + }, + { + "epoch": 2.2708274498729173, + "grad_norm": 3.080534411275803, + "learning_rate": 2.9418794248541362e-06, + "loss": 0.1445, + "step": 16082 + }, + { + "epoch": 2.270968652922903, + "grad_norm": 3.2780679950187217, + "learning_rate": 2.9407995857107584e-06, + "loss": 0.1601, + "step": 16083 + }, + { + "epoch": 2.271109855972889, + "grad_norm": 3.0218570548605226, + "learning_rate": 2.9397199106201492e-06, + "loss": 0.1452, + "step": 16084 + }, + { + "epoch": 2.271251059022875, + "grad_norm": 3.47739250889305, + "learning_rate": 2.938640399607401e-06, + "loss": 0.1787, + "step": 16085 + }, + { + "epoch": 2.271392262072861, + "grad_norm": 2.9404597187305446, + "learning_rate": 2.937561052697597e-06, + "loss": 0.1245, + "step": 16086 + }, + { + "epoch": 2.2715334651228467, + "grad_norm": 3.9411792189755137, + "learning_rate": 2.9364818699158272e-06, + "loss": 0.1651, + "step": 16087 + }, + { + "epoch": 2.2716746681728326, + "grad_norm": 2.857453763521429, + "learning_rate": 2.935402851287168e-06, + "loss": 0.1127, + "step": 16088 + }, + { + "epoch": 2.2718158712228185, + "grad_norm": 3.4317992471579917, + "learning_rate": 2.9343239968366956e-06, + "loss": 0.1798, + "step": 16089 + }, + { + "epoch": 2.2719570742728044, + "grad_norm": 2.886906852786769, + "learning_rate": 2.933245306589485e-06, + "loss": 0.1357, + "step": 16090 + }, + { + "epoch": 2.2720982773227902, + "grad_norm": 3.2961275322561137, + "learning_rate": 2.9321667805705955e-06, + "loss": 0.1787, + "step": 16091 + }, + { + "epoch": 2.272239480372776, + "grad_norm": 3.6216246093294986, + "learning_rate": 2.9310884188051013e-06, + "loss": 0.1803, + "step": 16092 + }, + { + "epoch": 2.272380683422762, + "grad_norm": 3.143553354790983, + "learning_rate": 2.930010221318059e-06, + "loss": 0.1554, + "step": 16093 + }, + { + "epoch": 2.272521886472748, + "grad_norm": 3.3982342576423448, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.1354, + "step": 16094 + }, + { + "epoch": 2.2726630895227338, + "grad_norm": 3.658347651914011, + "learning_rate": 2.927854319279555e-06, + "loss": 0.2104, + "step": 16095 + }, + { + "epoch": 2.2728042925727197, + "grad_norm": 4.0475246092893755, + "learning_rate": 2.926776614778195e-06, + "loss": 0.174, + "step": 16096 + }, + { + "epoch": 2.2729454956227055, + "grad_norm": 2.7303802434421045, + "learning_rate": 2.9256990746554926e-06, + "loss": 0.1163, + "step": 16097 + }, + { + "epoch": 2.2730866986726914, + "grad_norm": 3.008462571478294, + "learning_rate": 2.924621698936485e-06, + "loss": 0.1274, + "step": 16098 + }, + { + "epoch": 2.2732279017226773, + "grad_norm": 3.2946756691883494, + "learning_rate": 2.9235444876462194e-06, + "loss": 0.1736, + "step": 16099 + }, + { + "epoch": 2.273369104772663, + "grad_norm": 3.4466583571208234, + "learning_rate": 2.9224674408097207e-06, + "loss": 0.1541, + "step": 16100 + }, + { + "epoch": 2.273510307822649, + "grad_norm": 3.509852167462246, + "learning_rate": 2.921390558452023e-06, + "loss": 0.1716, + "step": 16101 + }, + { + "epoch": 2.273651510872635, + "grad_norm": 2.9591048243140103, + "learning_rate": 2.9203138405981514e-06, + "loss": 0.136, + "step": 16102 + }, + { + "epoch": 2.273792713922621, + "grad_norm": 3.7624845468218338, + "learning_rate": 2.9192372872731255e-06, + "loss": 0.1906, + "step": 16103 + }, + { + "epoch": 2.2739339169726067, + "grad_norm": 3.332172015065085, + "learning_rate": 2.9181608985019705e-06, + "loss": 0.1636, + "step": 16104 + }, + { + "epoch": 2.2740751200225926, + "grad_norm": 3.806964540947606, + "learning_rate": 2.917084674309697e-06, + "loss": 0.1646, + "step": 16105 + }, + { + "epoch": 2.2742163230725785, + "grad_norm": 3.0832064187893486, + "learning_rate": 2.9160086147213174e-06, + "loss": 0.1408, + "step": 16106 + }, + { + "epoch": 2.2743575261225644, + "grad_norm": 3.5271772292147423, + "learning_rate": 2.9149327197618405e-06, + "loss": 0.139, + "step": 16107 + }, + { + "epoch": 2.2744987291725502, + "grad_norm": 3.267779605261069, + "learning_rate": 2.913856989456262e-06, + "loss": 0.142, + "step": 16108 + }, + { + "epoch": 2.274639932222536, + "grad_norm": 3.213130036633731, + "learning_rate": 2.9127814238295904e-06, + "loss": 0.1281, + "step": 16109 + }, + { + "epoch": 2.274781135272522, + "grad_norm": 3.2771299997528454, + "learning_rate": 2.911706022906816e-06, + "loss": 0.1527, + "step": 16110 + }, + { + "epoch": 2.274922338322508, + "grad_norm": 3.281075162465656, + "learning_rate": 2.9106307867129347e-06, + "loss": 0.1357, + "step": 16111 + }, + { + "epoch": 2.275063541372494, + "grad_norm": 3.995887019693327, + "learning_rate": 2.90955571527293e-06, + "loss": 0.1807, + "step": 16112 + }, + { + "epoch": 2.2752047444224797, + "grad_norm": 2.519750536824236, + "learning_rate": 2.90848080861179e-06, + "loss": 0.128, + "step": 16113 + }, + { + "epoch": 2.2753459474724655, + "grad_norm": 3.0061302800307006, + "learning_rate": 2.907406066754492e-06, + "loss": 0.1212, + "step": 16114 + }, + { + "epoch": 2.2754871505224514, + "grad_norm": 3.201032334456864, + "learning_rate": 2.906331489726012e-06, + "loss": 0.1468, + "step": 16115 + }, + { + "epoch": 2.2756283535724373, + "grad_norm": 3.323340927858849, + "learning_rate": 2.905257077551331e-06, + "loss": 0.1444, + "step": 16116 + }, + { + "epoch": 2.275769556622423, + "grad_norm": 3.2159702675212642, + "learning_rate": 2.904182830255408e-06, + "loss": 0.1591, + "step": 16117 + }, + { + "epoch": 2.275910759672409, + "grad_norm": 3.2233240608632845, + "learning_rate": 2.9031087478632116e-06, + "loss": 0.1308, + "step": 16118 + }, + { + "epoch": 2.276051962722395, + "grad_norm": 3.1842154198087758, + "learning_rate": 2.9020348303997024e-06, + "loss": 0.1604, + "step": 16119 + }, + { + "epoch": 2.276193165772381, + "grad_norm": 2.852307533738364, + "learning_rate": 2.900961077889837e-06, + "loss": 0.1508, + "step": 16120 + }, + { + "epoch": 2.2763343688223667, + "grad_norm": 2.8394724809355885, + "learning_rate": 2.8998874903585716e-06, + "loss": 0.1441, + "step": 16121 + }, + { + "epoch": 2.2764755718723526, + "grad_norm": 3.833017519579199, + "learning_rate": 2.898814067830855e-06, + "loss": 0.1797, + "step": 16122 + }, + { + "epoch": 2.2766167749223385, + "grad_norm": 3.641567866132091, + "learning_rate": 2.8977408103316327e-06, + "loss": 0.2046, + "step": 16123 + }, + { + "epoch": 2.2767579779723244, + "grad_norm": 3.7412715231506417, + "learning_rate": 2.8966677178858506e-06, + "loss": 0.1671, + "step": 16124 + }, + { + "epoch": 2.2768991810223103, + "grad_norm": 3.186999532013299, + "learning_rate": 2.895594790518437e-06, + "loss": 0.167, + "step": 16125 + }, + { + "epoch": 2.277040384072296, + "grad_norm": 3.499351900208958, + "learning_rate": 2.894522028254334e-06, + "loss": 0.1419, + "step": 16126 + }, + { + "epoch": 2.277181587122282, + "grad_norm": 3.1364647995039565, + "learning_rate": 2.8934494311184715e-06, + "loss": 0.1695, + "step": 16127 + }, + { + "epoch": 2.277322790172268, + "grad_norm": 3.1022172338303706, + "learning_rate": 2.892376999135774e-06, + "loss": 0.1569, + "step": 16128 + }, + { + "epoch": 2.277463993222254, + "grad_norm": 3.251660456231205, + "learning_rate": 2.891304732331167e-06, + "loss": 0.1788, + "step": 16129 + }, + { + "epoch": 2.2776051962722397, + "grad_norm": 2.525042373940704, + "learning_rate": 2.890232630729567e-06, + "loss": 0.1148, + "step": 16130 + }, + { + "epoch": 2.2777463993222256, + "grad_norm": 2.8033016117591925, + "learning_rate": 2.8891606943558904e-06, + "loss": 0.1487, + "step": 16131 + }, + { + "epoch": 2.2778876023722114, + "grad_norm": 2.6325624485514627, + "learning_rate": 2.888088923235045e-06, + "loss": 0.134, + "step": 16132 + }, + { + "epoch": 2.2780288054221973, + "grad_norm": 3.5994094114302717, + "learning_rate": 2.8870173173919493e-06, + "loss": 0.1856, + "step": 16133 + }, + { + "epoch": 2.278170008472183, + "grad_norm": 3.678760229406727, + "learning_rate": 2.885945876851495e-06, + "loss": 0.1696, + "step": 16134 + }, + { + "epoch": 2.278311211522169, + "grad_norm": 2.6138023410968447, + "learning_rate": 2.8848746016385855e-06, + "loss": 0.0902, + "step": 16135 + }, + { + "epoch": 2.278452414572155, + "grad_norm": 2.9002269368750477, + "learning_rate": 2.8838034917781187e-06, + "loss": 0.1308, + "step": 16136 + }, + { + "epoch": 2.278593617622141, + "grad_norm": 2.579318566007883, + "learning_rate": 2.8827325472949817e-06, + "loss": 0.1105, + "step": 16137 + }, + { + "epoch": 2.2787348206721267, + "grad_norm": 2.95355286746537, + "learning_rate": 2.8816617682140703e-06, + "loss": 0.1441, + "step": 16138 + }, + { + "epoch": 2.2788760237221126, + "grad_norm": 3.2984793360567832, + "learning_rate": 2.8805911545602647e-06, + "loss": 0.169, + "step": 16139 + }, + { + "epoch": 2.2790172267720985, + "grad_norm": 3.220457821942883, + "learning_rate": 2.879520706358446e-06, + "loss": 0.1592, + "step": 16140 + }, + { + "epoch": 2.2791584298220844, + "grad_norm": 2.9911521821162403, + "learning_rate": 2.878450423633494e-06, + "loss": 0.1566, + "step": 16141 + }, + { + "epoch": 2.2792996328720703, + "grad_norm": 4.387026578602758, + "learning_rate": 2.8773803064102758e-06, + "loss": 0.2511, + "step": 16142 + }, + { + "epoch": 2.279440835922056, + "grad_norm": 2.978360052431601, + "learning_rate": 2.876310354713661e-06, + "loss": 0.1453, + "step": 16143 + }, + { + "epoch": 2.279582038972042, + "grad_norm": 3.8948114173134987, + "learning_rate": 2.875240568568518e-06, + "loss": 0.1868, + "step": 16144 + }, + { + "epoch": 2.279723242022028, + "grad_norm": 4.071909341580717, + "learning_rate": 2.8741709479997095e-06, + "loss": 0.1901, + "step": 16145 + }, + { + "epoch": 2.279864445072014, + "grad_norm": 2.7004379865966626, + "learning_rate": 2.873101493032089e-06, + "loss": 0.1322, + "step": 16146 + }, + { + "epoch": 2.2800056481219997, + "grad_norm": 2.9351653660375887, + "learning_rate": 2.8720322036905133e-06, + "loss": 0.1469, + "step": 16147 + }, + { + "epoch": 2.280146851171985, + "grad_norm": 2.8043995509722213, + "learning_rate": 2.87096307999983e-06, + "loss": 0.1312, + "step": 16148 + }, + { + "epoch": 2.280288054221971, + "grad_norm": 3.2627653682644833, + "learning_rate": 2.869894121984883e-06, + "loss": 0.1867, + "step": 16149 + }, + { + "epoch": 2.280429257271957, + "grad_norm": 4.495830857893143, + "learning_rate": 2.868825329670524e-06, + "loss": 0.2592, + "step": 16150 + }, + { + "epoch": 2.2805704603219428, + "grad_norm": 2.89965539871878, + "learning_rate": 2.867756703081581e-06, + "loss": 0.1415, + "step": 16151 + }, + { + "epoch": 2.2807116633719287, + "grad_norm": 3.0679773705895528, + "learning_rate": 2.866688242242892e-06, + "loss": 0.1259, + "step": 16152 + }, + { + "epoch": 2.2808528664219145, + "grad_norm": 4.149630647793469, + "learning_rate": 2.865619947179288e-06, + "loss": 0.2112, + "step": 16153 + }, + { + "epoch": 2.2809940694719004, + "grad_norm": 2.2882413855194024, + "learning_rate": 2.8645518179155953e-06, + "loss": 0.1054, + "step": 16154 + }, + { + "epoch": 2.2811352725218863, + "grad_norm": 3.115698132113545, + "learning_rate": 2.863483854476633e-06, + "loss": 0.157, + "step": 16155 + }, + { + "epoch": 2.281276475571872, + "grad_norm": 4.14475923061119, + "learning_rate": 2.8624160568872273e-06, + "loss": 0.1857, + "step": 16156 + }, + { + "epoch": 2.281417678621858, + "grad_norm": 3.236885607514476, + "learning_rate": 2.86134842517219e-06, + "loss": 0.1408, + "step": 16157 + }, + { + "epoch": 2.281558881671844, + "grad_norm": 4.0075885026696225, + "learning_rate": 2.860280959356336e-06, + "loss": 0.164, + "step": 16158 + }, + { + "epoch": 2.28170008472183, + "grad_norm": 3.128480883071816, + "learning_rate": 2.859213659464466e-06, + "loss": 0.1194, + "step": 16159 + }, + { + "epoch": 2.2818412877718157, + "grad_norm": 2.8884005836271274, + "learning_rate": 2.8581465255213834e-06, + "loss": 0.1406, + "step": 16160 + }, + { + "epoch": 2.2819824908218016, + "grad_norm": 3.126582247591714, + "learning_rate": 2.857079557551894e-06, + "loss": 0.1195, + "step": 16161 + }, + { + "epoch": 2.2821236938717875, + "grad_norm": 2.97370140365119, + "learning_rate": 2.8560127555807902e-06, + "loss": 0.1145, + "step": 16162 + }, + { + "epoch": 2.2822648969217734, + "grad_norm": 2.5379760695646945, + "learning_rate": 2.8549461196328667e-06, + "loss": 0.137, + "step": 16163 + }, + { + "epoch": 2.2824060999717593, + "grad_norm": 3.639556872888662, + "learning_rate": 2.853879649732908e-06, + "loss": 0.1644, + "step": 16164 + }, + { + "epoch": 2.282547303021745, + "grad_norm": 2.770458636117854, + "learning_rate": 2.8528133459057006e-06, + "loss": 0.1104, + "step": 16165 + }, + { + "epoch": 2.282688506071731, + "grad_norm": 4.157126343238318, + "learning_rate": 2.8517472081760243e-06, + "loss": 0.1556, + "step": 16166 + }, + { + "epoch": 2.282829709121717, + "grad_norm": 2.8682976525914614, + "learning_rate": 2.8506812365686553e-06, + "loss": 0.1219, + "step": 16167 + }, + { + "epoch": 2.282970912171703, + "grad_norm": 3.561886145752162, + "learning_rate": 2.849615431108368e-06, + "loss": 0.1743, + "step": 16168 + }, + { + "epoch": 2.2831121152216887, + "grad_norm": 3.5630927191255477, + "learning_rate": 2.848549791819929e-06, + "loss": 0.1933, + "step": 16169 + }, + { + "epoch": 2.2832533182716745, + "grad_norm": 2.8654721228769215, + "learning_rate": 2.847484318728105e-06, + "loss": 0.1106, + "step": 16170 + }, + { + "epoch": 2.2833945213216604, + "grad_norm": 3.1760998089957746, + "learning_rate": 2.8464190118576564e-06, + "loss": 0.1552, + "step": 16171 + }, + { + "epoch": 2.2835357243716463, + "grad_norm": 2.5939127100281323, + "learning_rate": 2.845353871233337e-06, + "loss": 0.1194, + "step": 16172 + }, + { + "epoch": 2.283676927421632, + "grad_norm": 3.27282576643028, + "learning_rate": 2.8442888968799075e-06, + "loss": 0.1521, + "step": 16173 + }, + { + "epoch": 2.283818130471618, + "grad_norm": 3.520623297717128, + "learning_rate": 2.843224088822113e-06, + "loss": 0.2081, + "step": 16174 + }, + { + "epoch": 2.283959333521604, + "grad_norm": 2.849044914525224, + "learning_rate": 2.8421594470847038e-06, + "loss": 0.1487, + "step": 16175 + }, + { + "epoch": 2.28410053657159, + "grad_norm": 3.016861071385987, + "learning_rate": 2.841094971692414e-06, + "loss": 0.1173, + "step": 16176 + }, + { + "epoch": 2.2842417396215757, + "grad_norm": 3.552710721577386, + "learning_rate": 2.8400306626699835e-06, + "loss": 0.199, + "step": 16177 + }, + { + "epoch": 2.2843829426715616, + "grad_norm": 2.914693611275363, + "learning_rate": 2.83896652004215e-06, + "loss": 0.1307, + "step": 16178 + }, + { + "epoch": 2.2845241457215475, + "grad_norm": 2.7661374613893632, + "learning_rate": 2.8379025438336426e-06, + "loss": 0.1538, + "step": 16179 + }, + { + "epoch": 2.2846653487715334, + "grad_norm": 3.1976316180120254, + "learning_rate": 2.836838734069187e-06, + "loss": 0.1879, + "step": 16180 + }, + { + "epoch": 2.2848065518215193, + "grad_norm": 3.2321613043896313, + "learning_rate": 2.835775090773506e-06, + "loss": 0.198, + "step": 16181 + }, + { + "epoch": 2.284947754871505, + "grad_norm": 2.4183054306725276, + "learning_rate": 2.834711613971317e-06, + "loss": 0.1037, + "step": 16182 + }, + { + "epoch": 2.285088957921491, + "grad_norm": 3.3455648607745108, + "learning_rate": 2.833648303687336e-06, + "loss": 0.1771, + "step": 16183 + }, + { + "epoch": 2.285230160971477, + "grad_norm": 3.233269220970347, + "learning_rate": 2.832585159946274e-06, + "loss": 0.1446, + "step": 16184 + }, + { + "epoch": 2.285371364021463, + "grad_norm": 2.788893748542876, + "learning_rate": 2.831522182772837e-06, + "loss": 0.1216, + "step": 16185 + }, + { + "epoch": 2.2855125670714487, + "grad_norm": 3.364026722353922, + "learning_rate": 2.8304593721917283e-06, + "loss": 0.1396, + "step": 16186 + }, + { + "epoch": 2.2856537701214346, + "grad_norm": 3.4594601238355303, + "learning_rate": 2.829396728227648e-06, + "loss": 0.1564, + "step": 16187 + }, + { + "epoch": 2.2857949731714204, + "grad_norm": 3.4510983797044315, + "learning_rate": 2.8283342509052915e-06, + "loss": 0.1907, + "step": 16188 + }, + { + "epoch": 2.2859361762214063, + "grad_norm": 2.9210346073321425, + "learning_rate": 2.827271940249345e-06, + "loss": 0.1144, + "step": 16189 + }, + { + "epoch": 2.286077379271392, + "grad_norm": 2.8289541045240956, + "learning_rate": 2.8262097962845058e-06, + "loss": 0.1514, + "step": 16190 + }, + { + "epoch": 2.286218582321378, + "grad_norm": 3.4686969302543944, + "learning_rate": 2.825147819035452e-06, + "loss": 0.1227, + "step": 16191 + }, + { + "epoch": 2.286359785371364, + "grad_norm": 3.001410351566293, + "learning_rate": 2.8240860085268683e-06, + "loss": 0.113, + "step": 16192 + }, + { + "epoch": 2.28650098842135, + "grad_norm": 3.697146904381123, + "learning_rate": 2.8230243647834222e-06, + "loss": 0.1682, + "step": 16193 + }, + { + "epoch": 2.2866421914713357, + "grad_norm": 3.0825384812152845, + "learning_rate": 2.821962887829789e-06, + "loss": 0.145, + "step": 16194 + }, + { + "epoch": 2.2867833945213216, + "grad_norm": 2.5799916707628, + "learning_rate": 2.8209015776906402e-06, + "loss": 0.1394, + "step": 16195 + }, + { + "epoch": 2.2869245975713075, + "grad_norm": 3.5460090896044565, + "learning_rate": 2.819840434390638e-06, + "loss": 0.1884, + "step": 16196 + }, + { + "epoch": 2.2870658006212934, + "grad_norm": 2.8847101321946496, + "learning_rate": 2.8187794579544434e-06, + "loss": 0.1251, + "step": 16197 + }, + { + "epoch": 2.2872070036712793, + "grad_norm": 2.762101768516108, + "learning_rate": 2.8177186484067143e-06, + "loss": 0.1392, + "step": 16198 + }, + { + "epoch": 2.287348206721265, + "grad_norm": 2.8149064006393956, + "learning_rate": 2.8166580057721003e-06, + "loss": 0.158, + "step": 16199 + }, + { + "epoch": 2.287489409771251, + "grad_norm": 2.7569289490056432, + "learning_rate": 2.8155975300752524e-06, + "loss": 0.1333, + "step": 16200 + }, + { + "epoch": 2.287630612821237, + "grad_norm": 2.6819643345600896, + "learning_rate": 2.814537221340816e-06, + "loss": 0.1474, + "step": 16201 + }, + { + "epoch": 2.287771815871223, + "grad_norm": 3.171087646834457, + "learning_rate": 2.8134770795934307e-06, + "loss": 0.16, + "step": 16202 + }, + { + "epoch": 2.2879130189212087, + "grad_norm": 3.0825494978607906, + "learning_rate": 2.8124171048577354e-06, + "loss": 0.1421, + "step": 16203 + }, + { + "epoch": 2.2880542219711946, + "grad_norm": 3.246109040319658, + "learning_rate": 2.811357297158361e-06, + "loss": 0.1304, + "step": 16204 + }, + { + "epoch": 2.2881954250211805, + "grad_norm": 3.319768485926407, + "learning_rate": 2.8102976565199393e-06, + "loss": 0.1414, + "step": 16205 + }, + { + "epoch": 2.2883366280711663, + "grad_norm": 2.935836236378914, + "learning_rate": 2.809238182967092e-06, + "loss": 0.16, + "step": 16206 + }, + { + "epoch": 2.288477831121152, + "grad_norm": 2.958639881425088, + "learning_rate": 2.8081788765244465e-06, + "loss": 0.1209, + "step": 16207 + }, + { + "epoch": 2.288619034171138, + "grad_norm": 3.0518288070660393, + "learning_rate": 2.807119737216619e-06, + "loss": 0.1567, + "step": 16208 + }, + { + "epoch": 2.288760237221124, + "grad_norm": 3.6768568197538345, + "learning_rate": 2.8060607650682247e-06, + "loss": 0.192, + "step": 16209 + }, + { + "epoch": 2.28890144027111, + "grad_norm": 2.6113234338183853, + "learning_rate": 2.805001960103868e-06, + "loss": 0.1339, + "step": 16210 + }, + { + "epoch": 2.2890426433210957, + "grad_norm": 4.210042933919132, + "learning_rate": 2.803943322348156e-06, + "loss": 0.1661, + "step": 16211 + }, + { + "epoch": 2.2891838463710816, + "grad_norm": 4.987218242996483, + "learning_rate": 2.8028848518256967e-06, + "loss": 0.2178, + "step": 16212 + }, + { + "epoch": 2.2893250494210675, + "grad_norm": 3.3982624818812934, + "learning_rate": 2.801826548561085e-06, + "loss": 0.1817, + "step": 16213 + }, + { + "epoch": 2.2894662524710534, + "grad_norm": 2.859248315879725, + "learning_rate": 2.800768412578916e-06, + "loss": 0.14, + "step": 16214 + }, + { + "epoch": 2.2896074555210393, + "grad_norm": 2.950914118197667, + "learning_rate": 2.7997104439037794e-06, + "loss": 0.1423, + "step": 16215 + }, + { + "epoch": 2.289748658571025, + "grad_norm": 3.735334797187053, + "learning_rate": 2.7986526425602623e-06, + "loss": 0.1682, + "step": 16216 + }, + { + "epoch": 2.289889861621011, + "grad_norm": 3.146115289519885, + "learning_rate": 2.7975950085729486e-06, + "loss": 0.1595, + "step": 16217 + }, + { + "epoch": 2.290031064670997, + "grad_norm": 2.791920861933825, + "learning_rate": 2.7965375419664154e-06, + "loss": 0.0993, + "step": 16218 + }, + { + "epoch": 2.290172267720983, + "grad_norm": 3.6471308111055114, + "learning_rate": 2.7954802427652395e-06, + "loss": 0.1891, + "step": 16219 + }, + { + "epoch": 2.2903134707709687, + "grad_norm": 2.9640458175510402, + "learning_rate": 2.794423110993991e-06, + "loss": 0.1427, + "step": 16220 + }, + { + "epoch": 2.2904546738209546, + "grad_norm": 3.3113590054874766, + "learning_rate": 2.793366146677239e-06, + "loss": 0.1483, + "step": 16221 + }, + { + "epoch": 2.2905958768709405, + "grad_norm": 3.221075178050502, + "learning_rate": 2.7923093498395438e-06, + "loss": 0.1811, + "step": 16222 + }, + { + "epoch": 2.2907370799209263, + "grad_norm": 3.3861933172694814, + "learning_rate": 2.7912527205054642e-06, + "loss": 0.1421, + "step": 16223 + }, + { + "epoch": 2.2908782829709122, + "grad_norm": 2.9485036054415708, + "learning_rate": 2.79019625869956e-06, + "loss": 0.1453, + "step": 16224 + }, + { + "epoch": 2.291019486020898, + "grad_norm": 3.591660777868365, + "learning_rate": 2.7891399644463824e-06, + "loss": 0.1713, + "step": 16225 + }, + { + "epoch": 2.291160689070884, + "grad_norm": 3.630841380610836, + "learning_rate": 2.7880838377704812e-06, + "loss": 0.1705, + "step": 16226 + }, + { + "epoch": 2.29130189212087, + "grad_norm": 3.6032448309651817, + "learning_rate": 2.7870278786963935e-06, + "loss": 0.1852, + "step": 16227 + }, + { + "epoch": 2.2914430951708558, + "grad_norm": 3.2432023962765677, + "learning_rate": 2.7859720872486585e-06, + "loss": 0.1471, + "step": 16228 + }, + { + "epoch": 2.2915842982208416, + "grad_norm": 3.3863363074664043, + "learning_rate": 2.7849164634518213e-06, + "loss": 0.1993, + "step": 16229 + }, + { + "epoch": 2.2917255012708275, + "grad_norm": 3.212856447307632, + "learning_rate": 2.7838610073304096e-06, + "loss": 0.1318, + "step": 16230 + }, + { + "epoch": 2.2918667043208134, + "grad_norm": 3.0287469825254223, + "learning_rate": 2.7828057189089507e-06, + "loss": 0.1553, + "step": 16231 + }, + { + "epoch": 2.2920079073707993, + "grad_norm": 3.643180666768194, + "learning_rate": 2.7817505982119708e-06, + "loss": 0.171, + "step": 16232 + }, + { + "epoch": 2.292149110420785, + "grad_norm": 3.021587489548783, + "learning_rate": 2.780695645263989e-06, + "loss": 0.1886, + "step": 16233 + }, + { + "epoch": 2.292290313470771, + "grad_norm": 3.225652954703523, + "learning_rate": 2.779640860089523e-06, + "loss": 0.1385, + "step": 16234 + }, + { + "epoch": 2.292431516520757, + "grad_norm": 2.5161752015034544, + "learning_rate": 2.778586242713085e-06, + "loss": 0.1333, + "step": 16235 + }, + { + "epoch": 2.292572719570743, + "grad_norm": 3.122896634932714, + "learning_rate": 2.777531793159185e-06, + "loss": 0.1178, + "step": 16236 + }, + { + "epoch": 2.2927139226207287, + "grad_norm": 2.776338681183214, + "learning_rate": 2.7764775114523256e-06, + "loss": 0.1477, + "step": 16237 + }, + { + "epoch": 2.2928551256707146, + "grad_norm": 3.641924703841348, + "learning_rate": 2.77542339761701e-06, + "loss": 0.2117, + "step": 16238 + }, + { + "epoch": 2.2929963287207005, + "grad_norm": 3.173805425520337, + "learning_rate": 2.7743694516777342e-06, + "loss": 0.174, + "step": 16239 + }, + { + "epoch": 2.2931375317706864, + "grad_norm": 3.6353883220133194, + "learning_rate": 2.7733156736589893e-06, + "loss": 0.1883, + "step": 16240 + }, + { + "epoch": 2.2932787348206722, + "grad_norm": 3.132734921490453, + "learning_rate": 2.7722620635852704e-06, + "loss": 0.1565, + "step": 16241 + }, + { + "epoch": 2.293419937870658, + "grad_norm": 3.0375963631721055, + "learning_rate": 2.7712086214810585e-06, + "loss": 0.1911, + "step": 16242 + }, + { + "epoch": 2.293561140920644, + "grad_norm": 3.038182211738905, + "learning_rate": 2.770155347370841e-06, + "loss": 0.1352, + "step": 16243 + }, + { + "epoch": 2.29370234397063, + "grad_norm": 2.7768297029408027, + "learning_rate": 2.769102241279087e-06, + "loss": 0.1464, + "step": 16244 + }, + { + "epoch": 2.2938435470206158, + "grad_norm": 2.905165919023434, + "learning_rate": 2.7680493032302733e-06, + "loss": 0.1682, + "step": 16245 + }, + { + "epoch": 2.2939847500706017, + "grad_norm": 2.846355514219431, + "learning_rate": 2.7669965332488692e-06, + "loss": 0.1599, + "step": 16246 + }, + { + "epoch": 2.2941259531205875, + "grad_norm": 2.961876815715632, + "learning_rate": 2.765943931359344e-06, + "loss": 0.1595, + "step": 16247 + }, + { + "epoch": 2.2942671561705734, + "grad_norm": 3.4345788982104843, + "learning_rate": 2.7648914975861573e-06, + "loss": 0.1758, + "step": 16248 + }, + { + "epoch": 2.2944083592205593, + "grad_norm": 2.8097561282570744, + "learning_rate": 2.7638392319537686e-06, + "loss": 0.1266, + "step": 16249 + }, + { + "epoch": 2.294549562270545, + "grad_norm": 2.595316219740321, + "learning_rate": 2.7627871344866308e-06, + "loss": 0.1308, + "step": 16250 + }, + { + "epoch": 2.294690765320531, + "grad_norm": 3.00192667841996, + "learning_rate": 2.7617352052091937e-06, + "loss": 0.1235, + "step": 16251 + }, + { + "epoch": 2.294831968370517, + "grad_norm": 3.5349008696812043, + "learning_rate": 2.760683444145905e-06, + "loss": 0.1464, + "step": 16252 + }, + { + "epoch": 2.294973171420503, + "grad_norm": 3.3592649697834243, + "learning_rate": 2.7596318513212074e-06, + "loss": 0.1644, + "step": 16253 + }, + { + "epoch": 2.2951143744704887, + "grad_norm": 3.2780522286438214, + "learning_rate": 2.7585804267595383e-06, + "loss": 0.1714, + "step": 16254 + }, + { + "epoch": 2.2952555775204746, + "grad_norm": 3.264252020012359, + "learning_rate": 2.7575291704853325e-06, + "loss": 0.1848, + "step": 16255 + }, + { + "epoch": 2.2953967805704605, + "grad_norm": 2.8814363551555573, + "learning_rate": 2.756478082523021e-06, + "loss": 0.1094, + "step": 16256 + }, + { + "epoch": 2.2955379836204464, + "grad_norm": 3.645822491894983, + "learning_rate": 2.7554271628970307e-06, + "loss": 0.175, + "step": 16257 + }, + { + "epoch": 2.2956791866704322, + "grad_norm": 3.318181780189427, + "learning_rate": 2.7543764116317806e-06, + "loss": 0.1603, + "step": 16258 + }, + { + "epoch": 2.295820389720418, + "grad_norm": 3.7858457571820465, + "learning_rate": 2.7533258287516973e-06, + "loss": 0.1911, + "step": 16259 + }, + { + "epoch": 2.295961592770404, + "grad_norm": 3.289525056025436, + "learning_rate": 2.7522754142811957e-06, + "loss": 0.163, + "step": 16260 + }, + { + "epoch": 2.29610279582039, + "grad_norm": 3.1204496424769936, + "learning_rate": 2.75122516824468e-06, + "loss": 0.1424, + "step": 16261 + }, + { + "epoch": 2.296243998870376, + "grad_norm": 3.511784848089861, + "learning_rate": 2.7501750906665603e-06, + "loss": 0.2062, + "step": 16262 + }, + { + "epoch": 2.2963852019203617, + "grad_norm": 2.928057958495144, + "learning_rate": 2.7491251815712384e-06, + "loss": 0.1425, + "step": 16263 + }, + { + "epoch": 2.2965264049703475, + "grad_norm": 3.409434601237091, + "learning_rate": 2.748075440983118e-06, + "loss": 0.1765, + "step": 16264 + }, + { + "epoch": 2.2966676080203334, + "grad_norm": 2.6922074610309847, + "learning_rate": 2.7470258689265926e-06, + "loss": 0.1253, + "step": 16265 + }, + { + "epoch": 2.2968088110703193, + "grad_norm": 3.631354629322686, + "learning_rate": 2.7459764654260547e-06, + "loss": 0.1995, + "step": 16266 + }, + { + "epoch": 2.2969500141203048, + "grad_norm": 3.2786380031309426, + "learning_rate": 2.7449272305058905e-06, + "loss": 0.1506, + "step": 16267 + }, + { + "epoch": 2.2970912171702906, + "grad_norm": 3.5203519772641925, + "learning_rate": 2.7438781641904834e-06, + "loss": 0.1791, + "step": 16268 + }, + { + "epoch": 2.2972324202202765, + "grad_norm": 4.0852382638972164, + "learning_rate": 2.7428292665042157e-06, + "loss": 0.1934, + "step": 16269 + }, + { + "epoch": 2.2973736232702624, + "grad_norm": 3.8643927156582074, + "learning_rate": 2.74178053747146e-06, + "loss": 0.1833, + "step": 16270 + }, + { + "epoch": 2.2975148263202483, + "grad_norm": 2.717198597242836, + "learning_rate": 2.740731977116591e-06, + "loss": 0.1086, + "step": 16271 + }, + { + "epoch": 2.297656029370234, + "grad_norm": 3.1282643788994196, + "learning_rate": 2.7396835854639757e-06, + "loss": 0.169, + "step": 16272 + }, + { + "epoch": 2.29779723242022, + "grad_norm": 3.7127748942819907, + "learning_rate": 2.738635362537978e-06, + "loss": 0.1717, + "step": 16273 + }, + { + "epoch": 2.297938435470206, + "grad_norm": 2.771145098707405, + "learning_rate": 2.737587308362959e-06, + "loss": 0.1335, + "step": 16274 + }, + { + "epoch": 2.298079638520192, + "grad_norm": 3.569860196995526, + "learning_rate": 2.7365394229632713e-06, + "loss": 0.1821, + "step": 16275 + }, + { + "epoch": 2.2982208415701777, + "grad_norm": 3.2776296963707687, + "learning_rate": 2.7354917063632735e-06, + "loss": 0.1124, + "step": 16276 + }, + { + "epoch": 2.2983620446201636, + "grad_norm": 2.6134271563913654, + "learning_rate": 2.734444158587314e-06, + "loss": 0.1057, + "step": 16277 + }, + { + "epoch": 2.2985032476701495, + "grad_norm": 3.56736506256394, + "learning_rate": 2.7333967796597317e-06, + "loss": 0.1717, + "step": 16278 + }, + { + "epoch": 2.2986444507201353, + "grad_norm": 3.3650762612681406, + "learning_rate": 2.7323495696048707e-06, + "loss": 0.1496, + "step": 16279 + }, + { + "epoch": 2.2987856537701212, + "grad_norm": 2.614744930968377, + "learning_rate": 2.731302528447063e-06, + "loss": 0.1546, + "step": 16280 + }, + { + "epoch": 2.298926856820107, + "grad_norm": 2.7313616923878286, + "learning_rate": 2.7302556562106486e-06, + "loss": 0.1227, + "step": 16281 + }, + { + "epoch": 2.299068059870093, + "grad_norm": 3.4847204558159808, + "learning_rate": 2.729208952919954e-06, + "loss": 0.1706, + "step": 16282 + }, + { + "epoch": 2.299209262920079, + "grad_norm": 3.072803162106676, + "learning_rate": 2.7281624185993027e-06, + "loss": 0.1354, + "step": 16283 + }, + { + "epoch": 2.2993504659700648, + "grad_norm": 3.138761572374022, + "learning_rate": 2.7271160532730167e-06, + "loss": 0.1611, + "step": 16284 + }, + { + "epoch": 2.2994916690200506, + "grad_norm": 2.9447509517912183, + "learning_rate": 2.726069856965412e-06, + "loss": 0.1467, + "step": 16285 + }, + { + "epoch": 2.2996328720700365, + "grad_norm": 3.587023868792806, + "learning_rate": 2.7250238297008026e-06, + "loss": 0.1716, + "step": 16286 + }, + { + "epoch": 2.2997740751200224, + "grad_norm": 3.1407759517379614, + "learning_rate": 2.7239779715034975e-06, + "loss": 0.1718, + "step": 16287 + }, + { + "epoch": 2.2999152781700083, + "grad_norm": 2.8478034919897426, + "learning_rate": 2.7229322823978033e-06, + "loss": 0.1225, + "step": 16288 + }, + { + "epoch": 2.300056481219994, + "grad_norm": 4.119813650171776, + "learning_rate": 2.7218867624080194e-06, + "loss": 0.1769, + "step": 16289 + }, + { + "epoch": 2.30019768426998, + "grad_norm": 3.3842406824849682, + "learning_rate": 2.7208414115584436e-06, + "loss": 0.1702, + "step": 16290 + }, + { + "epoch": 2.300338887319966, + "grad_norm": 2.969951724504971, + "learning_rate": 2.7197962298733693e-06, + "loss": 0.1548, + "step": 16291 + }, + { + "epoch": 2.300480090369952, + "grad_norm": 2.6665778173761328, + "learning_rate": 2.7187512173770848e-06, + "loss": 0.1074, + "step": 16292 + }, + { + "epoch": 2.3006212934199377, + "grad_norm": 3.433733803316779, + "learning_rate": 2.717706374093879e-06, + "loss": 0.1984, + "step": 16293 + }, + { + "epoch": 2.3007624964699236, + "grad_norm": 2.12409339169575, + "learning_rate": 2.7166617000480367e-06, + "loss": 0.0762, + "step": 16294 + }, + { + "epoch": 2.3009036995199095, + "grad_norm": 2.4254720284762197, + "learning_rate": 2.7156171952638276e-06, + "loss": 0.1191, + "step": 16295 + }, + { + "epoch": 2.3010449025698954, + "grad_norm": 3.7335704045318163, + "learning_rate": 2.7145728597655286e-06, + "loss": 0.2066, + "step": 16296 + }, + { + "epoch": 2.3011861056198812, + "grad_norm": 2.2341773433082954, + "learning_rate": 2.7135286935774073e-06, + "loss": 0.1297, + "step": 16297 + }, + { + "epoch": 2.301327308669867, + "grad_norm": 2.676988747762443, + "learning_rate": 2.712484696723735e-06, + "loss": 0.136, + "step": 16298 + }, + { + "epoch": 2.301468511719853, + "grad_norm": 2.9861638112652846, + "learning_rate": 2.711440869228771e-06, + "loss": 0.1518, + "step": 16299 + }, + { + "epoch": 2.301609714769839, + "grad_norm": 2.9600746544913425, + "learning_rate": 2.710397211116774e-06, + "loss": 0.1251, + "step": 16300 + }, + { + "epoch": 2.3017509178198248, + "grad_norm": 3.5393407764573883, + "learning_rate": 2.709353722411997e-06, + "loss": 0.159, + "step": 16301 + }, + { + "epoch": 2.3018921208698107, + "grad_norm": 2.8569265373089388, + "learning_rate": 2.708310403138692e-06, + "loss": 0.1709, + "step": 16302 + }, + { + "epoch": 2.3020333239197965, + "grad_norm": 2.440281759810754, + "learning_rate": 2.707267253321103e-06, + "loss": 0.1286, + "step": 16303 + }, + { + "epoch": 2.3021745269697824, + "grad_norm": 2.9857277043414796, + "learning_rate": 2.7062242729834743e-06, + "loss": 0.1308, + "step": 16304 + }, + { + "epoch": 2.3023157300197683, + "grad_norm": 3.006710440637879, + "learning_rate": 2.7051814621500437e-06, + "loss": 0.1266, + "step": 16305 + }, + { + "epoch": 2.302456933069754, + "grad_norm": 2.4296758329853962, + "learning_rate": 2.7041388208450457e-06, + "loss": 0.1205, + "step": 16306 + }, + { + "epoch": 2.30259813611974, + "grad_norm": 3.4514507236900904, + "learning_rate": 2.7030963490927097e-06, + "loss": 0.1277, + "step": 16307 + }, + { + "epoch": 2.302739339169726, + "grad_norm": 2.37006144530992, + "learning_rate": 2.702054046917264e-06, + "loss": 0.0931, + "step": 16308 + }, + { + "epoch": 2.302880542219712, + "grad_norm": 2.9361927340809264, + "learning_rate": 2.701011914342927e-06, + "loss": 0.1512, + "step": 16309 + }, + { + "epoch": 2.3030217452696977, + "grad_norm": 3.025583355391224, + "learning_rate": 2.699969951393925e-06, + "loss": 0.1628, + "step": 16310 + }, + { + "epoch": 2.3031629483196836, + "grad_norm": 2.839336924166147, + "learning_rate": 2.6989281580944704e-06, + "loss": 0.137, + "step": 16311 + }, + { + "epoch": 2.3033041513696695, + "grad_norm": 3.0108482487677737, + "learning_rate": 2.6978865344687697e-06, + "loss": 0.1446, + "step": 16312 + }, + { + "epoch": 2.3034453544196554, + "grad_norm": 3.2634224405247627, + "learning_rate": 2.6968450805410328e-06, + "loss": 0.135, + "step": 16313 + }, + { + "epoch": 2.3035865574696412, + "grad_norm": 2.984902039875968, + "learning_rate": 2.695803796335459e-06, + "loss": 0.1359, + "step": 16314 + }, + { + "epoch": 2.303727760519627, + "grad_norm": 3.232220112539098, + "learning_rate": 2.694762681876253e-06, + "loss": 0.1742, + "step": 16315 + }, + { + "epoch": 2.303868963569613, + "grad_norm": 2.5719603607150887, + "learning_rate": 2.6937217371876077e-06, + "loss": 0.118, + "step": 16316 + }, + { + "epoch": 2.304010166619599, + "grad_norm": 2.9971678122443444, + "learning_rate": 2.6926809622937144e-06, + "loss": 0.177, + "step": 16317 + }, + { + "epoch": 2.304151369669585, + "grad_norm": 3.7108348805070484, + "learning_rate": 2.691640357218759e-06, + "loss": 0.1727, + "step": 16318 + }, + { + "epoch": 2.3042925727195707, + "grad_norm": 3.6008631641139357, + "learning_rate": 2.6905999219869295e-06, + "loss": 0.1751, + "step": 16319 + }, + { + "epoch": 2.3044337757695565, + "grad_norm": 3.271888543225656, + "learning_rate": 2.6895596566223937e-06, + "loss": 0.142, + "step": 16320 + }, + { + "epoch": 2.3045749788195424, + "grad_norm": 4.237449796420313, + "learning_rate": 2.6885195611493386e-06, + "loss": 0.1974, + "step": 16321 + }, + { + "epoch": 2.3047161818695283, + "grad_norm": 3.4041579421590926, + "learning_rate": 2.687479635591931e-06, + "loss": 0.1921, + "step": 16322 + }, + { + "epoch": 2.304857384919514, + "grad_norm": 4.276518103815932, + "learning_rate": 2.6864398799743383e-06, + "loss": 0.1964, + "step": 16323 + }, + { + "epoch": 2.3049985879695, + "grad_norm": 3.079307241495362, + "learning_rate": 2.6854002943207245e-06, + "loss": 0.1667, + "step": 16324 + }, + { + "epoch": 2.305139791019486, + "grad_norm": 3.265125951603585, + "learning_rate": 2.684360878655249e-06, + "loss": 0.143, + "step": 16325 + }, + { + "epoch": 2.305280994069472, + "grad_norm": 2.2690044428292286, + "learning_rate": 2.683321633002064e-06, + "loss": 0.1401, + "step": 16326 + }, + { + "epoch": 2.3054221971194577, + "grad_norm": 2.867190300571336, + "learning_rate": 2.6822825573853274e-06, + "loss": 0.1218, + "step": 16327 + }, + { + "epoch": 2.3055634001694436, + "grad_norm": 3.140173919278724, + "learning_rate": 2.681243651829187e-06, + "loss": 0.1482, + "step": 16328 + }, + { + "epoch": 2.3057046032194295, + "grad_norm": 2.9888965276828277, + "learning_rate": 2.68020491635778e-06, + "loss": 0.1371, + "step": 16329 + }, + { + "epoch": 2.3058458062694154, + "grad_norm": 3.282351357703986, + "learning_rate": 2.6791663509952504e-06, + "loss": 0.1373, + "step": 16330 + }, + { + "epoch": 2.3059870093194013, + "grad_norm": 3.3488256157947833, + "learning_rate": 2.67812795576573e-06, + "loss": 0.1531, + "step": 16331 + }, + { + "epoch": 2.306128212369387, + "grad_norm": 3.0436834687399714, + "learning_rate": 2.677089730693356e-06, + "loss": 0.1342, + "step": 16332 + }, + { + "epoch": 2.306269415419373, + "grad_norm": 2.74116989589514, + "learning_rate": 2.676051675802256e-06, + "loss": 0.1313, + "step": 16333 + }, + { + "epoch": 2.306410618469359, + "grad_norm": 3.8512713728237924, + "learning_rate": 2.675013791116551e-06, + "loss": 0.1552, + "step": 16334 + }, + { + "epoch": 2.306551821519345, + "grad_norm": 2.719487535085552, + "learning_rate": 2.673976076660362e-06, + "loss": 0.1326, + "step": 16335 + }, + { + "epoch": 2.3066930245693307, + "grad_norm": 3.1976942336979284, + "learning_rate": 2.672938532457807e-06, + "loss": 0.1579, + "step": 16336 + }, + { + "epoch": 2.3068342276193166, + "grad_norm": 2.880000793837095, + "learning_rate": 2.671901158532991e-06, + "loss": 0.1426, + "step": 16337 + }, + { + "epoch": 2.3069754306693024, + "grad_norm": 3.0831687425643004, + "learning_rate": 2.670863954910031e-06, + "loss": 0.1498, + "step": 16338 + }, + { + "epoch": 2.3071166337192883, + "grad_norm": 2.4369189496723775, + "learning_rate": 2.6698269216130278e-06, + "loss": 0.1205, + "step": 16339 + }, + { + "epoch": 2.307257836769274, + "grad_norm": 3.397082851110855, + "learning_rate": 2.66879005866608e-06, + "loss": 0.1457, + "step": 16340 + }, + { + "epoch": 2.30739903981926, + "grad_norm": 3.4232342652321748, + "learning_rate": 2.667753366093285e-06, + "loss": 0.1677, + "step": 16341 + }, + { + "epoch": 2.307540242869246, + "grad_norm": 2.5533226066482007, + "learning_rate": 2.6667168439187364e-06, + "loss": 0.0921, + "step": 16342 + }, + { + "epoch": 2.307681445919232, + "grad_norm": 3.2384776434456195, + "learning_rate": 2.665680492166518e-06, + "loss": 0.1451, + "step": 16343 + }, + { + "epoch": 2.3078226489692177, + "grad_norm": 3.721855309065821, + "learning_rate": 2.6646443108607234e-06, + "loss": 0.1699, + "step": 16344 + }, + { + "epoch": 2.3079638520192036, + "grad_norm": 3.306363863911269, + "learning_rate": 2.6636083000254244e-06, + "loss": 0.1208, + "step": 16345 + }, + { + "epoch": 2.3081050550691895, + "grad_norm": 3.3923634535983704, + "learning_rate": 2.662572459684699e-06, + "loss": 0.1492, + "step": 16346 + }, + { + "epoch": 2.3082462581191754, + "grad_norm": 3.9518864569435106, + "learning_rate": 2.661536789862622e-06, + "loss": 0.2081, + "step": 16347 + }, + { + "epoch": 2.3083874611691613, + "grad_norm": 3.4427703625110966, + "learning_rate": 2.6605012905832605e-06, + "loss": 0.1158, + "step": 16348 + }, + { + "epoch": 2.308528664219147, + "grad_norm": 3.050607564513119, + "learning_rate": 2.6594659618706764e-06, + "loss": 0.1288, + "step": 16349 + }, + { + "epoch": 2.308669867269133, + "grad_norm": 3.637359562099576, + "learning_rate": 2.658430803748936e-06, + "loss": 0.158, + "step": 16350 + }, + { + "epoch": 2.308811070319119, + "grad_norm": 3.1845754867021143, + "learning_rate": 2.6573958162420933e-06, + "loss": 0.1442, + "step": 16351 + }, + { + "epoch": 2.308952273369105, + "grad_norm": 3.1031455724272203, + "learning_rate": 2.656360999374201e-06, + "loss": 0.1285, + "step": 16352 + }, + { + "epoch": 2.3090934764190907, + "grad_norm": 3.184603584345711, + "learning_rate": 2.6553263531693096e-06, + "loss": 0.1458, + "step": 16353 + }, + { + "epoch": 2.3092346794690766, + "grad_norm": 3.9334057563022498, + "learning_rate": 2.654291877651457e-06, + "loss": 0.1942, + "step": 16354 + }, + { + "epoch": 2.3093758825190625, + "grad_norm": 2.616577803580007, + "learning_rate": 2.653257572844692e-06, + "loss": 0.1052, + "step": 16355 + }, + { + "epoch": 2.3095170855690483, + "grad_norm": 2.996515919509376, + "learning_rate": 2.652223438773047e-06, + "loss": 0.1271, + "step": 16356 + }, + { + "epoch": 2.309658288619034, + "grad_norm": 3.252248724070849, + "learning_rate": 2.651189475460556e-06, + "loss": 0.1542, + "step": 16357 + }, + { + "epoch": 2.30979949166902, + "grad_norm": 3.0989639011684393, + "learning_rate": 2.6501556829312492e-06, + "loss": 0.1742, + "step": 16358 + }, + { + "epoch": 2.309940694719006, + "grad_norm": 2.8644413977787155, + "learning_rate": 2.6491220612091494e-06, + "loss": 0.1185, + "step": 16359 + }, + { + "epoch": 2.310081897768992, + "grad_norm": 3.6178228549283613, + "learning_rate": 2.648088610318278e-06, + "loss": 0.1788, + "step": 16360 + }, + { + "epoch": 2.3102231008189777, + "grad_norm": 3.614776071568069, + "learning_rate": 2.6470553302826528e-06, + "loss": 0.1711, + "step": 16361 + }, + { + "epoch": 2.3103643038689636, + "grad_norm": 3.598478003392561, + "learning_rate": 2.646022221126285e-06, + "loss": 0.1943, + "step": 16362 + }, + { + "epoch": 2.3105055069189495, + "grad_norm": 2.9720049986047874, + "learning_rate": 2.644989282873187e-06, + "loss": 0.119, + "step": 16363 + }, + { + "epoch": 2.3106467099689354, + "grad_norm": 2.8199790678096277, + "learning_rate": 2.6439565155473602e-06, + "loss": 0.1204, + "step": 16364 + }, + { + "epoch": 2.3107879130189213, + "grad_norm": 2.851508641450763, + "learning_rate": 2.642923919172807e-06, + "loss": 0.1451, + "step": 16365 + }, + { + "epoch": 2.310929116068907, + "grad_norm": 2.9949301656832734, + "learning_rate": 2.6418914937735228e-06, + "loss": 0.1526, + "step": 16366 + }, + { + "epoch": 2.311070319118893, + "grad_norm": 3.5990676047779084, + "learning_rate": 2.6408592393735043e-06, + "loss": 0.1832, + "step": 16367 + }, + { + "epoch": 2.311211522168879, + "grad_norm": 3.507330424449203, + "learning_rate": 2.63982715599674e-06, + "loss": 0.177, + "step": 16368 + }, + { + "epoch": 2.311352725218865, + "grad_norm": 3.3929005113402115, + "learning_rate": 2.6387952436672136e-06, + "loss": 0.1651, + "step": 16369 + }, + { + "epoch": 2.3114939282688507, + "grad_norm": 3.813598094774005, + "learning_rate": 2.637763502408909e-06, + "loss": 0.1855, + "step": 16370 + }, + { + "epoch": 2.3116351313188366, + "grad_norm": 3.0349285210696713, + "learning_rate": 2.636731932245796e-06, + "loss": 0.1391, + "step": 16371 + }, + { + "epoch": 2.3117763343688225, + "grad_norm": 2.647653023374486, + "learning_rate": 2.6357005332018557e-06, + "loss": 0.1465, + "step": 16372 + }, + { + "epoch": 2.3119175374188083, + "grad_norm": 3.4645334532934298, + "learning_rate": 2.634669305301054e-06, + "loss": 0.1359, + "step": 16373 + }, + { + "epoch": 2.3120587404687942, + "grad_norm": 3.131557614636249, + "learning_rate": 2.6336382485673574e-06, + "loss": 0.1568, + "step": 16374 + }, + { + "epoch": 2.31219994351878, + "grad_norm": 2.6967363805764215, + "learning_rate": 2.6326073630247263e-06, + "loss": 0.1371, + "step": 16375 + }, + { + "epoch": 2.312341146568766, + "grad_norm": 3.2815187881778245, + "learning_rate": 2.631576648697118e-06, + "loss": 0.153, + "step": 16376 + }, + { + "epoch": 2.312482349618752, + "grad_norm": 4.144321720768993, + "learning_rate": 2.630546105608488e-06, + "loss": 0.1657, + "step": 16377 + }, + { + "epoch": 2.3126235526687378, + "grad_norm": 2.550874788071806, + "learning_rate": 2.6295157337827827e-06, + "loss": 0.1401, + "step": 16378 + }, + { + "epoch": 2.3127647557187236, + "grad_norm": 3.2807492469931834, + "learning_rate": 2.6284855332439487e-06, + "loss": 0.2081, + "step": 16379 + }, + { + "epoch": 2.3129059587687095, + "grad_norm": 2.778662753952818, + "learning_rate": 2.6274555040159265e-06, + "loss": 0.1407, + "step": 16380 + }, + { + "epoch": 2.3130471618186954, + "grad_norm": 4.453177977532432, + "learning_rate": 2.6264256461226555e-06, + "loss": 0.2164, + "step": 16381 + }, + { + "epoch": 2.3131883648686813, + "grad_norm": 3.1399122716964722, + "learning_rate": 2.625395959588067e-06, + "loss": 0.1471, + "step": 16382 + }, + { + "epoch": 2.313329567918667, + "grad_norm": 3.9735541025727983, + "learning_rate": 2.6243664444360907e-06, + "loss": 0.1597, + "step": 16383 + }, + { + "epoch": 2.313470770968653, + "grad_norm": 3.4836870807083966, + "learning_rate": 2.623337100690654e-06, + "loss": 0.1516, + "step": 16384 + }, + { + "epoch": 2.313611974018639, + "grad_norm": 3.6462400979621896, + "learning_rate": 2.622307928375678e-06, + "loss": 0.1854, + "step": 16385 + }, + { + "epoch": 2.313753177068625, + "grad_norm": 3.6793319647404914, + "learning_rate": 2.6212789275150796e-06, + "loss": 0.1995, + "step": 16386 + }, + { + "epoch": 2.3138943801186107, + "grad_norm": 4.009696522176874, + "learning_rate": 2.620250098132775e-06, + "loss": 0.177, + "step": 16387 + }, + { + "epoch": 2.3140355831685966, + "grad_norm": 2.7956257095416848, + "learning_rate": 2.6192214402526662e-06, + "loss": 0.1627, + "step": 16388 + }, + { + "epoch": 2.3141767862185825, + "grad_norm": 3.2535714573962413, + "learning_rate": 2.618192953898665e-06, + "loss": 0.1332, + "step": 16389 + }, + { + "epoch": 2.3143179892685684, + "grad_norm": 3.023711359002308, + "learning_rate": 2.6171646390946727e-06, + "loss": 0.1582, + "step": 16390 + }, + { + "epoch": 2.3144591923185542, + "grad_norm": 3.4527189551117887, + "learning_rate": 2.6161364958645853e-06, + "loss": 0.1684, + "step": 16391 + }, + { + "epoch": 2.31460039536854, + "grad_norm": 3.753535233111927, + "learning_rate": 2.6151085242322973e-06, + "loss": 0.2014, + "step": 16392 + }, + { + "epoch": 2.314741598418526, + "grad_norm": 3.22188541255434, + "learning_rate": 2.614080724221697e-06, + "loss": 0.1949, + "step": 16393 + }, + { + "epoch": 2.314882801468512, + "grad_norm": 3.4738336985084084, + "learning_rate": 2.613053095856671e-06, + "loss": 0.1777, + "step": 16394 + }, + { + "epoch": 2.3150240045184978, + "grad_norm": 3.362570328077147, + "learning_rate": 2.612025639161102e-06, + "loss": 0.1529, + "step": 16395 + }, + { + "epoch": 2.3151652075684837, + "grad_norm": 3.776681570408574, + "learning_rate": 2.6109983541588655e-06, + "loss": 0.213, + "step": 16396 + }, + { + "epoch": 2.3153064106184695, + "grad_norm": 3.240059536412947, + "learning_rate": 2.6099712408738363e-06, + "loss": 0.1491, + "step": 16397 + }, + { + "epoch": 2.3154476136684554, + "grad_norm": 2.8735924747500934, + "learning_rate": 2.6089442993298854e-06, + "loss": 0.1523, + "step": 16398 + }, + { + "epoch": 2.3155888167184413, + "grad_norm": 3.694435909146389, + "learning_rate": 2.607917529550875e-06, + "loss": 0.1964, + "step": 16399 + }, + { + "epoch": 2.315730019768427, + "grad_norm": 4.150072716422877, + "learning_rate": 2.606890931560667e-06, + "loss": 0.139, + "step": 16400 + }, + { + "epoch": 2.315871222818413, + "grad_norm": 3.3472483519326555, + "learning_rate": 2.6058645053831234e-06, + "loss": 0.1777, + "step": 16401 + }, + { + "epoch": 2.316012425868399, + "grad_norm": 3.146305988925464, + "learning_rate": 2.6048382510420954e-06, + "loss": 0.144, + "step": 16402 + }, + { + "epoch": 2.316153628918385, + "grad_norm": 2.8640163148330595, + "learning_rate": 2.603812168561434e-06, + "loss": 0.1541, + "step": 16403 + }, + { + "epoch": 2.3162948319683707, + "grad_norm": 2.890223631466561, + "learning_rate": 2.6027862579649856e-06, + "loss": 0.1385, + "step": 16404 + }, + { + "epoch": 2.3164360350183566, + "grad_norm": 3.1677013109681966, + "learning_rate": 2.6017605192765828e-06, + "loss": 0.1884, + "step": 16405 + }, + { + "epoch": 2.3165772380683425, + "grad_norm": 3.385777501407841, + "learning_rate": 2.6007349525200754e-06, + "loss": 0.1604, + "step": 16406 + }, + { + "epoch": 2.3167184411183284, + "grad_norm": 2.7142619248731417, + "learning_rate": 2.599709557719291e-06, + "loss": 0.0979, + "step": 16407 + }, + { + "epoch": 2.3168596441683142, + "grad_norm": 3.228369039886004, + "learning_rate": 2.5986843348980607e-06, + "loss": 0.1339, + "step": 16408 + }, + { + "epoch": 2.3170008472183, + "grad_norm": 2.5327960897650073, + "learning_rate": 2.5976592840802105e-06, + "loss": 0.1301, + "step": 16409 + }, + { + "epoch": 2.317142050268286, + "grad_norm": 3.0167833220466056, + "learning_rate": 2.596634405289562e-06, + "loss": 0.1266, + "step": 16410 + }, + { + "epoch": 2.317283253318272, + "grad_norm": 3.625084750290672, + "learning_rate": 2.5956096985499315e-06, + "loss": 0.1585, + "step": 16411 + }, + { + "epoch": 2.317424456368258, + "grad_norm": 2.62455777127269, + "learning_rate": 2.594585163885135e-06, + "loss": 0.128, + "step": 16412 + }, + { + "epoch": 2.3175656594182437, + "grad_norm": 3.5956733710535684, + "learning_rate": 2.5935608013189808e-06, + "loss": 0.1799, + "step": 16413 + }, + { + "epoch": 2.3177068624682295, + "grad_norm": 2.9012610323332164, + "learning_rate": 2.592536610875275e-06, + "loss": 0.1445, + "step": 16414 + }, + { + "epoch": 2.3178480655182154, + "grad_norm": 3.8016298296309103, + "learning_rate": 2.5915125925778184e-06, + "loss": 0.1891, + "step": 16415 + }, + { + "epoch": 2.3179892685682013, + "grad_norm": 3.3366905535707434, + "learning_rate": 2.5904887464504115e-06, + "loss": 0.1598, + "step": 16416 + }, + { + "epoch": 2.318130471618187, + "grad_norm": 2.940637547529869, + "learning_rate": 2.589465072516841e-06, + "loss": 0.1724, + "step": 16417 + }, + { + "epoch": 2.318271674668173, + "grad_norm": 3.6323651311567464, + "learning_rate": 2.588441570800907e-06, + "loss": 0.1899, + "step": 16418 + }, + { + "epoch": 2.318412877718159, + "grad_norm": 2.6642314341877187, + "learning_rate": 2.5874182413263893e-06, + "loss": 0.1421, + "step": 16419 + }, + { + "epoch": 2.3185540807681444, + "grad_norm": 3.1254573362157534, + "learning_rate": 2.5863950841170704e-06, + "loss": 0.1647, + "step": 16420 + }, + { + "epoch": 2.3186952838181303, + "grad_norm": 3.8476599094236823, + "learning_rate": 2.58537209919673e-06, + "loss": 0.1751, + "step": 16421 + }, + { + "epoch": 2.318836486868116, + "grad_norm": 4.523103611399567, + "learning_rate": 2.5843492865891383e-06, + "loss": 0.1728, + "step": 16422 + }, + { + "epoch": 2.318977689918102, + "grad_norm": 3.4659438701668273, + "learning_rate": 2.5833266463180628e-06, + "loss": 0.1749, + "step": 16423 + }, + { + "epoch": 2.319118892968088, + "grad_norm": 3.2818464367596203, + "learning_rate": 2.5823041784072767e-06, + "loss": 0.1382, + "step": 16424 + }, + { + "epoch": 2.319260096018074, + "grad_norm": 3.684851044143204, + "learning_rate": 2.5812818828805376e-06, + "loss": 0.1871, + "step": 16425 + }, + { + "epoch": 2.3194012990680597, + "grad_norm": 3.9861144513427647, + "learning_rate": 2.580259759761604e-06, + "loss": 0.1837, + "step": 16426 + }, + { + "epoch": 2.3195425021180456, + "grad_norm": 3.898380848062007, + "learning_rate": 2.5792378090742285e-06, + "loss": 0.2097, + "step": 16427 + }, + { + "epoch": 2.3196837051680315, + "grad_norm": 2.8749080304804995, + "learning_rate": 2.578216030842162e-06, + "loss": 0.153, + "step": 16428 + }, + { + "epoch": 2.3198249082180173, + "grad_norm": 2.949391624724403, + "learning_rate": 2.57719442508915e-06, + "loss": 0.1485, + "step": 16429 + }, + { + "epoch": 2.3199661112680032, + "grad_norm": 3.1638270022944304, + "learning_rate": 2.576172991838933e-06, + "loss": 0.1977, + "step": 16430 + }, + { + "epoch": 2.320107314317989, + "grad_norm": 3.2471614146865013, + "learning_rate": 2.5751517311152505e-06, + "loss": 0.1212, + "step": 16431 + }, + { + "epoch": 2.320248517367975, + "grad_norm": 4.160561755448681, + "learning_rate": 2.5741306429418355e-06, + "loss": 0.2176, + "step": 16432 + }, + { + "epoch": 2.320389720417961, + "grad_norm": 2.6986422340256615, + "learning_rate": 2.5731097273424167e-06, + "loss": 0.1257, + "step": 16433 + }, + { + "epoch": 2.3205309234679468, + "grad_norm": 3.790095881024734, + "learning_rate": 2.5720889843407205e-06, + "loss": 0.2, + "step": 16434 + }, + { + "epoch": 2.3206721265179326, + "grad_norm": 2.7970784709338323, + "learning_rate": 2.5710684139604645e-06, + "loss": 0.1536, + "step": 16435 + }, + { + "epoch": 2.3208133295679185, + "grad_norm": 4.033082085654044, + "learning_rate": 2.5700480162253748e-06, + "loss": 0.1957, + "step": 16436 + }, + { + "epoch": 2.3209545326179044, + "grad_norm": 3.162844010322644, + "learning_rate": 2.56902779115916e-06, + "loss": 0.1314, + "step": 16437 + }, + { + "epoch": 2.3210957356678903, + "grad_norm": 2.983024541893772, + "learning_rate": 2.568007738785533e-06, + "loss": 0.1517, + "step": 16438 + }, + { + "epoch": 2.321236938717876, + "grad_norm": 2.380366618519906, + "learning_rate": 2.5669878591281928e-06, + "loss": 0.1101, + "step": 16439 + }, + { + "epoch": 2.321378141767862, + "grad_norm": 3.225538374023024, + "learning_rate": 2.5659681522108428e-06, + "loss": 0.1566, + "step": 16440 + }, + { + "epoch": 2.321519344817848, + "grad_norm": 3.518298506648807, + "learning_rate": 2.5649486180571846e-06, + "loss": 0.1487, + "step": 16441 + }, + { + "epoch": 2.321660547867834, + "grad_norm": 3.3667398050820747, + "learning_rate": 2.5639292566909103e-06, + "loss": 0.1437, + "step": 16442 + }, + { + "epoch": 2.3218017509178197, + "grad_norm": 2.8638485510038825, + "learning_rate": 2.562910068135709e-06, + "loss": 0.1623, + "step": 16443 + }, + { + "epoch": 2.3219429539678056, + "grad_norm": 2.6500912379172767, + "learning_rate": 2.5618910524152652e-06, + "loss": 0.1308, + "step": 16444 + }, + { + "epoch": 2.3220841570177915, + "grad_norm": 3.7097068185901145, + "learning_rate": 2.56087220955326e-06, + "loss": 0.196, + "step": 16445 + }, + { + "epoch": 2.3222253600677774, + "grad_norm": 2.8254862257494016, + "learning_rate": 2.5598535395733735e-06, + "loss": 0.1247, + "step": 16446 + }, + { + "epoch": 2.3223665631177632, + "grad_norm": 2.79060646066435, + "learning_rate": 2.558835042499277e-06, + "loss": 0.1237, + "step": 16447 + }, + { + "epoch": 2.322507766167749, + "grad_norm": 3.241697220954079, + "learning_rate": 2.557816718354641e-06, + "loss": 0.1532, + "step": 16448 + }, + { + "epoch": 2.322648969217735, + "grad_norm": 4.078071514505492, + "learning_rate": 2.5567985671631303e-06, + "loss": 0.1734, + "step": 16449 + }, + { + "epoch": 2.322790172267721, + "grad_norm": 3.0657060507610114, + "learning_rate": 2.5557805889484055e-06, + "loss": 0.1332, + "step": 16450 + }, + { + "epoch": 2.3229313753177068, + "grad_norm": 2.6627494299789003, + "learning_rate": 2.554762783734126e-06, + "loss": 0.1168, + "step": 16451 + }, + { + "epoch": 2.3230725783676927, + "grad_norm": 3.107679582905108, + "learning_rate": 2.5537451515439406e-06, + "loss": 0.1409, + "step": 16452 + }, + { + "epoch": 2.3232137814176785, + "grad_norm": 3.28681615917673, + "learning_rate": 2.5527276924015053e-06, + "loss": 0.153, + "step": 16453 + }, + { + "epoch": 2.3233549844676644, + "grad_norm": 3.0292876274254485, + "learning_rate": 2.551710406330462e-06, + "loss": 0.1707, + "step": 16454 + }, + { + "epoch": 2.3234961875176503, + "grad_norm": 2.8708502362951087, + "learning_rate": 2.5506932933544546e-06, + "loss": 0.136, + "step": 16455 + }, + { + "epoch": 2.323637390567636, + "grad_norm": 2.9434675061479125, + "learning_rate": 2.549676353497116e-06, + "loss": 0.1298, + "step": 16456 + }, + { + "epoch": 2.323778593617622, + "grad_norm": 3.2042831825592835, + "learning_rate": 2.5486595867820786e-06, + "loss": 0.1336, + "step": 16457 + }, + { + "epoch": 2.323919796667608, + "grad_norm": 3.34863361236074, + "learning_rate": 2.547642993232976e-06, + "loss": 0.1396, + "step": 16458 + }, + { + "epoch": 2.324060999717594, + "grad_norm": 2.9851152675092276, + "learning_rate": 2.546626572873433e-06, + "loss": 0.1528, + "step": 16459 + }, + { + "epoch": 2.3242022027675797, + "grad_norm": 4.272883973913513, + "learning_rate": 2.5456103257270693e-06, + "loss": 0.1722, + "step": 16460 + }, + { + "epoch": 2.3243434058175656, + "grad_norm": 2.96529372617313, + "learning_rate": 2.5445942518175017e-06, + "loss": 0.1602, + "step": 16461 + }, + { + "epoch": 2.3244846088675515, + "grad_norm": 3.594112981138169, + "learning_rate": 2.5435783511683444e-06, + "loss": 0.1755, + "step": 16462 + }, + { + "epoch": 2.3246258119175374, + "grad_norm": 3.081157238779159, + "learning_rate": 2.5425626238032063e-06, + "loss": 0.1657, + "step": 16463 + }, + { + "epoch": 2.3247670149675232, + "grad_norm": 3.2102579707230596, + "learning_rate": 2.5415470697456923e-06, + "loss": 0.1482, + "step": 16464 + }, + { + "epoch": 2.324908218017509, + "grad_norm": 2.673610361620119, + "learning_rate": 2.540531689019403e-06, + "loss": 0.133, + "step": 16465 + }, + { + "epoch": 2.325049421067495, + "grad_norm": 3.1159168194896023, + "learning_rate": 2.5395164816479357e-06, + "loss": 0.1396, + "step": 16466 + }, + { + "epoch": 2.325190624117481, + "grad_norm": 3.0107994753628664, + "learning_rate": 2.538501447654883e-06, + "loss": 0.1868, + "step": 16467 + }, + { + "epoch": 2.325331827167467, + "grad_norm": 3.1920579968288916, + "learning_rate": 2.5374865870638354e-06, + "loss": 0.1687, + "step": 16468 + }, + { + "epoch": 2.3254730302174527, + "grad_norm": 3.387583164670588, + "learning_rate": 2.536471899898373e-06, + "loss": 0.1791, + "step": 16469 + }, + { + "epoch": 2.3256142332674385, + "grad_norm": 2.751476675097827, + "learning_rate": 2.535457386182083e-06, + "loss": 0.1203, + "step": 16470 + }, + { + "epoch": 2.3257554363174244, + "grad_norm": 3.091689074845375, + "learning_rate": 2.5344430459385405e-06, + "loss": 0.1725, + "step": 16471 + }, + { + "epoch": 2.3258966393674103, + "grad_norm": 3.6892545230713383, + "learning_rate": 2.533428879191321e-06, + "loss": 0.1524, + "step": 16472 + }, + { + "epoch": 2.326037842417396, + "grad_norm": 3.6783665610010967, + "learning_rate": 2.532414885963985e-06, + "loss": 0.1609, + "step": 16473 + }, + { + "epoch": 2.326179045467382, + "grad_norm": 3.837406288610304, + "learning_rate": 2.5314010662801e-06, + "loss": 0.1754, + "step": 16474 + }, + { + "epoch": 2.326320248517368, + "grad_norm": 3.092336931697847, + "learning_rate": 2.5303874201632318e-06, + "loss": 0.1229, + "step": 16475 + }, + { + "epoch": 2.326461451567354, + "grad_norm": 2.9974326411848833, + "learning_rate": 2.529373947636934e-06, + "loss": 0.1337, + "step": 16476 + }, + { + "epoch": 2.3266026546173397, + "grad_norm": 3.457607005198991, + "learning_rate": 2.5283606487247593e-06, + "loss": 0.1454, + "step": 16477 + }, + { + "epoch": 2.3267438576673256, + "grad_norm": 2.8847055228465246, + "learning_rate": 2.5273475234502565e-06, + "loss": 0.1266, + "step": 16478 + }, + { + "epoch": 2.3268850607173115, + "grad_norm": 2.5110811497891703, + "learning_rate": 2.52633457183697e-06, + "loss": 0.1392, + "step": 16479 + }, + { + "epoch": 2.3270262637672974, + "grad_norm": 3.8968157889469235, + "learning_rate": 2.5253217939084407e-06, + "loss": 0.2032, + "step": 16480 + }, + { + "epoch": 2.3271674668172833, + "grad_norm": 2.351836597252122, + "learning_rate": 2.5243091896882044e-06, + "loss": 0.1291, + "step": 16481 + }, + { + "epoch": 2.327308669867269, + "grad_norm": 3.4933974672248334, + "learning_rate": 2.5232967591997946e-06, + "loss": 0.1727, + "step": 16482 + }, + { + "epoch": 2.327449872917255, + "grad_norm": 2.9178322037775355, + "learning_rate": 2.5222845024667387e-06, + "loss": 0.1246, + "step": 16483 + }, + { + "epoch": 2.327591075967241, + "grad_norm": 2.859289921747814, + "learning_rate": 2.5212724195125616e-06, + "loss": 0.127, + "step": 16484 + }, + { + "epoch": 2.327732279017227, + "grad_norm": 2.882985425477126, + "learning_rate": 2.5202605103607835e-06, + "loss": 0.1214, + "step": 16485 + }, + { + "epoch": 2.3278734820672127, + "grad_norm": 2.6793884444513014, + "learning_rate": 2.519248775034918e-06, + "loss": 0.1402, + "step": 16486 + }, + { + "epoch": 2.3280146851171986, + "grad_norm": 2.911587990592806, + "learning_rate": 2.5182372135584845e-06, + "loss": 0.1396, + "step": 16487 + }, + { + "epoch": 2.3281558881671844, + "grad_norm": 2.69294614063943, + "learning_rate": 2.5172258259549854e-06, + "loss": 0.1456, + "step": 16488 + }, + { + "epoch": 2.3282970912171703, + "grad_norm": 3.0806633235901044, + "learning_rate": 2.51621461224793e-06, + "loss": 0.1615, + "step": 16489 + }, + { + "epoch": 2.328438294267156, + "grad_norm": 3.289429309932711, + "learning_rate": 2.5152035724608117e-06, + "loss": 0.1418, + "step": 16490 + }, + { + "epoch": 2.328579497317142, + "grad_norm": 2.959839271503791, + "learning_rate": 2.514192706617128e-06, + "loss": 0.1293, + "step": 16491 + }, + { + "epoch": 2.328720700367128, + "grad_norm": 2.7496895741671237, + "learning_rate": 2.513182014740375e-06, + "loss": 0.1421, + "step": 16492 + }, + { + "epoch": 2.328861903417114, + "grad_norm": 3.9452892724020496, + "learning_rate": 2.512171496854039e-06, + "loss": 0.1837, + "step": 16493 + }, + { + "epoch": 2.3290031064670997, + "grad_norm": 2.654354611518729, + "learning_rate": 2.511161152981604e-06, + "loss": 0.1506, + "step": 16494 + }, + { + "epoch": 2.3291443095170856, + "grad_norm": 3.0328830689270228, + "learning_rate": 2.510150983146549e-06, + "loss": 0.1308, + "step": 16495 + }, + { + "epoch": 2.3292855125670715, + "grad_norm": 3.8549278202495048, + "learning_rate": 2.5091409873723506e-06, + "loss": 0.1786, + "step": 16496 + }, + { + "epoch": 2.3294267156170574, + "grad_norm": 2.907983529307993, + "learning_rate": 2.5081311656824803e-06, + "loss": 0.1259, + "step": 16497 + }, + { + "epoch": 2.3295679186670433, + "grad_norm": 3.1004713950456737, + "learning_rate": 2.5071215181004073e-06, + "loss": 0.1702, + "step": 16498 + }, + { + "epoch": 2.329709121717029, + "grad_norm": 3.4572335442927677, + "learning_rate": 2.5061120446495935e-06, + "loss": 0.1421, + "step": 16499 + }, + { + "epoch": 2.329850324767015, + "grad_norm": 2.534071190538496, + "learning_rate": 2.505102745353499e-06, + "loss": 0.0994, + "step": 16500 + }, + { + "epoch": 2.329991527817001, + "grad_norm": 3.021538377141395, + "learning_rate": 2.5040936202355802e-06, + "loss": 0.1567, + "step": 16501 + }, + { + "epoch": 2.330132730866987, + "grad_norm": 3.8478691041602144, + "learning_rate": 2.503084669319289e-06, + "loss": 0.165, + "step": 16502 + }, + { + "epoch": 2.3302739339169727, + "grad_norm": 3.3804615949282666, + "learning_rate": 2.50207589262807e-06, + "loss": 0.1078, + "step": 16503 + }, + { + "epoch": 2.3304151369669586, + "grad_norm": 2.9548918299353555, + "learning_rate": 2.5010672901853704e-06, + "loss": 0.1657, + "step": 16504 + }, + { + "epoch": 2.3305563400169444, + "grad_norm": 2.736421115640989, + "learning_rate": 2.5000588620146326e-06, + "loss": 0.1236, + "step": 16505 + }, + { + "epoch": 2.3306975430669303, + "grad_norm": 3.6564320649823805, + "learning_rate": 2.499050608139284e-06, + "loss": 0.1718, + "step": 16506 + }, + { + "epoch": 2.330838746116916, + "grad_norm": 3.0949297295631504, + "learning_rate": 2.4980425285827614e-06, + "loss": 0.1457, + "step": 16507 + }, + { + "epoch": 2.330979949166902, + "grad_norm": 4.118557755510834, + "learning_rate": 2.4970346233684863e-06, + "loss": 0.1803, + "step": 16508 + }, + { + "epoch": 2.331121152216888, + "grad_norm": 3.050454004761832, + "learning_rate": 2.4960268925198894e-06, + "loss": 0.1441, + "step": 16509 + }, + { + "epoch": 2.331262355266874, + "grad_norm": 2.8739019298655193, + "learning_rate": 2.4950193360603868e-06, + "loss": 0.1612, + "step": 16510 + }, + { + "epoch": 2.3314035583168597, + "grad_norm": 2.8547235004220872, + "learning_rate": 2.4940119540133943e-06, + "loss": 0.1349, + "step": 16511 + }, + { + "epoch": 2.3315447613668456, + "grad_norm": 2.5900418194657155, + "learning_rate": 2.493004746402322e-06, + "loss": 0.087, + "step": 16512 + }, + { + "epoch": 2.3316859644168315, + "grad_norm": 2.8248535712875356, + "learning_rate": 2.491997713250577e-06, + "loss": 0.1206, + "step": 16513 + }, + { + "epoch": 2.3318271674668174, + "grad_norm": 3.1426631052514953, + "learning_rate": 2.490990854581563e-06, + "loss": 0.1478, + "step": 16514 + }, + { + "epoch": 2.3319683705168033, + "grad_norm": 3.289241403571953, + "learning_rate": 2.489984170418679e-06, + "loss": 0.1756, + "step": 16515 + }, + { + "epoch": 2.332109573566789, + "grad_norm": 3.694447586799479, + "learning_rate": 2.488977660785319e-06, + "loss": 0.1589, + "step": 16516 + }, + { + "epoch": 2.332250776616775, + "grad_norm": 3.2179673842811334, + "learning_rate": 2.4879713257048743e-06, + "loss": 0.1423, + "step": 16517 + }, + { + "epoch": 2.332391979666761, + "grad_norm": 3.284648967000756, + "learning_rate": 2.486965165200733e-06, + "loss": 0.1859, + "step": 16518 + }, + { + "epoch": 2.332533182716747, + "grad_norm": 2.658455622729808, + "learning_rate": 2.4859591792962754e-06, + "loss": 0.1272, + "step": 16519 + }, + { + "epoch": 2.3326743857667327, + "grad_norm": 2.9458215247635215, + "learning_rate": 2.4849533680148787e-06, + "loss": 0.1203, + "step": 16520 + }, + { + "epoch": 2.3328155888167186, + "grad_norm": 4.041647663048573, + "learning_rate": 2.483947731379923e-06, + "loss": 0.1815, + "step": 16521 + }, + { + "epoch": 2.3329567918667045, + "grad_norm": 3.0737443440292047, + "learning_rate": 2.4829422694147796e-06, + "loss": 0.1423, + "step": 16522 + }, + { + "epoch": 2.3330979949166903, + "grad_norm": 2.667578138844263, + "learning_rate": 2.4819369821428085e-06, + "loss": 0.1215, + "step": 16523 + }, + { + "epoch": 2.3332391979666762, + "grad_norm": 3.80941077009797, + "learning_rate": 2.480931869587375e-06, + "loss": 0.1935, + "step": 16524 + }, + { + "epoch": 2.333380401016662, + "grad_norm": 3.529374039841733, + "learning_rate": 2.4799269317718377e-06, + "loss": 0.1684, + "step": 16525 + }, + { + "epoch": 2.333521604066648, + "grad_norm": 2.873313247022884, + "learning_rate": 2.4789221687195473e-06, + "loss": 0.167, + "step": 16526 + }, + { + "epoch": 2.333662807116634, + "grad_norm": 3.487601642856519, + "learning_rate": 2.4779175804538613e-06, + "loss": 0.1686, + "step": 16527 + }, + { + "epoch": 2.3338040101666198, + "grad_norm": 2.6357737909256738, + "learning_rate": 2.4769131669981217e-06, + "loss": 0.1116, + "step": 16528 + }, + { + "epoch": 2.3339452132166056, + "grad_norm": 3.827574220568226, + "learning_rate": 2.475908928375671e-06, + "loss": 0.1844, + "step": 16529 + }, + { + "epoch": 2.3340864162665915, + "grad_norm": 3.2102102557773753, + "learning_rate": 2.4749048646098486e-06, + "loss": 0.1495, + "step": 16530 + }, + { + "epoch": 2.3342276193165774, + "grad_norm": 3.4073848972601106, + "learning_rate": 2.4739009757239853e-06, + "loss": 0.1685, + "step": 16531 + }, + { + "epoch": 2.3343688223665633, + "grad_norm": 4.186464274163126, + "learning_rate": 2.472897261741415e-06, + "loss": 0.1876, + "step": 16532 + }, + { + "epoch": 2.334510025416549, + "grad_norm": 3.6508384041255906, + "learning_rate": 2.47189372268546e-06, + "loss": 0.1527, + "step": 16533 + }, + { + "epoch": 2.334651228466535, + "grad_norm": 2.869233319162469, + "learning_rate": 2.470890358579444e-06, + "loss": 0.1179, + "step": 16534 + }, + { + "epoch": 2.334792431516521, + "grad_norm": 4.240485557982178, + "learning_rate": 2.469887169446685e-06, + "loss": 0.2037, + "step": 16535 + }, + { + "epoch": 2.334933634566507, + "grad_norm": 3.3180778477754758, + "learning_rate": 2.468884155310497e-06, + "loss": 0.1422, + "step": 16536 + }, + { + "epoch": 2.3350748376164927, + "grad_norm": 3.916534792935674, + "learning_rate": 2.4678813161941883e-06, + "loss": 0.1948, + "step": 16537 + }, + { + "epoch": 2.3352160406664786, + "grad_norm": 3.2030305782821324, + "learning_rate": 2.466878652121061e-06, + "loss": 0.1434, + "step": 16538 + }, + { + "epoch": 2.335357243716464, + "grad_norm": 2.5607439757271755, + "learning_rate": 2.465876163114427e-06, + "loss": 0.1315, + "step": 16539 + }, + { + "epoch": 2.33549844676645, + "grad_norm": 3.343455180028635, + "learning_rate": 2.4648738491975745e-06, + "loss": 0.1568, + "step": 16540 + }, + { + "epoch": 2.335639649816436, + "grad_norm": 3.1983977486863475, + "learning_rate": 2.463871710393799e-06, + "loss": 0.1524, + "step": 16541 + }, + { + "epoch": 2.3357808528664217, + "grad_norm": 2.989297112298677, + "learning_rate": 2.4628697467263916e-06, + "loss": 0.1437, + "step": 16542 + }, + { + "epoch": 2.3359220559164076, + "grad_norm": 3.3647711591525056, + "learning_rate": 2.461867958218632e-06, + "loss": 0.1628, + "step": 16543 + }, + { + "epoch": 2.3360632589663934, + "grad_norm": 3.0044997569299166, + "learning_rate": 2.46086634489381e-06, + "loss": 0.1535, + "step": 16544 + }, + { + "epoch": 2.3362044620163793, + "grad_norm": 3.2693348997836735, + "learning_rate": 2.459864906775197e-06, + "loss": 0.2016, + "step": 16545 + }, + { + "epoch": 2.336345665066365, + "grad_norm": 2.981277535063776, + "learning_rate": 2.458863643886067e-06, + "loss": 0.1456, + "step": 16546 + }, + { + "epoch": 2.336486868116351, + "grad_norm": 3.3900866101662372, + "learning_rate": 2.4578625562496896e-06, + "loss": 0.1555, + "step": 16547 + }, + { + "epoch": 2.336628071166337, + "grad_norm": 3.4991319543809443, + "learning_rate": 2.4568616438893287e-06, + "loss": 0.1643, + "step": 16548 + }, + { + "epoch": 2.336769274216323, + "grad_norm": 3.4018544580317003, + "learning_rate": 2.455860906828247e-06, + "loss": 0.1747, + "step": 16549 + }, + { + "epoch": 2.3369104772663087, + "grad_norm": 2.9110265415148864, + "learning_rate": 2.454860345089698e-06, + "loss": 0.1514, + "step": 16550 + }, + { + "epoch": 2.3370516803162946, + "grad_norm": 3.1134507919253096, + "learning_rate": 2.4538599586969367e-06, + "loss": 0.1831, + "step": 16551 + }, + { + "epoch": 2.3371928833662805, + "grad_norm": 3.1770611669860194, + "learning_rate": 2.452859747673212e-06, + "loss": 0.1674, + "step": 16552 + }, + { + "epoch": 2.3373340864162664, + "grad_norm": 3.1066391961167534, + "learning_rate": 2.4518597120417665e-06, + "loss": 0.166, + "step": 16553 + }, + { + "epoch": 2.3374752894662523, + "grad_norm": 4.481824898006849, + "learning_rate": 2.450859851825842e-06, + "loss": 0.2016, + "step": 16554 + }, + { + "epoch": 2.337616492516238, + "grad_norm": 2.829831441655758, + "learning_rate": 2.449860167048671e-06, + "loss": 0.1297, + "step": 16555 + }, + { + "epoch": 2.337757695566224, + "grad_norm": 3.188729150078145, + "learning_rate": 2.448860657733495e-06, + "loss": 0.1734, + "step": 16556 + }, + { + "epoch": 2.33789889861621, + "grad_norm": 3.593987130151595, + "learning_rate": 2.4478613239035333e-06, + "loss": 0.1492, + "step": 16557 + }, + { + "epoch": 2.338040101666196, + "grad_norm": 3.217982043014439, + "learning_rate": 2.4468621655820125e-06, + "loss": 0.1315, + "step": 16558 + }, + { + "epoch": 2.3381813047161817, + "grad_norm": 3.320377481625558, + "learning_rate": 2.4458631827921543e-06, + "loss": 0.1249, + "step": 16559 + }, + { + "epoch": 2.3383225077661676, + "grad_norm": 3.8329020272087906, + "learning_rate": 2.4448643755571687e-06, + "loss": 0.1982, + "step": 16560 + }, + { + "epoch": 2.3384637108161535, + "grad_norm": 4.039504830310931, + "learning_rate": 2.4438657439002765e-06, + "loss": 0.1842, + "step": 16561 + }, + { + "epoch": 2.3386049138661393, + "grad_norm": 3.5339873136152304, + "learning_rate": 2.4428672878446803e-06, + "loss": 0.1757, + "step": 16562 + }, + { + "epoch": 2.338746116916125, + "grad_norm": 3.166911841498467, + "learning_rate": 2.441869007413585e-06, + "loss": 0.1484, + "step": 16563 + }, + { + "epoch": 2.338887319966111, + "grad_norm": 3.439817063640929, + "learning_rate": 2.4408709026301903e-06, + "loss": 0.1261, + "step": 16564 + }, + { + "epoch": 2.339028523016097, + "grad_norm": 3.078681715728318, + "learning_rate": 2.4398729735176907e-06, + "loss": 0.1652, + "step": 16565 + }, + { + "epoch": 2.339169726066083, + "grad_norm": 3.06932197485248, + "learning_rate": 2.438875220099278e-06, + "loss": 0.1739, + "step": 16566 + }, + { + "epoch": 2.3393109291160687, + "grad_norm": 4.662164107228458, + "learning_rate": 2.4378776423981398e-06, + "loss": 0.1685, + "step": 16567 + }, + { + "epoch": 2.3394521321660546, + "grad_norm": 2.92490531564976, + "learning_rate": 2.43688024043746e-06, + "loss": 0.1399, + "step": 16568 + }, + { + "epoch": 2.3395933352160405, + "grad_norm": 3.500153577893397, + "learning_rate": 2.4358830142404166e-06, + "loss": 0.1716, + "step": 16569 + }, + { + "epoch": 2.3397345382660264, + "grad_norm": 2.9972560004876425, + "learning_rate": 2.4348859638301857e-06, + "loss": 0.1281, + "step": 16570 + }, + { + "epoch": 2.3398757413160123, + "grad_norm": 4.310056007915561, + "learning_rate": 2.4338890892299383e-06, + "loss": 0.1762, + "step": 16571 + }, + { + "epoch": 2.340016944365998, + "grad_norm": 3.098136607355675, + "learning_rate": 2.4328923904628376e-06, + "loss": 0.156, + "step": 16572 + }, + { + "epoch": 2.340158147415984, + "grad_norm": 3.3087486635333203, + "learning_rate": 2.4318958675520553e-06, + "loss": 0.1322, + "step": 16573 + }, + { + "epoch": 2.34029935046597, + "grad_norm": 3.3678099704468707, + "learning_rate": 2.430899520520741e-06, + "loss": 0.1726, + "step": 16574 + }, + { + "epoch": 2.340440553515956, + "grad_norm": 3.4524527610445, + "learning_rate": 2.4299033493920543e-06, + "loss": 0.1554, + "step": 16575 + }, + { + "epoch": 2.3405817565659417, + "grad_norm": 3.602610267448821, + "learning_rate": 2.428907354189144e-06, + "loss": 0.1729, + "step": 16576 + }, + { + "epoch": 2.3407229596159276, + "grad_norm": 3.604474011203195, + "learning_rate": 2.4279115349351546e-06, + "loss": 0.1954, + "step": 16577 + }, + { + "epoch": 2.3408641626659135, + "grad_norm": 3.7291353000982945, + "learning_rate": 2.4269158916532332e-06, + "loss": 0.1755, + "step": 16578 + }, + { + "epoch": 2.3410053657158993, + "grad_norm": 3.649894958722056, + "learning_rate": 2.4259204243665157e-06, + "loss": 0.2109, + "step": 16579 + }, + { + "epoch": 2.3411465687658852, + "grad_norm": 2.7950855341051213, + "learning_rate": 2.424925133098137e-06, + "loss": 0.1157, + "step": 16580 + }, + { + "epoch": 2.341287771815871, + "grad_norm": 3.0345121900505707, + "learning_rate": 2.4239300178712265e-06, + "loss": 0.1792, + "step": 16581 + }, + { + "epoch": 2.341428974865857, + "grad_norm": 3.03045270370139, + "learning_rate": 2.422935078708911e-06, + "loss": 0.1588, + "step": 16582 + }, + { + "epoch": 2.341570177915843, + "grad_norm": 2.832531245571322, + "learning_rate": 2.4219403156343123e-06, + "loss": 0.1662, + "step": 16583 + }, + { + "epoch": 2.3417113809658288, + "grad_norm": 2.9034395610127453, + "learning_rate": 2.4209457286705475e-06, + "loss": 0.1434, + "step": 16584 + }, + { + "epoch": 2.3418525840158146, + "grad_norm": 3.448512474951474, + "learning_rate": 2.4199513178407306e-06, + "loss": 0.1993, + "step": 16585 + }, + { + "epoch": 2.3419937870658005, + "grad_norm": 3.803943184354228, + "learning_rate": 2.418957083167972e-06, + "loss": 0.152, + "step": 16586 + }, + { + "epoch": 2.3421349901157864, + "grad_norm": 2.565057292524917, + "learning_rate": 2.417963024675376e-06, + "loss": 0.114, + "step": 16587 + }, + { + "epoch": 2.3422761931657723, + "grad_norm": 3.9042882144797035, + "learning_rate": 2.4169691423860454e-06, + "loss": 0.1759, + "step": 16588 + }, + { + "epoch": 2.342417396215758, + "grad_norm": 2.8044060496096983, + "learning_rate": 2.4159754363230745e-06, + "loss": 0.1352, + "step": 16589 + }, + { + "epoch": 2.342558599265744, + "grad_norm": 3.6356439952734116, + "learning_rate": 2.414981906509565e-06, + "loss": 0.2297, + "step": 16590 + }, + { + "epoch": 2.34269980231573, + "grad_norm": 3.473421090058503, + "learning_rate": 2.413988552968597e-06, + "loss": 0.1814, + "step": 16591 + }, + { + "epoch": 2.342841005365716, + "grad_norm": 3.74932791425919, + "learning_rate": 2.4129953757232584e-06, + "loss": 0.1659, + "step": 16592 + }, + { + "epoch": 2.3429822084157017, + "grad_norm": 3.3188668468929894, + "learning_rate": 2.4120023747966314e-06, + "loss": 0.1607, + "step": 16593 + }, + { + "epoch": 2.3431234114656876, + "grad_norm": 2.5480852223318604, + "learning_rate": 2.411009550211789e-06, + "loss": 0.1373, + "step": 16594 + }, + { + "epoch": 2.3432646145156735, + "grad_norm": 3.5204785045908005, + "learning_rate": 2.4100169019918095e-06, + "loss": 0.1769, + "step": 16595 + }, + { + "epoch": 2.3434058175656594, + "grad_norm": 3.175146724564063, + "learning_rate": 2.409024430159761e-06, + "loss": 0.1264, + "step": 16596 + }, + { + "epoch": 2.3435470206156452, + "grad_norm": 3.294022770184792, + "learning_rate": 2.4080321347387046e-06, + "loss": 0.1724, + "step": 16597 + }, + { + "epoch": 2.343688223665631, + "grad_norm": 3.222489863851626, + "learning_rate": 2.4070400157517036e-06, + "loss": 0.1696, + "step": 16598 + }, + { + "epoch": 2.343829426715617, + "grad_norm": 2.9726430405008135, + "learning_rate": 2.406048073221814e-06, + "loss": 0.1166, + "step": 16599 + }, + { + "epoch": 2.343970629765603, + "grad_norm": 3.3208264696955636, + "learning_rate": 2.4050563071720867e-06, + "loss": 0.1321, + "step": 16600 + }, + { + "epoch": 2.3441118328155888, + "grad_norm": 3.4581316712527945, + "learning_rate": 2.4040647176255717e-06, + "loss": 0.1581, + "step": 16601 + }, + { + "epoch": 2.3442530358655747, + "grad_norm": 4.008794991202921, + "learning_rate": 2.403073304605311e-06, + "loss": 0.2211, + "step": 16602 + }, + { + "epoch": 2.3443942389155605, + "grad_norm": 3.0608931162305844, + "learning_rate": 2.402082068134347e-06, + "loss": 0.1214, + "step": 16603 + }, + { + "epoch": 2.3445354419655464, + "grad_norm": 2.3348591645265397, + "learning_rate": 2.401091008235714e-06, + "loss": 0.1306, + "step": 16604 + }, + { + "epoch": 2.3446766450155323, + "grad_norm": 3.347525126758426, + "learning_rate": 2.400100124932444e-06, + "loss": 0.1892, + "step": 16605 + }, + { + "epoch": 2.344817848065518, + "grad_norm": 2.7269688445431366, + "learning_rate": 2.399109418247563e-06, + "loss": 0.1224, + "step": 16606 + }, + { + "epoch": 2.344959051115504, + "grad_norm": 3.1756162212093226, + "learning_rate": 2.3981188882041005e-06, + "loss": 0.1838, + "step": 16607 + }, + { + "epoch": 2.34510025416549, + "grad_norm": 3.5462293554996283, + "learning_rate": 2.3971285348250705e-06, + "loss": 0.1719, + "step": 16608 + }, + { + "epoch": 2.345241457215476, + "grad_norm": 2.848473369226521, + "learning_rate": 2.3961383581334897e-06, + "loss": 0.1681, + "step": 16609 + }, + { + "epoch": 2.3453826602654617, + "grad_norm": 3.1394107245903853, + "learning_rate": 2.3951483581523694e-06, + "loss": 0.1309, + "step": 16610 + }, + { + "epoch": 2.3455238633154476, + "grad_norm": 3.26245157482, + "learning_rate": 2.3941585349047146e-06, + "loss": 0.1645, + "step": 16611 + }, + { + "epoch": 2.3456650663654335, + "grad_norm": 3.1654875368667583, + "learning_rate": 2.393168888413533e-06, + "loss": 0.1532, + "step": 16612 + }, + { + "epoch": 2.3458062694154194, + "grad_norm": 3.941693657191256, + "learning_rate": 2.392179418701822e-06, + "loss": 0.2157, + "step": 16613 + }, + { + "epoch": 2.3459474724654052, + "grad_norm": 3.86797988631536, + "learning_rate": 2.3911901257925752e-06, + "loss": 0.2009, + "step": 16614 + }, + { + "epoch": 2.346088675515391, + "grad_norm": 3.1862774778825966, + "learning_rate": 2.3902010097087834e-06, + "loss": 0.1885, + "step": 16615 + }, + { + "epoch": 2.346229878565377, + "grad_norm": 3.2327824101503584, + "learning_rate": 2.389212070473438e-06, + "loss": 0.1674, + "step": 16616 + }, + { + "epoch": 2.346371081615363, + "grad_norm": 3.7925712040934125, + "learning_rate": 2.388223308109511e-06, + "loss": 0.2214, + "step": 16617 + }, + { + "epoch": 2.346512284665349, + "grad_norm": 3.4998147652875464, + "learning_rate": 2.3872347226399895e-06, + "loss": 0.1864, + "step": 16618 + }, + { + "epoch": 2.3466534877153347, + "grad_norm": 2.7499813477225166, + "learning_rate": 2.3862463140878443e-06, + "loss": 0.162, + "step": 16619 + }, + { + "epoch": 2.3467946907653205, + "grad_norm": 2.5572899571543988, + "learning_rate": 2.3852580824760487e-06, + "loss": 0.1223, + "step": 16620 + }, + { + "epoch": 2.3469358938153064, + "grad_norm": 3.4136451492428357, + "learning_rate": 2.3842700278275656e-06, + "loss": 0.1202, + "step": 16621 + }, + { + "epoch": 2.3470770968652923, + "grad_norm": 2.503281566434876, + "learning_rate": 2.383282150165358e-06, + "loss": 0.1417, + "step": 16622 + }, + { + "epoch": 2.347218299915278, + "grad_norm": 3.15305953250733, + "learning_rate": 2.3822944495123814e-06, + "loss": 0.154, + "step": 16623 + }, + { + "epoch": 2.347359502965264, + "grad_norm": 3.066299530875791, + "learning_rate": 2.381306925891598e-06, + "loss": 0.1457, + "step": 16624 + }, + { + "epoch": 2.34750070601525, + "grad_norm": 2.6453775273740763, + "learning_rate": 2.380319579325947e-06, + "loss": 0.1576, + "step": 16625 + }, + { + "epoch": 2.347641909065236, + "grad_norm": 3.4606002356100527, + "learning_rate": 2.3793324098383796e-06, + "loss": 0.1887, + "step": 16626 + }, + { + "epoch": 2.3477831121152217, + "grad_norm": 3.5277398877317303, + "learning_rate": 2.3783454174518362e-06, + "loss": 0.1885, + "step": 16627 + }, + { + "epoch": 2.3479243151652076, + "grad_norm": 2.888798441463067, + "learning_rate": 2.3773586021892524e-06, + "loss": 0.1173, + "step": 16628 + }, + { + "epoch": 2.3480655182151935, + "grad_norm": 2.8162296064778802, + "learning_rate": 2.3763719640735603e-06, + "loss": 0.1182, + "step": 16629 + }, + { + "epoch": 2.3482067212651794, + "grad_norm": 2.8841728080694145, + "learning_rate": 2.375385503127695e-06, + "loss": 0.1309, + "step": 16630 + }, + { + "epoch": 2.3483479243151653, + "grad_norm": 3.3802075816301445, + "learning_rate": 2.3743992193745778e-06, + "loss": 0.1724, + "step": 16631 + }, + { + "epoch": 2.348489127365151, + "grad_norm": 2.687398088693144, + "learning_rate": 2.3734131128371283e-06, + "loss": 0.1497, + "step": 16632 + }, + { + "epoch": 2.348630330415137, + "grad_norm": 4.636465219397901, + "learning_rate": 2.3724271835382682e-06, + "loss": 0.2241, + "step": 16633 + }, + { + "epoch": 2.348771533465123, + "grad_norm": 3.8390331457788838, + "learning_rate": 2.3714414315009005e-06, + "loss": 0.1323, + "step": 16634 + }, + { + "epoch": 2.348912736515109, + "grad_norm": 2.5822990595413744, + "learning_rate": 2.370455856747942e-06, + "loss": 0.1411, + "step": 16635 + }, + { + "epoch": 2.3490539395650947, + "grad_norm": 3.463292827111964, + "learning_rate": 2.3694704593022942e-06, + "loss": 0.1933, + "step": 16636 + }, + { + "epoch": 2.3491951426150806, + "grad_norm": 3.6340970982519853, + "learning_rate": 2.3684852391868586e-06, + "loss": 0.1706, + "step": 16637 + }, + { + "epoch": 2.3493363456650664, + "grad_norm": 3.560038650204616, + "learning_rate": 2.367500196424529e-06, + "loss": 0.2171, + "step": 16638 + }, + { + "epoch": 2.3494775487150523, + "grad_norm": 2.905858217558989, + "learning_rate": 2.3665153310381994e-06, + "loss": 0.1538, + "step": 16639 + }, + { + "epoch": 2.349618751765038, + "grad_norm": 2.512841239871109, + "learning_rate": 2.3655306430507563e-06, + "loss": 0.1452, + "step": 16640 + }, + { + "epoch": 2.349759954815024, + "grad_norm": 2.7191356279700205, + "learning_rate": 2.364546132485085e-06, + "loss": 0.1075, + "step": 16641 + }, + { + "epoch": 2.34990115786501, + "grad_norm": 2.518868744419248, + "learning_rate": 2.3635617993640625e-06, + "loss": 0.1092, + "step": 16642 + }, + { + "epoch": 2.350042360914996, + "grad_norm": 3.4741598068897317, + "learning_rate": 2.3625776437105674e-06, + "loss": 0.1809, + "step": 16643 + }, + { + "epoch": 2.3501835639649817, + "grad_norm": 3.3994506491930263, + "learning_rate": 2.3615936655474693e-06, + "loss": 0.1559, + "step": 16644 + }, + { + "epoch": 2.3503247670149676, + "grad_norm": 3.1450527318004053, + "learning_rate": 2.360609864897635e-06, + "loss": 0.1611, + "step": 16645 + }, + { + "epoch": 2.3504659700649535, + "grad_norm": 3.057692533434694, + "learning_rate": 2.3596262417839256e-06, + "loss": 0.1265, + "step": 16646 + }, + { + "epoch": 2.3506071731149394, + "grad_norm": 2.570076215118854, + "learning_rate": 2.3586427962292046e-06, + "loss": 0.1232, + "step": 16647 + }, + { + "epoch": 2.3507483761649253, + "grad_norm": 3.093022008073182, + "learning_rate": 2.357659528256326e-06, + "loss": 0.1378, + "step": 16648 + }, + { + "epoch": 2.350889579214911, + "grad_norm": 3.0584504503702106, + "learning_rate": 2.3566764378881392e-06, + "loss": 0.1537, + "step": 16649 + }, + { + "epoch": 2.351030782264897, + "grad_norm": 2.7159109409179063, + "learning_rate": 2.3556935251474946e-06, + "loss": 0.1337, + "step": 16650 + }, + { + "epoch": 2.351171985314883, + "grad_norm": 3.12146019371224, + "learning_rate": 2.354710790057225e-06, + "loss": 0.1319, + "step": 16651 + }, + { + "epoch": 2.351313188364869, + "grad_norm": 2.975046790948436, + "learning_rate": 2.3537282326401777e-06, + "loss": 0.1638, + "step": 16652 + }, + { + "epoch": 2.3514543914148547, + "grad_norm": 2.4660571084825857, + "learning_rate": 2.352745852919184e-06, + "loss": 0.135, + "step": 16653 + }, + { + "epoch": 2.3515955944648406, + "grad_norm": 3.3047830967455356, + "learning_rate": 2.351763650917074e-06, + "loss": 0.1786, + "step": 16654 + }, + { + "epoch": 2.3517367975148264, + "grad_norm": 3.138636760757721, + "learning_rate": 2.350781626656674e-06, + "loss": 0.1252, + "step": 16655 + }, + { + "epoch": 2.3518780005648123, + "grad_norm": 2.685413982261762, + "learning_rate": 2.3497997801608054e-06, + "loss": 0.1133, + "step": 16656 + }, + { + "epoch": 2.352019203614798, + "grad_norm": 3.402290601550504, + "learning_rate": 2.348818111452287e-06, + "loss": 0.153, + "step": 16657 + }, + { + "epoch": 2.352160406664784, + "grad_norm": 2.4690867644021073, + "learning_rate": 2.3478366205539295e-06, + "loss": 0.1169, + "step": 16658 + }, + { + "epoch": 2.35230160971477, + "grad_norm": 4.6218114745324925, + "learning_rate": 2.346855307488546e-06, + "loss": 0.2045, + "step": 16659 + }, + { + "epoch": 2.352442812764756, + "grad_norm": 3.61000026408447, + "learning_rate": 2.345874172278939e-06, + "loss": 0.1093, + "step": 16660 + }, + { + "epoch": 2.3525840158147417, + "grad_norm": 4.127194737828726, + "learning_rate": 2.3448932149479107e-06, + "loss": 0.2156, + "step": 16661 + }, + { + "epoch": 2.3527252188647276, + "grad_norm": 3.082974617770854, + "learning_rate": 2.3439124355182584e-06, + "loss": 0.1364, + "step": 16662 + }, + { + "epoch": 2.3528664219147135, + "grad_norm": 3.7364178413406064, + "learning_rate": 2.3429318340127717e-06, + "loss": 0.1545, + "step": 16663 + }, + { + "epoch": 2.3530076249646994, + "grad_norm": 3.1461949329827887, + "learning_rate": 2.341951410454245e-06, + "loss": 0.1696, + "step": 16664 + }, + { + "epoch": 2.3531488280146853, + "grad_norm": 2.8644655661195304, + "learning_rate": 2.340971164865461e-06, + "loss": 0.126, + "step": 16665 + }, + { + "epoch": 2.353290031064671, + "grad_norm": 3.0838767382371137, + "learning_rate": 2.3399910972692017e-06, + "loss": 0.1733, + "step": 16666 + }, + { + "epoch": 2.353431234114657, + "grad_norm": 2.9588600779669156, + "learning_rate": 2.339011207688239e-06, + "loss": 0.1192, + "step": 16667 + }, + { + "epoch": 2.353572437164643, + "grad_norm": 2.811973582361199, + "learning_rate": 2.338031496145343e-06, + "loss": 0.1279, + "step": 16668 + }, + { + "epoch": 2.353713640214629, + "grad_norm": 2.670454618637573, + "learning_rate": 2.3370519626632904e-06, + "loss": 0.1035, + "step": 16669 + }, + { + "epoch": 2.3538548432646147, + "grad_norm": 2.617087461016208, + "learning_rate": 2.33607260726484e-06, + "loss": 0.0981, + "step": 16670 + }, + { + "epoch": 2.3539960463146006, + "grad_norm": 2.920268041675882, + "learning_rate": 2.335093429972752e-06, + "loss": 0.1584, + "step": 16671 + }, + { + "epoch": 2.3541372493645865, + "grad_norm": 3.2378570906990354, + "learning_rate": 2.334114430809784e-06, + "loss": 0.1071, + "step": 16672 + }, + { + "epoch": 2.3542784524145723, + "grad_norm": 2.564055895149629, + "learning_rate": 2.3331356097986848e-06, + "loss": 0.1503, + "step": 16673 + }, + { + "epoch": 2.3544196554645582, + "grad_norm": 3.4253511107625405, + "learning_rate": 2.3321569669622034e-06, + "loss": 0.148, + "step": 16674 + }, + { + "epoch": 2.354560858514544, + "grad_norm": 2.8541535218961873, + "learning_rate": 2.3311785023230825e-06, + "loss": 0.1312, + "step": 16675 + }, + { + "epoch": 2.35470206156453, + "grad_norm": 3.51530082540038, + "learning_rate": 2.3302002159040613e-06, + "loss": 0.1446, + "step": 16676 + }, + { + "epoch": 2.354843264614516, + "grad_norm": 3.2601604902415198, + "learning_rate": 2.3292221077278744e-06, + "loss": 0.153, + "step": 16677 + }, + { + "epoch": 2.3549844676645018, + "grad_norm": 2.869569567007493, + "learning_rate": 2.328244177817254e-06, + "loss": 0.1369, + "step": 16678 + }, + { + "epoch": 2.3551256707144876, + "grad_norm": 3.263088299865168, + "learning_rate": 2.3272664261949253e-06, + "loss": 0.1475, + "step": 16679 + }, + { + "epoch": 2.3552668737644735, + "grad_norm": 2.5873052417090174, + "learning_rate": 2.326288852883607e-06, + "loss": 0.1205, + "step": 16680 + }, + { + "epoch": 2.3554080768144594, + "grad_norm": 3.1899124304130146, + "learning_rate": 2.3253114579060266e-06, + "loss": 0.1477, + "step": 16681 + }, + { + "epoch": 2.3555492798644453, + "grad_norm": 3.274804817949863, + "learning_rate": 2.3243342412848923e-06, + "loss": 0.1682, + "step": 16682 + }, + { + "epoch": 2.355690482914431, + "grad_norm": 3.0463938556398413, + "learning_rate": 2.3233572030429187e-06, + "loss": 0.151, + "step": 16683 + }, + { + "epoch": 2.355831685964417, + "grad_norm": 3.062068376514406, + "learning_rate": 2.3223803432028046e-06, + "loss": 0.1366, + "step": 16684 + }, + { + "epoch": 2.355972889014403, + "grad_norm": 3.765162928821967, + "learning_rate": 2.3214036617872536e-06, + "loss": 0.1624, + "step": 16685 + }, + { + "epoch": 2.356114092064389, + "grad_norm": 4.36960310945819, + "learning_rate": 2.3204271588189685e-06, + "loss": 0.2004, + "step": 16686 + }, + { + "epoch": 2.3562552951143747, + "grad_norm": 2.8512451549550377, + "learning_rate": 2.319450834320639e-06, + "loss": 0.1189, + "step": 16687 + }, + { + "epoch": 2.3563964981643606, + "grad_norm": 3.4423166055682928, + "learning_rate": 2.3184746883149556e-06, + "loss": 0.1642, + "step": 16688 + }, + { + "epoch": 2.3565377012143465, + "grad_norm": 3.217575098854573, + "learning_rate": 2.3174987208246038e-06, + "loss": 0.141, + "step": 16689 + }, + { + "epoch": 2.3566789042643324, + "grad_norm": 3.1845278937044843, + "learning_rate": 2.3165229318722627e-06, + "loss": 0.1622, + "step": 16690 + }, + { + "epoch": 2.3568201073143182, + "grad_norm": 3.8978800224577665, + "learning_rate": 2.31554732148061e-06, + "loss": 0.1833, + "step": 16691 + }, + { + "epoch": 2.356961310364304, + "grad_norm": 3.2537622388035787, + "learning_rate": 2.3145718896723204e-06, + "loss": 0.1585, + "step": 16692 + }, + { + "epoch": 2.3571025134142896, + "grad_norm": 3.3961136143053783, + "learning_rate": 2.3135966364700603e-06, + "loss": 0.1479, + "step": 16693 + }, + { + "epoch": 2.3572437164642754, + "grad_norm": 2.964405352577542, + "learning_rate": 2.312621561896494e-06, + "loss": 0.1324, + "step": 16694 + }, + { + "epoch": 2.3573849195142613, + "grad_norm": 2.9759674844534567, + "learning_rate": 2.311646665974284e-06, + "loss": 0.1515, + "step": 16695 + }, + { + "epoch": 2.357526122564247, + "grad_norm": 3.6417743798910207, + "learning_rate": 2.3106719487260843e-06, + "loss": 0.1434, + "step": 16696 + }, + { + "epoch": 2.357667325614233, + "grad_norm": 3.167411292920211, + "learning_rate": 2.309697410174545e-06, + "loss": 0.1211, + "step": 16697 + }, + { + "epoch": 2.357808528664219, + "grad_norm": 3.6106210681205795, + "learning_rate": 2.3087230503423207e-06, + "loss": 0.1581, + "step": 16698 + }, + { + "epoch": 2.357949731714205, + "grad_norm": 3.690799445797164, + "learning_rate": 2.30774886925205e-06, + "loss": 0.1885, + "step": 16699 + }, + { + "epoch": 2.3580909347641907, + "grad_norm": 3.4603217483896413, + "learning_rate": 2.306774866926377e-06, + "loss": 0.1823, + "step": 16700 + }, + { + "epoch": 2.3582321378141766, + "grad_norm": 3.4846246306200603, + "learning_rate": 2.3058010433879297e-06, + "loss": 0.2054, + "step": 16701 + }, + { + "epoch": 2.3583733408641625, + "grad_norm": 3.859154502171222, + "learning_rate": 2.304827398659342e-06, + "loss": 0.1923, + "step": 16702 + }, + { + "epoch": 2.3585145439141484, + "grad_norm": 3.149821175537311, + "learning_rate": 2.303853932763244e-06, + "loss": 0.1583, + "step": 16703 + }, + { + "epoch": 2.3586557469641343, + "grad_norm": 2.9617161232596247, + "learning_rate": 2.3028806457222565e-06, + "loss": 0.1461, + "step": 16704 + }, + { + "epoch": 2.35879695001412, + "grad_norm": 3.141985952882657, + "learning_rate": 2.3019075375589995e-06, + "loss": 0.1529, + "step": 16705 + }, + { + "epoch": 2.358938153064106, + "grad_norm": 3.566826133937719, + "learning_rate": 2.300934608296086e-06, + "loss": 0.1577, + "step": 16706 + }, + { + "epoch": 2.359079356114092, + "grad_norm": 2.9670539565602283, + "learning_rate": 2.299961857956128e-06, + "loss": 0.1237, + "step": 16707 + }, + { + "epoch": 2.359220559164078, + "grad_norm": 3.6668071331968215, + "learning_rate": 2.2989892865617313e-06, + "loss": 0.2091, + "step": 16708 + }, + { + "epoch": 2.3593617622140637, + "grad_norm": 2.990665666709609, + "learning_rate": 2.2980168941354973e-06, + "loss": 0.1597, + "step": 16709 + }, + { + "epoch": 2.3595029652640496, + "grad_norm": 3.34084985189003, + "learning_rate": 2.2970446807000237e-06, + "loss": 0.1302, + "step": 16710 + }, + { + "epoch": 2.3596441683140355, + "grad_norm": 2.5365759978189826, + "learning_rate": 2.296072646277906e-06, + "loss": 0.1275, + "step": 16711 + }, + { + "epoch": 2.3597853713640213, + "grad_norm": 3.3346695832173427, + "learning_rate": 2.2951007908917334e-06, + "loss": 0.1555, + "step": 16712 + }, + { + "epoch": 2.359926574414007, + "grad_norm": 2.9571388246806043, + "learning_rate": 2.2941291145640898e-06, + "loss": 0.1807, + "step": 16713 + }, + { + "epoch": 2.360067777463993, + "grad_norm": 3.5764015834354788, + "learning_rate": 2.293157617317555e-06, + "loss": 0.1868, + "step": 16714 + }, + { + "epoch": 2.360208980513979, + "grad_norm": 2.764716020827102, + "learning_rate": 2.292186299174712e-06, + "loss": 0.1561, + "step": 16715 + }, + { + "epoch": 2.360350183563965, + "grad_norm": 3.1090639997268554, + "learning_rate": 2.291215160158131e-06, + "loss": 0.1608, + "step": 16716 + }, + { + "epoch": 2.3604913866139507, + "grad_norm": 3.550344346168288, + "learning_rate": 2.2902442002903823e-06, + "loss": 0.1564, + "step": 16717 + }, + { + "epoch": 2.3606325896639366, + "grad_norm": 3.585780853375674, + "learning_rate": 2.289273419594027e-06, + "loss": 0.1527, + "step": 16718 + }, + { + "epoch": 2.3607737927139225, + "grad_norm": 3.2535969945152323, + "learning_rate": 2.2883028180916265e-06, + "loss": 0.1665, + "step": 16719 + }, + { + "epoch": 2.3609149957639084, + "grad_norm": 2.8620029125672697, + "learning_rate": 2.287332395805737e-06, + "loss": 0.1128, + "step": 16720 + }, + { + "epoch": 2.3610561988138943, + "grad_norm": 3.174831221469931, + "learning_rate": 2.286362152758913e-06, + "loss": 0.1335, + "step": 16721 + }, + { + "epoch": 2.36119740186388, + "grad_norm": 2.904956965956408, + "learning_rate": 2.2853920889737015e-06, + "loss": 0.1511, + "step": 16722 + }, + { + "epoch": 2.361338604913866, + "grad_norm": 2.7158183361761647, + "learning_rate": 2.2844222044726463e-06, + "loss": 0.1418, + "step": 16723 + }, + { + "epoch": 2.361479807963852, + "grad_norm": 2.5152687948818757, + "learning_rate": 2.2834524992782867e-06, + "loss": 0.1568, + "step": 16724 + }, + { + "epoch": 2.361621011013838, + "grad_norm": 3.5973077838860066, + "learning_rate": 2.282482973413159e-06, + "loss": 0.1482, + "step": 16725 + }, + { + "epoch": 2.3617622140638237, + "grad_norm": 2.946663072075912, + "learning_rate": 2.281513626899794e-06, + "loss": 0.1319, + "step": 16726 + }, + { + "epoch": 2.3619034171138096, + "grad_norm": 2.914028124592764, + "learning_rate": 2.2805444597607195e-06, + "loss": 0.111, + "step": 16727 + }, + { + "epoch": 2.3620446201637955, + "grad_norm": 3.5525910267244405, + "learning_rate": 2.279575472018457e-06, + "loss": 0.1807, + "step": 16728 + }, + { + "epoch": 2.3621858232137813, + "grad_norm": 3.045541499727895, + "learning_rate": 2.278606663695526e-06, + "loss": 0.1495, + "step": 16729 + }, + { + "epoch": 2.3623270262637672, + "grad_norm": 2.8383180107553527, + "learning_rate": 2.2776380348144435e-06, + "loss": 0.1415, + "step": 16730 + }, + { + "epoch": 2.362468229313753, + "grad_norm": 3.9236594155479154, + "learning_rate": 2.276669585397716e-06, + "loss": 0.1815, + "step": 16731 + }, + { + "epoch": 2.362609432363739, + "grad_norm": 3.2538822995069636, + "learning_rate": 2.2757013154678496e-06, + "loss": 0.1688, + "step": 16732 + }, + { + "epoch": 2.362750635413725, + "grad_norm": 3.1496179962404147, + "learning_rate": 2.274733225047352e-06, + "loss": 0.1582, + "step": 16733 + }, + { + "epoch": 2.3628918384637108, + "grad_norm": 4.024662395414331, + "learning_rate": 2.2737653141587203e-06, + "loss": 0.2048, + "step": 16734 + }, + { + "epoch": 2.3630330415136966, + "grad_norm": 4.063873570807576, + "learning_rate": 2.2727975828244443e-06, + "loss": 0.1381, + "step": 16735 + }, + { + "epoch": 2.3631742445636825, + "grad_norm": 3.1398909984290304, + "learning_rate": 2.2718300310670136e-06, + "loss": 0.1402, + "step": 16736 + }, + { + "epoch": 2.3633154476136684, + "grad_norm": 2.598271885989341, + "learning_rate": 2.2708626589089146e-06, + "loss": 0.1256, + "step": 16737 + }, + { + "epoch": 2.3634566506636543, + "grad_norm": 2.987888638309345, + "learning_rate": 2.26989546637263e-06, + "loss": 0.1192, + "step": 16738 + }, + { + "epoch": 2.36359785371364, + "grad_norm": 3.0807834556379765, + "learning_rate": 2.2689284534806376e-06, + "loss": 0.132, + "step": 16739 + }, + { + "epoch": 2.363739056763626, + "grad_norm": 3.4885130524883396, + "learning_rate": 2.26796162025541e-06, + "loss": 0.158, + "step": 16740 + }, + { + "epoch": 2.363880259813612, + "grad_norm": 2.862157648787348, + "learning_rate": 2.2669949667194137e-06, + "loss": 0.1473, + "step": 16741 + }, + { + "epoch": 2.364021462863598, + "grad_norm": 3.026163312504716, + "learning_rate": 2.2660284928951148e-06, + "loss": 0.1474, + "step": 16742 + }, + { + "epoch": 2.3641626659135837, + "grad_norm": 3.265749374698899, + "learning_rate": 2.265062198804975e-06, + "loss": 0.1433, + "step": 16743 + }, + { + "epoch": 2.3643038689635696, + "grad_norm": 3.646680306600485, + "learning_rate": 2.2640960844714478e-06, + "loss": 0.1724, + "step": 16744 + }, + { + "epoch": 2.3644450720135555, + "grad_norm": 2.3220017617928583, + "learning_rate": 2.2631301499169877e-06, + "loss": 0.104, + "step": 16745 + }, + { + "epoch": 2.3645862750635414, + "grad_norm": 3.2126335279731677, + "learning_rate": 2.2621643951640413e-06, + "loss": 0.1787, + "step": 16746 + }, + { + "epoch": 2.3647274781135272, + "grad_norm": 3.841370649046859, + "learning_rate": 2.261198820235052e-06, + "loss": 0.1983, + "step": 16747 + }, + { + "epoch": 2.364868681163513, + "grad_norm": 3.7768973931581393, + "learning_rate": 2.2602334251524617e-06, + "loss": 0.1828, + "step": 16748 + }, + { + "epoch": 2.365009884213499, + "grad_norm": 3.035120788490452, + "learning_rate": 2.2592682099386996e-06, + "loss": 0.1442, + "step": 16749 + }, + { + "epoch": 2.365151087263485, + "grad_norm": 3.503043857085711, + "learning_rate": 2.258303174616204e-06, + "loss": 0.1757, + "step": 16750 + }, + { + "epoch": 2.3652922903134708, + "grad_norm": 3.1046231248207197, + "learning_rate": 2.257338319207403e-06, + "loss": 0.1479, + "step": 16751 + }, + { + "epoch": 2.3654334933634567, + "grad_norm": 3.1146212908005686, + "learning_rate": 2.256373643734713e-06, + "loss": 0.1555, + "step": 16752 + }, + { + "epoch": 2.3655746964134425, + "grad_norm": 4.104367679538476, + "learning_rate": 2.2554091482205544e-06, + "loss": 0.1536, + "step": 16753 + }, + { + "epoch": 2.3657158994634284, + "grad_norm": 2.7720890744626865, + "learning_rate": 2.25444483268734e-06, + "loss": 0.121, + "step": 16754 + }, + { + "epoch": 2.3658571025134143, + "grad_norm": 3.63672139750319, + "learning_rate": 2.253480697157486e-06, + "loss": 0.1767, + "step": 16755 + }, + { + "epoch": 2.3659983055634, + "grad_norm": 2.919902228443104, + "learning_rate": 2.2525167416533945e-06, + "loss": 0.1336, + "step": 16756 + }, + { + "epoch": 2.366139508613386, + "grad_norm": 3.2348142901196733, + "learning_rate": 2.251552966197469e-06, + "loss": 0.1369, + "step": 16757 + }, + { + "epoch": 2.366280711663372, + "grad_norm": 2.6102307933158504, + "learning_rate": 2.250589370812105e-06, + "loss": 0.1424, + "step": 16758 + }, + { + "epoch": 2.366421914713358, + "grad_norm": 3.3693386623751906, + "learning_rate": 2.2496259555196986e-06, + "loss": 0.1581, + "step": 16759 + }, + { + "epoch": 2.3665631177633437, + "grad_norm": 3.036184750015656, + "learning_rate": 2.248662720342637e-06, + "loss": 0.1515, + "step": 16760 + }, + { + "epoch": 2.3667043208133296, + "grad_norm": 3.1529133487863, + "learning_rate": 2.247699665303307e-06, + "loss": 0.1231, + "step": 16761 + }, + { + "epoch": 2.3668455238633155, + "grad_norm": 2.900054581045101, + "learning_rate": 2.2467367904240877e-06, + "loss": 0.1644, + "step": 16762 + }, + { + "epoch": 2.3669867269133014, + "grad_norm": 3.97893936330357, + "learning_rate": 2.2457740957273577e-06, + "loss": 0.1844, + "step": 16763 + }, + { + "epoch": 2.3671279299632872, + "grad_norm": 3.7346162757334413, + "learning_rate": 2.2448115812354888e-06, + "loss": 0.1956, + "step": 16764 + }, + { + "epoch": 2.367269133013273, + "grad_norm": 2.540897266463932, + "learning_rate": 2.2438492469708494e-06, + "loss": 0.1255, + "step": 16765 + }, + { + "epoch": 2.367410336063259, + "grad_norm": 3.2408841194428137, + "learning_rate": 2.2428870929558012e-06, + "loss": 0.1804, + "step": 16766 + }, + { + "epoch": 2.367551539113245, + "grad_norm": 3.0376324982207246, + "learning_rate": 2.241925119212709e-06, + "loss": 0.1603, + "step": 16767 + }, + { + "epoch": 2.367692742163231, + "grad_norm": 2.8684779971555803, + "learning_rate": 2.2409633257639306e-06, + "loss": 0.1168, + "step": 16768 + }, + { + "epoch": 2.3678339452132167, + "grad_norm": 3.6440323380936688, + "learning_rate": 2.2400017126318095e-06, + "loss": 0.1508, + "step": 16769 + }, + { + "epoch": 2.3679751482632025, + "grad_norm": 3.047506574521593, + "learning_rate": 2.239040279838699e-06, + "loss": 0.1687, + "step": 16770 + }, + { + "epoch": 2.3681163513131884, + "grad_norm": 2.977736404618285, + "learning_rate": 2.2380790274069363e-06, + "loss": 0.1498, + "step": 16771 + }, + { + "epoch": 2.3682575543631743, + "grad_norm": 3.0891657577434475, + "learning_rate": 2.2371179553588685e-06, + "loss": 0.129, + "step": 16772 + }, + { + "epoch": 2.36839875741316, + "grad_norm": 3.5975547444760116, + "learning_rate": 2.236157063716827e-06, + "loss": 0.1489, + "step": 16773 + }, + { + "epoch": 2.368539960463146, + "grad_norm": 4.175897512158729, + "learning_rate": 2.2351963525031428e-06, + "loss": 0.1902, + "step": 16774 + }, + { + "epoch": 2.368681163513132, + "grad_norm": 2.9073938878241234, + "learning_rate": 2.2342358217401407e-06, + "loss": 0.1523, + "step": 16775 + }, + { + "epoch": 2.368822366563118, + "grad_norm": 3.3971754903535913, + "learning_rate": 2.233275471450146e-06, + "loss": 0.1665, + "step": 16776 + }, + { + "epoch": 2.3689635696131037, + "grad_norm": 3.4085875242012853, + "learning_rate": 2.232315301655473e-06, + "loss": 0.1489, + "step": 16777 + }, + { + "epoch": 2.3691047726630896, + "grad_norm": 3.6377655989530173, + "learning_rate": 2.231355312378439e-06, + "loss": 0.1419, + "step": 16778 + }, + { + "epoch": 2.3692459757130755, + "grad_norm": 4.126370118529404, + "learning_rate": 2.2303955036413528e-06, + "loss": 0.2482, + "step": 16779 + }, + { + "epoch": 2.3693871787630614, + "grad_norm": 3.468834686382979, + "learning_rate": 2.229435875466519e-06, + "loss": 0.1828, + "step": 16780 + }, + { + "epoch": 2.3695283818130473, + "grad_norm": 2.3891788840695525, + "learning_rate": 2.2284764278762404e-06, + "loss": 0.0878, + "step": 16781 + }, + { + "epoch": 2.369669584863033, + "grad_norm": 3.249383321163135, + "learning_rate": 2.2275171608928124e-06, + "loss": 0.1283, + "step": 16782 + }, + { + "epoch": 2.369810787913019, + "grad_norm": 4.421932860761235, + "learning_rate": 2.226558074538527e-06, + "loss": 0.2027, + "step": 16783 + }, + { + "epoch": 2.369951990963005, + "grad_norm": 4.163506919692353, + "learning_rate": 2.225599168835677e-06, + "loss": 0.1811, + "step": 16784 + }, + { + "epoch": 2.370093194012991, + "grad_norm": 3.279789363133372, + "learning_rate": 2.2246404438065483e-06, + "loss": 0.1239, + "step": 16785 + }, + { + "epoch": 2.3702343970629767, + "grad_norm": 3.6170108142916058, + "learning_rate": 2.2236818994734144e-06, + "loss": 0.1394, + "step": 16786 + }, + { + "epoch": 2.3703756001129626, + "grad_norm": 3.340104319756753, + "learning_rate": 2.2227235358585554e-06, + "loss": 0.1714, + "step": 16787 + }, + { + "epoch": 2.3705168031629484, + "grad_norm": 2.997069832781216, + "learning_rate": 2.221765352984239e-06, + "loss": 0.1139, + "step": 16788 + }, + { + "epoch": 2.3706580062129343, + "grad_norm": 4.960897284905732, + "learning_rate": 2.22080735087274e-06, + "loss": 0.1997, + "step": 16789 + }, + { + "epoch": 2.37079920926292, + "grad_norm": 3.053743705739603, + "learning_rate": 2.2198495295463184e-06, + "loss": 0.1167, + "step": 16790 + }, + { + "epoch": 2.370940412312906, + "grad_norm": 2.566892677107905, + "learning_rate": 2.2188918890272335e-06, + "loss": 0.1447, + "step": 16791 + }, + { + "epoch": 2.371081615362892, + "grad_norm": 3.63923234768654, + "learning_rate": 2.2179344293377415e-06, + "loss": 0.1802, + "step": 16792 + }, + { + "epoch": 2.371222818412878, + "grad_norm": 3.556732355067024, + "learning_rate": 2.2169771505000912e-06, + "loss": 0.181, + "step": 16793 + }, + { + "epoch": 2.3713640214628637, + "grad_norm": 3.319846057301965, + "learning_rate": 2.2160200525365326e-06, + "loss": 0.1565, + "step": 16794 + }, + { + "epoch": 2.3715052245128496, + "grad_norm": 3.5740189101594386, + "learning_rate": 2.2150631354693054e-06, + "loss": 0.2204, + "step": 16795 + }, + { + "epoch": 2.3716464275628355, + "grad_norm": 3.1018345132765486, + "learning_rate": 2.214106399320648e-06, + "loss": 0.1334, + "step": 16796 + }, + { + "epoch": 2.3717876306128214, + "grad_norm": 3.4836925638860876, + "learning_rate": 2.2131498441127964e-06, + "loss": 0.1388, + "step": 16797 + }, + { + "epoch": 2.3719288336628073, + "grad_norm": 3.2267371304477495, + "learning_rate": 2.2121934698679793e-06, + "loss": 0.1912, + "step": 16798 + }, + { + "epoch": 2.372070036712793, + "grad_norm": 2.9429593615016425, + "learning_rate": 2.2112372766084223e-06, + "loss": 0.1202, + "step": 16799 + }, + { + "epoch": 2.372211239762779, + "grad_norm": 3.359542551781249, + "learning_rate": 2.2102812643563455e-06, + "loss": 0.1412, + "step": 16800 + }, + { + "epoch": 2.372352442812765, + "grad_norm": 3.1605134811386835, + "learning_rate": 2.2093254331339697e-06, + "loss": 0.169, + "step": 16801 + }, + { + "epoch": 2.372493645862751, + "grad_norm": 3.133843874930088, + "learning_rate": 2.20836978296351e-06, + "loss": 0.1597, + "step": 16802 + }, + { + "epoch": 2.3726348489127367, + "grad_norm": 3.4449206845367133, + "learning_rate": 2.2074143138671677e-06, + "loss": 0.1722, + "step": 16803 + }, + { + "epoch": 2.3727760519627226, + "grad_norm": 3.334969547659957, + "learning_rate": 2.2064590258671524e-06, + "loss": 0.1463, + "step": 16804 + }, + { + "epoch": 2.3729172550127084, + "grad_norm": 3.4295660305092928, + "learning_rate": 2.205503918985661e-06, + "loss": 0.1821, + "step": 16805 + }, + { + "epoch": 2.3730584580626943, + "grad_norm": 4.44281081225325, + "learning_rate": 2.2045489932448937e-06, + "loss": 0.1817, + "step": 16806 + }, + { + "epoch": 2.37319966111268, + "grad_norm": 3.669500104790085, + "learning_rate": 2.2035942486670425e-06, + "loss": 0.1279, + "step": 16807 + }, + { + "epoch": 2.373340864162666, + "grad_norm": 2.676594464041409, + "learning_rate": 2.2026396852742936e-06, + "loss": 0.1233, + "step": 16808 + }, + { + "epoch": 2.373482067212652, + "grad_norm": 2.7779642848725805, + "learning_rate": 2.2016853030888306e-06, + "loss": 0.13, + "step": 16809 + }, + { + "epoch": 2.373623270262638, + "grad_norm": 3.2569654300715705, + "learning_rate": 2.2007311021328337e-06, + "loss": 0.1637, + "step": 16810 + }, + { + "epoch": 2.3737644733126237, + "grad_norm": 2.8232553673817233, + "learning_rate": 2.199777082428478e-06, + "loss": 0.1387, + "step": 16811 + }, + { + "epoch": 2.373905676362609, + "grad_norm": 3.2533348989648405, + "learning_rate": 2.198823243997933e-06, + "loss": 0.1589, + "step": 16812 + }, + { + "epoch": 2.374046879412595, + "grad_norm": 2.971038238114221, + "learning_rate": 2.197869586863368e-06, + "loss": 0.133, + "step": 16813 + }, + { + "epoch": 2.374188082462581, + "grad_norm": 2.581745547649538, + "learning_rate": 2.196916111046944e-06, + "loss": 0.1296, + "step": 16814 + }, + { + "epoch": 2.374329285512567, + "grad_norm": 3.308605595511561, + "learning_rate": 2.195962816570819e-06, + "loss": 0.1751, + "step": 16815 + }, + { + "epoch": 2.3744704885625527, + "grad_norm": 2.495779136728042, + "learning_rate": 2.1950097034571495e-06, + "loss": 0.1344, + "step": 16816 + }, + { + "epoch": 2.3746116916125386, + "grad_norm": 3.315600295572005, + "learning_rate": 2.1940567717280793e-06, + "loss": 0.1443, + "step": 16817 + }, + { + "epoch": 2.3747528946625245, + "grad_norm": 3.7217681564831127, + "learning_rate": 2.193104021405762e-06, + "loss": 0.1716, + "step": 16818 + }, + { + "epoch": 2.3748940977125104, + "grad_norm": 3.1384611504683306, + "learning_rate": 2.192151452512339e-06, + "loss": 0.1448, + "step": 16819 + }, + { + "epoch": 2.3750353007624962, + "grad_norm": 2.8030048416374695, + "learning_rate": 2.191199065069941e-06, + "loss": 0.1627, + "step": 16820 + }, + { + "epoch": 2.375176503812482, + "grad_norm": 3.4786606488999428, + "learning_rate": 2.1902468591007042e-06, + "loss": 0.1812, + "step": 16821 + }, + { + "epoch": 2.375317706862468, + "grad_norm": 3.8978158660890467, + "learning_rate": 2.1892948346267583e-06, + "loss": 0.2333, + "step": 16822 + }, + { + "epoch": 2.375458909912454, + "grad_norm": 2.6898938527455134, + "learning_rate": 2.1883429916702238e-06, + "loss": 0.1304, + "step": 16823 + }, + { + "epoch": 2.37560011296244, + "grad_norm": 2.7725877506387833, + "learning_rate": 2.187391330253228e-06, + "loss": 0.1482, + "step": 16824 + }, + { + "epoch": 2.3757413160124257, + "grad_norm": 3.8028033625614697, + "learning_rate": 2.186439850397882e-06, + "loss": 0.1503, + "step": 16825 + }, + { + "epoch": 2.3758825190624115, + "grad_norm": 2.890799461779304, + "learning_rate": 2.1854885521263016e-06, + "loss": 0.1476, + "step": 16826 + }, + { + "epoch": 2.3760237221123974, + "grad_norm": 2.624418045570247, + "learning_rate": 2.184537435460594e-06, + "loss": 0.1226, + "step": 16827 + }, + { + "epoch": 2.3761649251623833, + "grad_norm": 3.796868830613685, + "learning_rate": 2.183586500422856e-06, + "loss": 0.137, + "step": 16828 + }, + { + "epoch": 2.376306128212369, + "grad_norm": 2.7027500865259673, + "learning_rate": 2.1826357470351945e-06, + "loss": 0.1313, + "step": 16829 + }, + { + "epoch": 2.376447331262355, + "grad_norm": 3.9337572020463285, + "learning_rate": 2.1816851753197023e-06, + "loss": 0.1607, + "step": 16830 + }, + { + "epoch": 2.376588534312341, + "grad_norm": 2.6190177369118417, + "learning_rate": 2.1807347852984707e-06, + "loss": 0.1056, + "step": 16831 + }, + { + "epoch": 2.376729737362327, + "grad_norm": 3.4987924433025244, + "learning_rate": 2.1797845769935856e-06, + "loss": 0.1694, + "step": 16832 + }, + { + "epoch": 2.3768709404123127, + "grad_norm": 3.017635996546025, + "learning_rate": 2.178834550427129e-06, + "loss": 0.1235, + "step": 16833 + }, + { + "epoch": 2.3770121434622986, + "grad_norm": 3.4253151426848296, + "learning_rate": 2.1778847056211803e-06, + "loss": 0.1418, + "step": 16834 + }, + { + "epoch": 2.3771533465122845, + "grad_norm": 3.0283263908078557, + "learning_rate": 2.1769350425978097e-06, + "loss": 0.1353, + "step": 16835 + }, + { + "epoch": 2.3772945495622704, + "grad_norm": 2.911163552527298, + "learning_rate": 2.175985561379097e-06, + "loss": 0.1253, + "step": 16836 + }, + { + "epoch": 2.3774357526122563, + "grad_norm": 2.9560822252812304, + "learning_rate": 2.1750362619870967e-06, + "loss": 0.1349, + "step": 16837 + }, + { + "epoch": 2.377576955662242, + "grad_norm": 2.951963398970572, + "learning_rate": 2.174087144443875e-06, + "loss": 0.1897, + "step": 16838 + }, + { + "epoch": 2.377718158712228, + "grad_norm": 2.813355255166902, + "learning_rate": 2.173138208771488e-06, + "loss": 0.1171, + "step": 16839 + }, + { + "epoch": 2.377859361762214, + "grad_norm": 3.3249924572557554, + "learning_rate": 2.1721894549919863e-06, + "loss": 0.1525, + "step": 16840 + }, + { + "epoch": 2.3780005648122, + "grad_norm": 4.21156338227217, + "learning_rate": 2.1712408831274232e-06, + "loss": 0.2071, + "step": 16841 + }, + { + "epoch": 2.3781417678621857, + "grad_norm": 2.6621535929826505, + "learning_rate": 2.1702924931998426e-06, + "loss": 0.1122, + "step": 16842 + }, + { + "epoch": 2.3782829709121716, + "grad_norm": 3.5684731222605888, + "learning_rate": 2.1693442852312817e-06, + "loss": 0.1477, + "step": 16843 + }, + { + "epoch": 2.3784241739621574, + "grad_norm": 3.834633803624263, + "learning_rate": 2.168396259243781e-06, + "loss": 0.14, + "step": 16844 + }, + { + "epoch": 2.3785653770121433, + "grad_norm": 3.099933924499636, + "learning_rate": 2.1674484152593634e-06, + "loss": 0.1608, + "step": 16845 + }, + { + "epoch": 2.378706580062129, + "grad_norm": 3.0221030984721358, + "learning_rate": 2.166500753300065e-06, + "loss": 0.161, + "step": 16846 + }, + { + "epoch": 2.378847783112115, + "grad_norm": 2.596044824389492, + "learning_rate": 2.165553273387906e-06, + "loss": 0.123, + "step": 16847 + }, + { + "epoch": 2.378988986162101, + "grad_norm": 3.153993282837189, + "learning_rate": 2.1646059755449058e-06, + "loss": 0.1568, + "step": 16848 + }, + { + "epoch": 2.379130189212087, + "grad_norm": 3.7686804122485573, + "learning_rate": 2.1636588597930775e-06, + "loss": 0.1762, + "step": 16849 + }, + { + "epoch": 2.3792713922620727, + "grad_norm": 3.3200375582842514, + "learning_rate": 2.1627119261544348e-06, + "loss": 0.1065, + "step": 16850 + }, + { + "epoch": 2.3794125953120586, + "grad_norm": 3.241732455816064, + "learning_rate": 2.1617651746509815e-06, + "loss": 0.1689, + "step": 16851 + }, + { + "epoch": 2.3795537983620445, + "grad_norm": 2.783290901304856, + "learning_rate": 2.1608186053047177e-06, + "loss": 0.1349, + "step": 16852 + }, + { + "epoch": 2.3796950014120304, + "grad_norm": 3.193364012437937, + "learning_rate": 2.1598722181376497e-06, + "loss": 0.1385, + "step": 16853 + }, + { + "epoch": 2.3798362044620163, + "grad_norm": 3.9331525621472814, + "learning_rate": 2.1589260131717614e-06, + "loss": 0.1706, + "step": 16854 + }, + { + "epoch": 2.379977407512002, + "grad_norm": 3.2871113105016256, + "learning_rate": 2.1579799904290476e-06, + "loss": 0.1225, + "step": 16855 + }, + { + "epoch": 2.380118610561988, + "grad_norm": 3.751676753259436, + "learning_rate": 2.157034149931492e-06, + "loss": 0.189, + "step": 16856 + }, + { + "epoch": 2.380259813611974, + "grad_norm": 3.636325786659401, + "learning_rate": 2.1560884917010725e-06, + "loss": 0.1714, + "step": 16857 + }, + { + "epoch": 2.38040101666196, + "grad_norm": 2.750835651375333, + "learning_rate": 2.155143015759773e-06, + "loss": 0.1429, + "step": 16858 + }, + { + "epoch": 2.3805422197119457, + "grad_norm": 3.6596374430454177, + "learning_rate": 2.1541977221295605e-06, + "loss": 0.1843, + "step": 16859 + }, + { + "epoch": 2.3806834227619316, + "grad_norm": 3.63387828127033, + "learning_rate": 2.1532526108324047e-06, + "loss": 0.1791, + "step": 16860 + }, + { + "epoch": 2.3808246258119174, + "grad_norm": 2.1818596143524274, + "learning_rate": 2.1523076818902722e-06, + "loss": 0.08, + "step": 16861 + }, + { + "epoch": 2.3809658288619033, + "grad_norm": 2.8908724743711356, + "learning_rate": 2.151362935325115e-06, + "loss": 0.1434, + "step": 16862 + }, + { + "epoch": 2.381107031911889, + "grad_norm": 2.4791904498862607, + "learning_rate": 2.1504183711588966e-06, + "loss": 0.1275, + "step": 16863 + }, + { + "epoch": 2.381248234961875, + "grad_norm": 2.593423071621891, + "learning_rate": 2.149473989413564e-06, + "loss": 0.0952, + "step": 16864 + }, + { + "epoch": 2.381389438011861, + "grad_norm": 3.4013339525721715, + "learning_rate": 2.148529790111067e-06, + "loss": 0.1757, + "step": 16865 + }, + { + "epoch": 2.381530641061847, + "grad_norm": 2.8369335383036236, + "learning_rate": 2.1475857732733464e-06, + "loss": 0.1315, + "step": 16866 + }, + { + "epoch": 2.3816718441118327, + "grad_norm": 3.259529982458239, + "learning_rate": 2.1466419389223403e-06, + "loss": 0.1276, + "step": 16867 + }, + { + "epoch": 2.3818130471618186, + "grad_norm": 2.9851834195123392, + "learning_rate": 2.1456982870799848e-06, + "loss": 0.1222, + "step": 16868 + }, + { + "epoch": 2.3819542502118045, + "grad_norm": 3.228418306674102, + "learning_rate": 2.1447548177682065e-06, + "loss": 0.1567, + "step": 16869 + }, + { + "epoch": 2.3820954532617904, + "grad_norm": 3.1675876251642334, + "learning_rate": 2.1438115310089393e-06, + "loss": 0.1302, + "step": 16870 + }, + { + "epoch": 2.3822366563117763, + "grad_norm": 2.0743157181808023, + "learning_rate": 2.1428684268240964e-06, + "loss": 0.1321, + "step": 16871 + }, + { + "epoch": 2.382377859361762, + "grad_norm": 4.121627887555411, + "learning_rate": 2.1419255052355983e-06, + "loss": 0.1908, + "step": 16872 + }, + { + "epoch": 2.382519062411748, + "grad_norm": 2.9301165144113197, + "learning_rate": 2.140982766265357e-06, + "loss": 0.1304, + "step": 16873 + }, + { + "epoch": 2.382660265461734, + "grad_norm": 3.394183471132699, + "learning_rate": 2.14004020993528e-06, + "loss": 0.162, + "step": 16874 + }, + { + "epoch": 2.38280146851172, + "grad_norm": 3.528012232321496, + "learning_rate": 2.1390978362672763e-06, + "loss": 0.153, + "step": 16875 + }, + { + "epoch": 2.3829426715617057, + "grad_norm": 3.000637826485278, + "learning_rate": 2.138155645283244e-06, + "loss": 0.1312, + "step": 16876 + }, + { + "epoch": 2.3830838746116916, + "grad_norm": 3.2413708452045085, + "learning_rate": 2.13721363700508e-06, + "loss": 0.1475, + "step": 16877 + }, + { + "epoch": 2.3832250776616775, + "grad_norm": 3.1098766203682544, + "learning_rate": 2.1362718114546777e-06, + "loss": 0.1249, + "step": 16878 + }, + { + "epoch": 2.3833662807116633, + "grad_norm": 3.0271378774702544, + "learning_rate": 2.1353301686539173e-06, + "loss": 0.1121, + "step": 16879 + }, + { + "epoch": 2.3835074837616492, + "grad_norm": 3.0764241744404495, + "learning_rate": 2.1343887086246893e-06, + "loss": 0.1418, + "step": 16880 + }, + { + "epoch": 2.383648686811635, + "grad_norm": 2.8739305416767023, + "learning_rate": 2.133447431388872e-06, + "loss": 0.1226, + "step": 16881 + }, + { + "epoch": 2.383789889861621, + "grad_norm": 3.6562634606108873, + "learning_rate": 2.1325063369683374e-06, + "loss": 0.139, + "step": 16882 + }, + { + "epoch": 2.383931092911607, + "grad_norm": 2.706378317998329, + "learning_rate": 2.1315654253849594e-06, + "loss": 0.1217, + "step": 16883 + }, + { + "epoch": 2.3840722959615928, + "grad_norm": 3.2890492456411637, + "learning_rate": 2.1306246966606025e-06, + "loss": 0.128, + "step": 16884 + }, + { + "epoch": 2.3842134990115786, + "grad_norm": 2.5057486636725206, + "learning_rate": 2.1296841508171285e-06, + "loss": 0.1349, + "step": 16885 + }, + { + "epoch": 2.3843547020615645, + "grad_norm": 3.22059075939742, + "learning_rate": 2.128743787876393e-06, + "loss": 0.1444, + "step": 16886 + }, + { + "epoch": 2.3844959051115504, + "grad_norm": 3.2905058048843467, + "learning_rate": 2.1278036078602584e-06, + "loss": 0.1776, + "step": 16887 + }, + { + "epoch": 2.3846371081615363, + "grad_norm": 2.9343742730428994, + "learning_rate": 2.126863610790566e-06, + "loss": 0.1314, + "step": 16888 + }, + { + "epoch": 2.384778311211522, + "grad_norm": 3.1352321630894964, + "learning_rate": 2.1259237966891623e-06, + "loss": 0.1406, + "step": 16889 + }, + { + "epoch": 2.384919514261508, + "grad_norm": 3.1461646947860333, + "learning_rate": 2.124984165577889e-06, + "loss": 0.1051, + "step": 16890 + }, + { + "epoch": 2.385060717311494, + "grad_norm": 2.8928606369337597, + "learning_rate": 2.1240447174785806e-06, + "loss": 0.1625, + "step": 16891 + }, + { + "epoch": 2.38520192036148, + "grad_norm": 3.19215859831351, + "learning_rate": 2.1231054524130746e-06, + "loss": 0.1667, + "step": 16892 + }, + { + "epoch": 2.3853431234114657, + "grad_norm": 3.6164602667332986, + "learning_rate": 2.1221663704031957e-06, + "loss": 0.1516, + "step": 16893 + }, + { + "epoch": 2.3854843264614516, + "grad_norm": 3.497314894308194, + "learning_rate": 2.121227471470768e-06, + "loss": 0.1805, + "step": 16894 + }, + { + "epoch": 2.3856255295114375, + "grad_norm": 4.235992160202638, + "learning_rate": 2.120288755637614e-06, + "loss": 0.2222, + "step": 16895 + }, + { + "epoch": 2.3857667325614234, + "grad_norm": 3.369591627455159, + "learning_rate": 2.1193502229255436e-06, + "loss": 0.1486, + "step": 16896 + }, + { + "epoch": 2.3859079356114092, + "grad_norm": 4.513794629689269, + "learning_rate": 2.1184118733563685e-06, + "loss": 0.1976, + "step": 16897 + }, + { + "epoch": 2.386049138661395, + "grad_norm": 2.974483642533953, + "learning_rate": 2.1174737069519e-06, + "loss": 0.1054, + "step": 16898 + }, + { + "epoch": 2.386190341711381, + "grad_norm": 3.303286288961413, + "learning_rate": 2.116535723733938e-06, + "loss": 0.1383, + "step": 16899 + }, + { + "epoch": 2.386331544761367, + "grad_norm": 2.7780379208416677, + "learning_rate": 2.1155979237242817e-06, + "loss": 0.1257, + "step": 16900 + }, + { + "epoch": 2.3864727478113528, + "grad_norm": 3.29292251244628, + "learning_rate": 2.1146603069447234e-06, + "loss": 0.2088, + "step": 16901 + }, + { + "epoch": 2.3866139508613387, + "grad_norm": 3.9175322988482097, + "learning_rate": 2.1137228734170558e-06, + "loss": 0.1727, + "step": 16902 + }, + { + "epoch": 2.3867551539113245, + "grad_norm": 2.8990293912138303, + "learning_rate": 2.1127856231630593e-06, + "loss": 0.1502, + "step": 16903 + }, + { + "epoch": 2.3868963569613104, + "grad_norm": 3.0941608781904213, + "learning_rate": 2.1118485562045244e-06, + "loss": 0.122, + "step": 16904 + }, + { + "epoch": 2.3870375600112963, + "grad_norm": 2.93435337313772, + "learning_rate": 2.1109116725632193e-06, + "loss": 0.1586, + "step": 16905 + }, + { + "epoch": 2.387178763061282, + "grad_norm": 3.130180943115138, + "learning_rate": 2.109974972260921e-06, + "loss": 0.1399, + "step": 16906 + }, + { + "epoch": 2.387319966111268, + "grad_norm": 3.81169229690288, + "learning_rate": 2.1090384553193953e-06, + "loss": 0.1506, + "step": 16907 + }, + { + "epoch": 2.387461169161254, + "grad_norm": 2.999748380957778, + "learning_rate": 2.108102121760409e-06, + "loss": 0.1652, + "step": 16908 + }, + { + "epoch": 2.38760237221124, + "grad_norm": 3.289506261611598, + "learning_rate": 2.107165971605718e-06, + "loss": 0.172, + "step": 16909 + }, + { + "epoch": 2.3877435752612257, + "grad_norm": 3.565859863998495, + "learning_rate": 2.1062300048770847e-06, + "loss": 0.1595, + "step": 16910 + }, + { + "epoch": 2.3878847783112116, + "grad_norm": 3.0678509385941433, + "learning_rate": 2.105294221596256e-06, + "loss": 0.136, + "step": 16911 + }, + { + "epoch": 2.3880259813611975, + "grad_norm": 3.144068866342032, + "learning_rate": 2.104358621784983e-06, + "loss": 0.1749, + "step": 16912 + }, + { + "epoch": 2.3881671844111834, + "grad_norm": 2.508002444693295, + "learning_rate": 2.103423205465004e-06, + "loss": 0.1446, + "step": 16913 + }, + { + "epoch": 2.3883083874611692, + "grad_norm": 2.8644246761573102, + "learning_rate": 2.102487972658056e-06, + "loss": 0.1568, + "step": 16914 + }, + { + "epoch": 2.388449590511155, + "grad_norm": 3.3656818647591384, + "learning_rate": 2.101552923385879e-06, + "loss": 0.1484, + "step": 16915 + }, + { + "epoch": 2.388590793561141, + "grad_norm": 3.732281131695765, + "learning_rate": 2.1006180576702017e-06, + "loss": 0.1787, + "step": 16916 + }, + { + "epoch": 2.388731996611127, + "grad_norm": 3.222004584684271, + "learning_rate": 2.0996833755327493e-06, + "loss": 0.1429, + "step": 16917 + }, + { + "epoch": 2.388873199661113, + "grad_norm": 2.780391201297536, + "learning_rate": 2.0987488769952436e-06, + "loss": 0.1249, + "step": 16918 + }, + { + "epoch": 2.3890144027110987, + "grad_norm": 2.372011405250769, + "learning_rate": 2.097814562079401e-06, + "loss": 0.096, + "step": 16919 + }, + { + "epoch": 2.3891556057610845, + "grad_norm": 3.30239240061786, + "learning_rate": 2.0968804308069324e-06, + "loss": 0.1529, + "step": 16920 + }, + { + "epoch": 2.3892968088110704, + "grad_norm": 2.6595140622041646, + "learning_rate": 2.0959464831995557e-06, + "loss": 0.1307, + "step": 16921 + }, + { + "epoch": 2.3894380118610563, + "grad_norm": 3.185856299500183, + "learning_rate": 2.095012719278966e-06, + "loss": 0.1644, + "step": 16922 + }, + { + "epoch": 2.389579214911042, + "grad_norm": 3.1040273784057666, + "learning_rate": 2.094079139066868e-06, + "loss": 0.1753, + "step": 16923 + }, + { + "epoch": 2.389720417961028, + "grad_norm": 3.2835151214127594, + "learning_rate": 2.0931457425849555e-06, + "loss": 0.1522, + "step": 16924 + }, + { + "epoch": 2.389861621011014, + "grad_norm": 2.7178529999987737, + "learning_rate": 2.092212529854921e-06, + "loss": 0.1255, + "step": 16925 + }, + { + "epoch": 2.390002824061, + "grad_norm": 3.3100662964775682, + "learning_rate": 2.09127950089845e-06, + "loss": 0.1732, + "step": 16926 + }, + { + "epoch": 2.3901440271109857, + "grad_norm": 2.909307484857051, + "learning_rate": 2.09034665573723e-06, + "loss": 0.1439, + "step": 16927 + }, + { + "epoch": 2.3902852301609716, + "grad_norm": 2.7410693226195955, + "learning_rate": 2.089413994392938e-06, + "loss": 0.1272, + "step": 16928 + }, + { + "epoch": 2.3904264332109575, + "grad_norm": 3.3869034461445438, + "learning_rate": 2.088481516887252e-06, + "loss": 0.1416, + "step": 16929 + }, + { + "epoch": 2.3905676362609434, + "grad_norm": 3.3509758780329, + "learning_rate": 2.087549223241835e-06, + "loss": 0.1236, + "step": 16930 + }, + { + "epoch": 2.3907088393109293, + "grad_norm": 3.8165363144350435, + "learning_rate": 2.086617113478354e-06, + "loss": 0.1726, + "step": 16931 + }, + { + "epoch": 2.390850042360915, + "grad_norm": 3.4336482525392125, + "learning_rate": 2.085685187618478e-06, + "loss": 0.1665, + "step": 16932 + }, + { + "epoch": 2.390991245410901, + "grad_norm": 3.0106414155881844, + "learning_rate": 2.084753445683859e-06, + "loss": 0.1334, + "step": 16933 + }, + { + "epoch": 2.391132448460887, + "grad_norm": 3.0846619681921066, + "learning_rate": 2.0838218876961524e-06, + "loss": 0.1107, + "step": 16934 + }, + { + "epoch": 2.391273651510873, + "grad_norm": 3.693857139616947, + "learning_rate": 2.082890513677006e-06, + "loss": 0.1958, + "step": 16935 + }, + { + "epoch": 2.3914148545608587, + "grad_norm": 3.305159366411521, + "learning_rate": 2.081959323648065e-06, + "loss": 0.183, + "step": 16936 + }, + { + "epoch": 2.3915560576108446, + "grad_norm": 3.6562492030960034, + "learning_rate": 2.08102831763097e-06, + "loss": 0.1747, + "step": 16937 + }, + { + "epoch": 2.3916972606608304, + "grad_norm": 3.834081912606494, + "learning_rate": 2.0800974956473565e-06, + "loss": 0.1919, + "step": 16938 + }, + { + "epoch": 2.3918384637108163, + "grad_norm": 3.011216813524388, + "learning_rate": 2.079166857718856e-06, + "loss": 0.1351, + "step": 16939 + }, + { + "epoch": 2.391979666760802, + "grad_norm": 2.7792333723633837, + "learning_rate": 2.0782364038670986e-06, + "loss": 0.1373, + "step": 16940 + }, + { + "epoch": 2.392120869810788, + "grad_norm": 2.9614190025743516, + "learning_rate": 2.0773061341137057e-06, + "loss": 0.1406, + "step": 16941 + }, + { + "epoch": 2.392262072860774, + "grad_norm": 2.812234777266102, + "learning_rate": 2.0763760484802966e-06, + "loss": 0.1241, + "step": 16942 + }, + { + "epoch": 2.39240327591076, + "grad_norm": 3.527025634108565, + "learning_rate": 2.0754461469884836e-06, + "loss": 0.17, + "step": 16943 + }, + { + "epoch": 2.3925444789607457, + "grad_norm": 3.423091661390692, + "learning_rate": 2.074516429659882e-06, + "loss": 0.1396, + "step": 16944 + }, + { + "epoch": 2.3926856820107316, + "grad_norm": 3.6972629888284136, + "learning_rate": 2.0735868965160953e-06, + "loss": 0.2181, + "step": 16945 + }, + { + "epoch": 2.3928268850607175, + "grad_norm": 3.290052481382784, + "learning_rate": 2.0726575475787293e-06, + "loss": 0.1595, + "step": 16946 + }, + { + "epoch": 2.3929680881107034, + "grad_norm": 2.700608833352401, + "learning_rate": 2.071728382869376e-06, + "loss": 0.1028, + "step": 16947 + }, + { + "epoch": 2.3931092911606893, + "grad_norm": 3.4445725325027596, + "learning_rate": 2.070799402409628e-06, + "loss": 0.1865, + "step": 16948 + }, + { + "epoch": 2.393250494210675, + "grad_norm": 2.70174360647674, + "learning_rate": 2.0698706062210804e-06, + "loss": 0.1497, + "step": 16949 + }, + { + "epoch": 2.393391697260661, + "grad_norm": 3.543520884337734, + "learning_rate": 2.068941994325315e-06, + "loss": 0.1728, + "step": 16950 + }, + { + "epoch": 2.393532900310647, + "grad_norm": 3.029676466372009, + "learning_rate": 2.068013566743913e-06, + "loss": 0.1553, + "step": 16951 + }, + { + "epoch": 2.393674103360633, + "grad_norm": 3.5536986284155496, + "learning_rate": 2.06708532349845e-06, + "loss": 0.1825, + "step": 16952 + }, + { + "epoch": 2.3938153064106187, + "grad_norm": 3.118160308799758, + "learning_rate": 2.0661572646104975e-06, + "loss": 0.1361, + "step": 16953 + }, + { + "epoch": 2.3939565094606046, + "grad_norm": 2.9861453209696, + "learning_rate": 2.065229390101624e-06, + "loss": 0.1375, + "step": 16954 + }, + { + "epoch": 2.3940977125105904, + "grad_norm": 3.4730025401811693, + "learning_rate": 2.064301699993393e-06, + "loss": 0.1651, + "step": 16955 + }, + { + "epoch": 2.3942389155605763, + "grad_norm": 2.798785760141852, + "learning_rate": 2.063374194307364e-06, + "loss": 0.1434, + "step": 16956 + }, + { + "epoch": 2.394380118610562, + "grad_norm": 3.801573620700732, + "learning_rate": 2.0624468730650903e-06, + "loss": 0.166, + "step": 16957 + }, + { + "epoch": 2.394521321660548, + "grad_norm": 3.695102569134403, + "learning_rate": 2.0615197362881234e-06, + "loss": 0.1574, + "step": 16958 + }, + { + "epoch": 2.394662524710534, + "grad_norm": 3.361808546060126, + "learning_rate": 2.060592783998009e-06, + "loss": 0.1492, + "step": 16959 + }, + { + "epoch": 2.39480372776052, + "grad_norm": 2.978792146729772, + "learning_rate": 2.0596660162162872e-06, + "loss": 0.1586, + "step": 16960 + }, + { + "epoch": 2.3949449308105057, + "grad_norm": 2.5695790176852067, + "learning_rate": 2.0587394329645018e-06, + "loss": 0.1118, + "step": 16961 + }, + { + "epoch": 2.3950861338604916, + "grad_norm": 2.6964622627040464, + "learning_rate": 2.057813034264181e-06, + "loss": 0.1267, + "step": 16962 + }, + { + "epoch": 2.3952273369104775, + "grad_norm": 3.3592956819004978, + "learning_rate": 2.0568868201368587e-06, + "loss": 0.138, + "step": 16963 + }, + { + "epoch": 2.3953685399604634, + "grad_norm": 3.2528426364813066, + "learning_rate": 2.0559607906040524e-06, + "loss": 0.1762, + "step": 16964 + }, + { + "epoch": 2.395509743010449, + "grad_norm": 4.017866235119019, + "learning_rate": 2.0550349456872853e-06, + "loss": 0.1552, + "step": 16965 + }, + { + "epoch": 2.3956509460604347, + "grad_norm": 3.7990873679225334, + "learning_rate": 2.054109285408077e-06, + "loss": 0.1894, + "step": 16966 + }, + { + "epoch": 2.3957921491104206, + "grad_norm": 3.5081018729001143, + "learning_rate": 2.0531838097879377e-06, + "loss": 0.1983, + "step": 16967 + }, + { + "epoch": 2.3959333521604065, + "grad_norm": 2.892450141963926, + "learning_rate": 2.0522585188483745e-06, + "loss": 0.1262, + "step": 16968 + }, + { + "epoch": 2.3960745552103924, + "grad_norm": 2.9858956663469445, + "learning_rate": 2.051333412610891e-06, + "loss": 0.1298, + "step": 16969 + }, + { + "epoch": 2.3962157582603782, + "grad_norm": 2.647187125274911, + "learning_rate": 2.0504084910969856e-06, + "loss": 0.1226, + "step": 16970 + }, + { + "epoch": 2.396356961310364, + "grad_norm": 3.85197333523568, + "learning_rate": 2.0494837543281543e-06, + "loss": 0.1604, + "step": 16971 + }, + { + "epoch": 2.39649816436035, + "grad_norm": 2.775118155429708, + "learning_rate": 2.0485592023258872e-06, + "loss": 0.1467, + "step": 16972 + }, + { + "epoch": 2.396639367410336, + "grad_norm": 2.464203968113668, + "learning_rate": 2.0476348351116702e-06, + "loss": 0.1461, + "step": 16973 + }, + { + "epoch": 2.396780570460322, + "grad_norm": 3.5108784580881562, + "learning_rate": 2.046710652706985e-06, + "loss": 0.1765, + "step": 16974 + }, + { + "epoch": 2.3969217735103077, + "grad_norm": 2.4179921258261423, + "learning_rate": 2.0457866551333094e-06, + "loss": 0.1222, + "step": 16975 + }, + { + "epoch": 2.3970629765602935, + "grad_norm": 2.817553641664922, + "learning_rate": 2.0448628424121173e-06, + "loss": 0.0993, + "step": 16976 + }, + { + "epoch": 2.3972041796102794, + "grad_norm": 3.0225950216418265, + "learning_rate": 2.043939214564874e-06, + "loss": 0.1564, + "step": 16977 + }, + { + "epoch": 2.3973453826602653, + "grad_norm": 3.187890033929883, + "learning_rate": 2.0430157716130506e-06, + "loss": 0.15, + "step": 16978 + }, + { + "epoch": 2.397486585710251, + "grad_norm": 2.8332602651289323, + "learning_rate": 2.0420925135781035e-06, + "loss": 0.1422, + "step": 16979 + }, + { + "epoch": 2.397627788760237, + "grad_norm": 4.214074705262303, + "learning_rate": 2.041169440481493e-06, + "loss": 0.2012, + "step": 16980 + }, + { + "epoch": 2.397768991810223, + "grad_norm": 2.992840649556532, + "learning_rate": 2.0402465523446648e-06, + "loss": 0.1266, + "step": 16981 + }, + { + "epoch": 2.397910194860209, + "grad_norm": 3.001202755229056, + "learning_rate": 2.0393238491890655e-06, + "loss": 0.1357, + "step": 16982 + }, + { + "epoch": 2.3980513979101947, + "grad_norm": 3.4520750436045082, + "learning_rate": 2.038401331036146e-06, + "loss": 0.1343, + "step": 16983 + }, + { + "epoch": 2.3981926009601806, + "grad_norm": 3.8870909296759195, + "learning_rate": 2.0374789979073408e-06, + "loss": 0.1687, + "step": 16984 + }, + { + "epoch": 2.3983338040101665, + "grad_norm": 3.6522517758393978, + "learning_rate": 2.036556849824084e-06, + "loss": 0.1682, + "step": 16985 + }, + { + "epoch": 2.3984750070601524, + "grad_norm": 3.2689158862969974, + "learning_rate": 2.035634886807808e-06, + "loss": 0.1621, + "step": 16986 + }, + { + "epoch": 2.3986162101101383, + "grad_norm": 2.4619751773024707, + "learning_rate": 2.0347131088799375e-06, + "loss": 0.1318, + "step": 16987 + }, + { + "epoch": 2.398757413160124, + "grad_norm": 3.8404803111642534, + "learning_rate": 2.0337915160618936e-06, + "loss": 0.1569, + "step": 16988 + }, + { + "epoch": 2.39889861621011, + "grad_norm": 2.9510833763785045, + "learning_rate": 2.032870108375096e-06, + "loss": 0.1566, + "step": 16989 + }, + { + "epoch": 2.399039819260096, + "grad_norm": 3.5273539107793797, + "learning_rate": 2.0319488858409552e-06, + "loss": 0.1858, + "step": 16990 + }, + { + "epoch": 2.399181022310082, + "grad_norm": 3.5795345710610333, + "learning_rate": 2.031027848480881e-06, + "loss": 0.1685, + "step": 16991 + }, + { + "epoch": 2.3993222253600677, + "grad_norm": 3.267834376929403, + "learning_rate": 2.030106996316279e-06, + "loss": 0.1775, + "step": 16992 + }, + { + "epoch": 2.3994634284100536, + "grad_norm": 3.7969931283701084, + "learning_rate": 2.0291863293685477e-06, + "loss": 0.1724, + "step": 16993 + }, + { + "epoch": 2.3996046314600394, + "grad_norm": 3.0674786494111483, + "learning_rate": 2.0282658476590823e-06, + "loss": 0.1488, + "step": 16994 + }, + { + "epoch": 2.3997458345100253, + "grad_norm": 3.206102938320587, + "learning_rate": 2.0273455512092767e-06, + "loss": 0.1731, + "step": 16995 + }, + { + "epoch": 2.399887037560011, + "grad_norm": 3.786699221505517, + "learning_rate": 2.0264254400405192e-06, + "loss": 0.1509, + "step": 16996 + }, + { + "epoch": 2.400028240609997, + "grad_norm": 3.826865788699074, + "learning_rate": 2.025505514174192e-06, + "loss": 0.1467, + "step": 16997 + }, + { + "epoch": 2.400169443659983, + "grad_norm": 5.007631849796837, + "learning_rate": 2.024585773631671e-06, + "loss": 0.2467, + "step": 16998 + }, + { + "epoch": 2.400310646709969, + "grad_norm": 3.237093440913676, + "learning_rate": 2.0236662184343325e-06, + "loss": 0.1387, + "step": 16999 + }, + { + "epoch": 2.4004518497599547, + "grad_norm": 3.616872453733728, + "learning_rate": 2.022746848603543e-06, + "loss": 0.1638, + "step": 17000 + }, + { + "epoch": 2.4005930528099406, + "grad_norm": 2.741695318250654, + "learning_rate": 2.0218276641606748e-06, + "loss": 0.1087, + "step": 17001 + }, + { + "epoch": 2.4007342558599265, + "grad_norm": 2.5031180529284525, + "learning_rate": 2.020908665127086e-06, + "loss": 0.1261, + "step": 17002 + }, + { + "epoch": 2.4008754589099124, + "grad_norm": 2.9181118878659813, + "learning_rate": 2.0199898515241333e-06, + "loss": 0.1642, + "step": 17003 + }, + { + "epoch": 2.4010166619598983, + "grad_norm": 2.7580166723087443, + "learning_rate": 2.019071223373171e-06, + "loss": 0.1097, + "step": 17004 + }, + { + "epoch": 2.401157865009884, + "grad_norm": 2.8602409862986593, + "learning_rate": 2.018152780695547e-06, + "loss": 0.166, + "step": 17005 + }, + { + "epoch": 2.40129906805987, + "grad_norm": 3.28514809516474, + "learning_rate": 2.0172345235126043e-06, + "loss": 0.1503, + "step": 17006 + }, + { + "epoch": 2.401440271109856, + "grad_norm": 2.795001426567266, + "learning_rate": 2.0163164518456846e-06, + "loss": 0.1382, + "step": 17007 + }, + { + "epoch": 2.401581474159842, + "grad_norm": 3.777642332044963, + "learning_rate": 2.0153985657161223e-06, + "loss": 0.1897, + "step": 17008 + }, + { + "epoch": 2.4017226772098277, + "grad_norm": 3.8718338654761086, + "learning_rate": 2.0144808651452495e-06, + "loss": 0.1721, + "step": 17009 + }, + { + "epoch": 2.4018638802598136, + "grad_norm": 3.9719470631013816, + "learning_rate": 2.0135633501543916e-06, + "loss": 0.2004, + "step": 17010 + }, + { + "epoch": 2.4020050833097994, + "grad_norm": 3.6532600411301073, + "learning_rate": 2.0126460207648734e-06, + "loss": 0.1788, + "step": 17011 + }, + { + "epoch": 2.4021462863597853, + "grad_norm": 2.9936414595983867, + "learning_rate": 2.0117288769980092e-06, + "loss": 0.1438, + "step": 17012 + }, + { + "epoch": 2.402287489409771, + "grad_norm": 2.917090846361606, + "learning_rate": 2.0108119188751186e-06, + "loss": 0.1305, + "step": 17013 + }, + { + "epoch": 2.402428692459757, + "grad_norm": 2.8186529471572355, + "learning_rate": 2.009895146417512e-06, + "loss": 0.1349, + "step": 17014 + }, + { + "epoch": 2.402569895509743, + "grad_norm": 3.236650076883082, + "learning_rate": 2.008978559646488e-06, + "loss": 0.1761, + "step": 17015 + }, + { + "epoch": 2.402711098559729, + "grad_norm": 3.3399888632244057, + "learning_rate": 2.0080621585833516e-06, + "loss": 0.1468, + "step": 17016 + }, + { + "epoch": 2.4028523016097147, + "grad_norm": 3.4896443238376476, + "learning_rate": 2.0071459432493967e-06, + "loss": 0.1721, + "step": 17017 + }, + { + "epoch": 2.4029935046597006, + "grad_norm": 3.0798643987558916, + "learning_rate": 2.0062299136659203e-06, + "loss": 0.1408, + "step": 17018 + }, + { + "epoch": 2.4031347077096865, + "grad_norm": 2.7460754438328845, + "learning_rate": 2.0053140698542096e-06, + "loss": 0.1349, + "step": 17019 + }, + { + "epoch": 2.4032759107596724, + "grad_norm": 3.7423615598091837, + "learning_rate": 2.0043984118355464e-06, + "loss": 0.1934, + "step": 17020 + }, + { + "epoch": 2.4034171138096583, + "grad_norm": 3.3433141709234233, + "learning_rate": 2.0034829396312115e-06, + "loss": 0.1386, + "step": 17021 + }, + { + "epoch": 2.403558316859644, + "grad_norm": 2.9314097682598477, + "learning_rate": 2.0025676532624794e-06, + "loss": 0.1593, + "step": 17022 + }, + { + "epoch": 2.40369951990963, + "grad_norm": 2.793720863801027, + "learning_rate": 2.0016525527506203e-06, + "loss": 0.1171, + "step": 17023 + }, + { + "epoch": 2.403840722959616, + "grad_norm": 3.208090175875538, + "learning_rate": 2.000737638116903e-06, + "loss": 0.1433, + "step": 17024 + }, + { + "epoch": 2.403981926009602, + "grad_norm": 3.763394110760023, + "learning_rate": 1.999822909382588e-06, + "loss": 0.1648, + "step": 17025 + }, + { + "epoch": 2.4041231290595877, + "grad_norm": 2.784817852668454, + "learning_rate": 1.9989083665689335e-06, + "loss": 0.127, + "step": 17026 + }, + { + "epoch": 2.4042643321095736, + "grad_norm": 2.8823208571680197, + "learning_rate": 1.9979940096971928e-06, + "loss": 0.145, + "step": 17027 + }, + { + "epoch": 2.4044055351595595, + "grad_norm": 2.4921598187721403, + "learning_rate": 1.997079838788616e-06, + "loss": 0.1021, + "step": 17028 + }, + { + "epoch": 2.4045467382095453, + "grad_norm": 3.5271795523193608, + "learning_rate": 1.9961658538644447e-06, + "loss": 0.178, + "step": 17029 + }, + { + "epoch": 2.4046879412595312, + "grad_norm": 2.2549416833608382, + "learning_rate": 1.9952520549459254e-06, + "loss": 0.1268, + "step": 17030 + }, + { + "epoch": 2.404829144309517, + "grad_norm": 3.9227016093854243, + "learning_rate": 1.994338442054293e-06, + "loss": 0.1614, + "step": 17031 + }, + { + "epoch": 2.404970347359503, + "grad_norm": 2.7379225478209577, + "learning_rate": 1.993425015210777e-06, + "loss": 0.1407, + "step": 17032 + }, + { + "epoch": 2.405111550409489, + "grad_norm": 3.264747096804538, + "learning_rate": 1.992511774436604e-06, + "loss": 0.1442, + "step": 17033 + }, + { + "epoch": 2.4052527534594748, + "grad_norm": 2.9510364316049214, + "learning_rate": 1.9915987197529973e-06, + "loss": 0.1299, + "step": 17034 + }, + { + "epoch": 2.4053939565094606, + "grad_norm": 3.4362503362212684, + "learning_rate": 1.990685851181181e-06, + "loss": 0.1405, + "step": 17035 + }, + { + "epoch": 2.4055351595594465, + "grad_norm": 3.502391578997964, + "learning_rate": 1.989773168742366e-06, + "loss": 0.1555, + "step": 17036 + }, + { + "epoch": 2.4056763626094324, + "grad_norm": 4.0363089227585345, + "learning_rate": 1.988860672457763e-06, + "loss": 0.1886, + "step": 17037 + }, + { + "epoch": 2.4058175656594183, + "grad_norm": 3.304975817021495, + "learning_rate": 1.9879483623485786e-06, + "loss": 0.1908, + "step": 17038 + }, + { + "epoch": 2.405958768709404, + "grad_norm": 3.067266070400253, + "learning_rate": 1.987036238436014e-06, + "loss": 0.131, + "step": 17039 + }, + { + "epoch": 2.40609997175939, + "grad_norm": 3.07072508757356, + "learning_rate": 1.986124300741267e-06, + "loss": 0.1467, + "step": 17040 + }, + { + "epoch": 2.406241174809376, + "grad_norm": 3.356140292256625, + "learning_rate": 1.985212549285529e-06, + "loss": 0.1583, + "step": 17041 + }, + { + "epoch": 2.406382377859362, + "grad_norm": 3.52210803903658, + "learning_rate": 1.9843009840899917e-06, + "loss": 0.1646, + "step": 17042 + }, + { + "epoch": 2.4065235809093477, + "grad_norm": 3.200044327501405, + "learning_rate": 1.983389605175837e-06, + "loss": 0.1307, + "step": 17043 + }, + { + "epoch": 2.4066647839593336, + "grad_norm": 3.0611807629184526, + "learning_rate": 1.982478412564246e-06, + "loss": 0.1701, + "step": 17044 + }, + { + "epoch": 2.4068059870093195, + "grad_norm": 2.896255461557228, + "learning_rate": 1.9815674062763936e-06, + "loss": 0.1243, + "step": 17045 + }, + { + "epoch": 2.4069471900593054, + "grad_norm": 3.2336848466640427, + "learning_rate": 1.980656586333449e-06, + "loss": 0.1579, + "step": 17046 + }, + { + "epoch": 2.4070883931092912, + "grad_norm": 2.311343637127122, + "learning_rate": 1.9797459527565856e-06, + "loss": 0.1096, + "step": 17047 + }, + { + "epoch": 2.407229596159277, + "grad_norm": 3.470348602553056, + "learning_rate": 1.978835505566965e-06, + "loss": 0.1413, + "step": 17048 + }, + { + "epoch": 2.407370799209263, + "grad_norm": 2.9967358261730745, + "learning_rate": 1.977925244785741e-06, + "loss": 0.1353, + "step": 17049 + }, + { + "epoch": 2.407512002259249, + "grad_norm": 2.6830244640655754, + "learning_rate": 1.977015170434069e-06, + "loss": 0.1472, + "step": 17050 + }, + { + "epoch": 2.4076532053092348, + "grad_norm": 2.4999751968678483, + "learning_rate": 1.9761052825330975e-06, + "loss": 0.1023, + "step": 17051 + }, + { + "epoch": 2.4077944083592207, + "grad_norm": 3.304182349891002, + "learning_rate": 1.975195581103976e-06, + "loss": 0.1202, + "step": 17052 + }, + { + "epoch": 2.4079356114092065, + "grad_norm": 3.2638175886739336, + "learning_rate": 1.974286066167844e-06, + "loss": 0.1858, + "step": 17053 + }, + { + "epoch": 2.4080768144591924, + "grad_norm": 4.127048475569063, + "learning_rate": 1.9733767377458377e-06, + "loss": 0.1971, + "step": 17054 + }, + { + "epoch": 2.4082180175091783, + "grad_norm": 2.852323887957247, + "learning_rate": 1.97246759585909e-06, + "loss": 0.1435, + "step": 17055 + }, + { + "epoch": 2.408359220559164, + "grad_norm": 3.720634621930768, + "learning_rate": 1.971558640528728e-06, + "loss": 0.17, + "step": 17056 + }, + { + "epoch": 2.40850042360915, + "grad_norm": 2.7516834611931618, + "learning_rate": 1.970649871775876e-06, + "loss": 0.1355, + "step": 17057 + }, + { + "epoch": 2.408641626659136, + "grad_norm": 2.7406142273192153, + "learning_rate": 1.969741289621653e-06, + "loss": 0.1206, + "step": 17058 + }, + { + "epoch": 2.408782829709122, + "grad_norm": 3.4896496582040832, + "learning_rate": 1.9688328940871747e-06, + "loss": 0.1581, + "step": 17059 + }, + { + "epoch": 2.4089240327591077, + "grad_norm": 3.0013940258078278, + "learning_rate": 1.967924685193552e-06, + "loss": 0.093, + "step": 17060 + }, + { + "epoch": 2.4090652358090936, + "grad_norm": 3.1796208319890478, + "learning_rate": 1.9670166629618903e-06, + "loss": 0.1449, + "step": 17061 + }, + { + "epoch": 2.4092064388590795, + "grad_norm": 4.047483458292405, + "learning_rate": 1.9661088274132924e-06, + "loss": 0.1858, + "step": 17062 + }, + { + "epoch": 2.4093476419090654, + "grad_norm": 2.9585503454644164, + "learning_rate": 1.965201178568853e-06, + "loss": 0.1314, + "step": 17063 + }, + { + "epoch": 2.4094888449590512, + "grad_norm": 3.6699651401732156, + "learning_rate": 1.9642937164496712e-06, + "loss": 0.1844, + "step": 17064 + }, + { + "epoch": 2.409630048009037, + "grad_norm": 4.615353068313634, + "learning_rate": 1.9633864410768356e-06, + "loss": 0.2265, + "step": 17065 + }, + { + "epoch": 2.409771251059023, + "grad_norm": 2.832294657802391, + "learning_rate": 1.962479352471426e-06, + "loss": 0.1405, + "step": 17066 + }, + { + "epoch": 2.409912454109009, + "grad_norm": 2.9118594759840226, + "learning_rate": 1.961572450654524e-06, + "loss": 0.1179, + "step": 17067 + }, + { + "epoch": 2.4100536571589948, + "grad_norm": 3.647322233573388, + "learning_rate": 1.960665735647206e-06, + "loss": 0.1681, + "step": 17068 + }, + { + "epoch": 2.4101948602089807, + "grad_norm": 2.858339836307105, + "learning_rate": 1.9597592074705452e-06, + "loss": 0.167, + "step": 17069 + }, + { + "epoch": 2.4103360632589665, + "grad_norm": 3.9749513714507216, + "learning_rate": 1.9588528661456087e-06, + "loss": 0.1952, + "step": 17070 + }, + { + "epoch": 2.4104772663089524, + "grad_norm": 3.731251282202874, + "learning_rate": 1.957946711693459e-06, + "loss": 0.1728, + "step": 17071 + }, + { + "epoch": 2.4106184693589383, + "grad_norm": 3.466234954161315, + "learning_rate": 1.9570407441351548e-06, + "loss": 0.1402, + "step": 17072 + }, + { + "epoch": 2.410759672408924, + "grad_norm": 3.706305679325977, + "learning_rate": 1.95613496349175e-06, + "loss": 0.1882, + "step": 17073 + }, + { + "epoch": 2.41090087545891, + "grad_norm": 3.24128042688095, + "learning_rate": 1.955229369784295e-06, + "loss": 0.1398, + "step": 17074 + }, + { + "epoch": 2.411042078508896, + "grad_norm": 2.986847233665659, + "learning_rate": 1.954323963033835e-06, + "loss": 0.1323, + "step": 17075 + }, + { + "epoch": 2.411183281558882, + "grad_norm": 4.383957604812067, + "learning_rate": 1.9534187432614114e-06, + "loss": 0.1711, + "step": 17076 + }, + { + "epoch": 2.4113244846088677, + "grad_norm": 2.975597065479522, + "learning_rate": 1.952513710488061e-06, + "loss": 0.158, + "step": 17077 + }, + { + "epoch": 2.4114656876588536, + "grad_norm": 3.522376985909804, + "learning_rate": 1.9516088647348164e-06, + "loss": 0.1768, + "step": 17078 + }, + { + "epoch": 2.4116068907088395, + "grad_norm": 3.5310393878240585, + "learning_rate": 1.9507042060227064e-06, + "loss": 0.1754, + "step": 17079 + }, + { + "epoch": 2.4117480937588254, + "grad_norm": 3.2415036505826853, + "learning_rate": 1.9497997343727513e-06, + "loss": 0.1621, + "step": 17080 + }, + { + "epoch": 2.4118892968088113, + "grad_norm": 2.998305578281165, + "learning_rate": 1.9488954498059777e-06, + "loss": 0.1474, + "step": 17081 + }, + { + "epoch": 2.412030499858797, + "grad_norm": 3.328258166917561, + "learning_rate": 1.947991352343398e-06, + "loss": 0.1557, + "step": 17082 + }, + { + "epoch": 2.412171702908783, + "grad_norm": 2.6362100751543527, + "learning_rate": 1.9470874420060202e-06, + "loss": 0.1273, + "step": 17083 + }, + { + "epoch": 2.4123129059587685, + "grad_norm": 3.2299710630863503, + "learning_rate": 1.9461837188148513e-06, + "loss": 0.1675, + "step": 17084 + }, + { + "epoch": 2.4124541090087543, + "grad_norm": 3.6270290097742377, + "learning_rate": 1.945280182790893e-06, + "loss": 0.1546, + "step": 17085 + }, + { + "epoch": 2.4125953120587402, + "grad_norm": 3.082577709029947, + "learning_rate": 1.944376833955147e-06, + "loss": 0.1497, + "step": 17086 + }, + { + "epoch": 2.412736515108726, + "grad_norm": 2.1911857360473843, + "learning_rate": 1.9434736723286042e-06, + "loss": 0.12, + "step": 17087 + }, + { + "epoch": 2.412877718158712, + "grad_norm": 3.4292407935881846, + "learning_rate": 1.9425706979322544e-06, + "loss": 0.1977, + "step": 17088 + }, + { + "epoch": 2.413018921208698, + "grad_norm": 3.0249649123711677, + "learning_rate": 1.941667910787082e-06, + "loss": 0.1662, + "step": 17089 + }, + { + "epoch": 2.4131601242586838, + "grad_norm": 2.462392796433591, + "learning_rate": 1.940765310914069e-06, + "loss": 0.0872, + "step": 17090 + }, + { + "epoch": 2.4133013273086696, + "grad_norm": 3.4887255523778125, + "learning_rate": 1.939862898334185e-06, + "loss": 0.1395, + "step": 17091 + }, + { + "epoch": 2.4134425303586555, + "grad_norm": 3.2173610014384812, + "learning_rate": 1.9389606730684084e-06, + "loss": 0.1654, + "step": 17092 + }, + { + "epoch": 2.4135837334086414, + "grad_norm": 3.3147681723106435, + "learning_rate": 1.9380586351377052e-06, + "loss": 0.16, + "step": 17093 + }, + { + "epoch": 2.4137249364586273, + "grad_norm": 6.515556774036979, + "learning_rate": 1.937156784563037e-06, + "loss": 0.1625, + "step": 17094 + }, + { + "epoch": 2.413866139508613, + "grad_norm": 3.9580745051065547, + "learning_rate": 1.9362551213653636e-06, + "loss": 0.1728, + "step": 17095 + }, + { + "epoch": 2.414007342558599, + "grad_norm": 3.1705448154777645, + "learning_rate": 1.9353536455656385e-06, + "loss": 0.1566, + "step": 17096 + }, + { + "epoch": 2.414148545608585, + "grad_norm": 2.960726521770711, + "learning_rate": 1.9344523571848096e-06, + "loss": 0.1291, + "step": 17097 + }, + { + "epoch": 2.414289748658571, + "grad_norm": 3.1646003692953912, + "learning_rate": 1.9335512562438263e-06, + "loss": 0.1625, + "step": 17098 + }, + { + "epoch": 2.4144309517085567, + "grad_norm": 2.926123500759486, + "learning_rate": 1.9326503427636313e-06, + "loss": 0.1407, + "step": 17099 + }, + { + "epoch": 2.4145721547585426, + "grad_norm": 3.161368139616355, + "learning_rate": 1.9317496167651563e-06, + "loss": 0.1386, + "step": 17100 + }, + { + "epoch": 2.4147133578085285, + "grad_norm": 3.108719796029688, + "learning_rate": 1.9308490782693346e-06, + "loss": 0.1252, + "step": 17101 + }, + { + "epoch": 2.4148545608585144, + "grad_norm": 3.3649233797629376, + "learning_rate": 1.929948727297096e-06, + "loss": 0.1401, + "step": 17102 + }, + { + "epoch": 2.4149957639085002, + "grad_norm": 3.434812600037867, + "learning_rate": 1.9290485638693613e-06, + "loss": 0.1919, + "step": 17103 + }, + { + "epoch": 2.415136966958486, + "grad_norm": 3.2043317664527056, + "learning_rate": 1.928148588007055e-06, + "loss": 0.1487, + "step": 17104 + }, + { + "epoch": 2.415278170008472, + "grad_norm": 3.081611353945789, + "learning_rate": 1.9272487997310894e-06, + "loss": 0.1317, + "step": 17105 + }, + { + "epoch": 2.415419373058458, + "grad_norm": 3.039104295489057, + "learning_rate": 1.9263491990623763e-06, + "loss": 0.1538, + "step": 17106 + }, + { + "epoch": 2.4155605761084438, + "grad_norm": 3.5732256096084574, + "learning_rate": 1.9254497860218223e-06, + "loss": 0.1772, + "step": 17107 + }, + { + "epoch": 2.4157017791584297, + "grad_norm": 2.949726696394914, + "learning_rate": 1.9245505606303238e-06, + "loss": 0.1342, + "step": 17108 + }, + { + "epoch": 2.4158429822084155, + "grad_norm": 2.841419564913024, + "learning_rate": 1.923651522908785e-06, + "loss": 0.1519, + "step": 17109 + }, + { + "epoch": 2.4159841852584014, + "grad_norm": 3.743750313422187, + "learning_rate": 1.9227526728780978e-06, + "loss": 0.1874, + "step": 17110 + }, + { + "epoch": 2.4161253883083873, + "grad_norm": 3.311484642364792, + "learning_rate": 1.921854010559149e-06, + "loss": 0.1652, + "step": 17111 + }, + { + "epoch": 2.416266591358373, + "grad_norm": 2.8730812416670988, + "learning_rate": 1.920955535972825e-06, + "loss": 0.1548, + "step": 17112 + }, + { + "epoch": 2.416407794408359, + "grad_norm": 3.2598050132515777, + "learning_rate": 1.920057249140005e-06, + "loss": 0.1558, + "step": 17113 + }, + { + "epoch": 2.416548997458345, + "grad_norm": 3.026824621128307, + "learning_rate": 1.9191591500815642e-06, + "loss": 0.1196, + "step": 17114 + }, + { + "epoch": 2.416690200508331, + "grad_norm": 3.6923533187952513, + "learning_rate": 1.918261238818374e-06, + "loss": 0.2454, + "step": 17115 + }, + { + "epoch": 2.4168314035583167, + "grad_norm": 4.198397236463667, + "learning_rate": 1.9173635153713066e-06, + "loss": 0.1814, + "step": 17116 + }, + { + "epoch": 2.4169726066083026, + "grad_norm": 3.2772135437708214, + "learning_rate": 1.9164659797612172e-06, + "loss": 0.1458, + "step": 17117 + }, + { + "epoch": 2.4171138096582885, + "grad_norm": 2.9609600675098666, + "learning_rate": 1.9155686320089684e-06, + "loss": 0.1404, + "step": 17118 + }, + { + "epoch": 2.4172550127082744, + "grad_norm": 3.1270771096290098, + "learning_rate": 1.914671472135413e-06, + "loss": 0.1743, + "step": 17119 + }, + { + "epoch": 2.4173962157582602, + "grad_norm": 3.3562446705929916, + "learning_rate": 1.9137745001613984e-06, + "loss": 0.1455, + "step": 17120 + }, + { + "epoch": 2.417537418808246, + "grad_norm": 3.4678363886607784, + "learning_rate": 1.9128777161077748e-06, + "loss": 0.1367, + "step": 17121 + }, + { + "epoch": 2.417678621858232, + "grad_norm": 3.4798248466700668, + "learning_rate": 1.911981119995381e-06, + "loss": 0.1856, + "step": 17122 + }, + { + "epoch": 2.417819824908218, + "grad_norm": 3.702778791225187, + "learning_rate": 1.9110847118450517e-06, + "loss": 0.1699, + "step": 17123 + }, + { + "epoch": 2.417961027958204, + "grad_norm": 3.7119301335623858, + "learning_rate": 1.9101884916776236e-06, + "loss": 0.1786, + "step": 17124 + }, + { + "epoch": 2.4181022310081897, + "grad_norm": 3.4426962714187574, + "learning_rate": 1.909292459513916e-06, + "loss": 0.1577, + "step": 17125 + }, + { + "epoch": 2.4182434340581755, + "grad_norm": 2.871508633446945, + "learning_rate": 1.9083966153747603e-06, + "loss": 0.1141, + "step": 17126 + }, + { + "epoch": 2.4183846371081614, + "grad_norm": 3.161242825909235, + "learning_rate": 1.9075009592809733e-06, + "loss": 0.191, + "step": 17127 + }, + { + "epoch": 2.4185258401581473, + "grad_norm": 3.739364537922266, + "learning_rate": 1.9066054912533683e-06, + "loss": 0.1453, + "step": 17128 + }, + { + "epoch": 2.418667043208133, + "grad_norm": 2.5073187509531776, + "learning_rate": 1.9057102113127569e-06, + "loss": 0.1349, + "step": 17129 + }, + { + "epoch": 2.418808246258119, + "grad_norm": 2.8797024752121247, + "learning_rate": 1.9048151194799435e-06, + "loss": 0.1577, + "step": 17130 + }, + { + "epoch": 2.418949449308105, + "grad_norm": 3.1822994979636228, + "learning_rate": 1.9039202157757319e-06, + "loss": 0.1199, + "step": 17131 + }, + { + "epoch": 2.419090652358091, + "grad_norm": 3.3128047887305034, + "learning_rate": 1.9030255002209153e-06, + "loss": 0.1648, + "step": 17132 + }, + { + "epoch": 2.4192318554080767, + "grad_norm": 3.2638085017855207, + "learning_rate": 1.9021309728362935e-06, + "loss": 0.1718, + "step": 17133 + }, + { + "epoch": 2.4193730584580626, + "grad_norm": 3.7715551129188896, + "learning_rate": 1.901236633642649e-06, + "loss": 0.2045, + "step": 17134 + }, + { + "epoch": 2.4195142615080485, + "grad_norm": 4.914970269029538, + "learning_rate": 1.9003424826607674e-06, + "loss": 0.2414, + "step": 17135 + }, + { + "epoch": 2.4196554645580344, + "grad_norm": 2.985415403410023, + "learning_rate": 1.8994485199114287e-06, + "loss": 0.1275, + "step": 17136 + }, + { + "epoch": 2.4197966676080203, + "grad_norm": 3.0417998742362067, + "learning_rate": 1.8985547454154053e-06, + "loss": 0.149, + "step": 17137 + }, + { + "epoch": 2.419937870658006, + "grad_norm": 2.8185543891681935, + "learning_rate": 1.8976611591934734e-06, + "loss": 0.1447, + "step": 17138 + }, + { + "epoch": 2.420079073707992, + "grad_norm": 2.9844557477744655, + "learning_rate": 1.896767761266397e-06, + "loss": 0.1214, + "step": 17139 + }, + { + "epoch": 2.420220276757978, + "grad_norm": 2.911296703501799, + "learning_rate": 1.8958745516549382e-06, + "loss": 0.1188, + "step": 17140 + }, + { + "epoch": 2.420361479807964, + "grad_norm": 3.715911646859128, + "learning_rate": 1.8949815303798568e-06, + "loss": 0.2046, + "step": 17141 + }, + { + "epoch": 2.4205026828579497, + "grad_norm": 3.7613390802198903, + "learning_rate": 1.8940886974619e-06, + "loss": 0.1761, + "step": 17142 + }, + { + "epoch": 2.4206438859079356, + "grad_norm": 3.4428215924109073, + "learning_rate": 1.893196052921824e-06, + "loss": 0.1546, + "step": 17143 + }, + { + "epoch": 2.4207850889579214, + "grad_norm": 2.8501013530884345, + "learning_rate": 1.8923035967803704e-06, + "loss": 0.1305, + "step": 17144 + }, + { + "epoch": 2.4209262920079073, + "grad_norm": 3.4759455127356294, + "learning_rate": 1.8914113290582802e-06, + "loss": 0.1929, + "step": 17145 + }, + { + "epoch": 2.421067495057893, + "grad_norm": 3.5197958051190565, + "learning_rate": 1.8905192497762881e-06, + "loss": 0.1517, + "step": 17146 + }, + { + "epoch": 2.421208698107879, + "grad_norm": 3.607400995226715, + "learning_rate": 1.8896273589551273e-06, + "loss": 0.1757, + "step": 17147 + }, + { + "epoch": 2.421349901157865, + "grad_norm": 3.821816259638706, + "learning_rate": 1.8887356566155236e-06, + "loss": 0.2168, + "step": 17148 + }, + { + "epoch": 2.421491104207851, + "grad_norm": 2.4728836991684733, + "learning_rate": 1.8878441427782e-06, + "loss": 0.1093, + "step": 17149 + }, + { + "epoch": 2.4216323072578367, + "grad_norm": 3.1333700417486465, + "learning_rate": 1.8869528174638752e-06, + "loss": 0.1928, + "step": 17150 + }, + { + "epoch": 2.4217735103078226, + "grad_norm": 3.6382218282718224, + "learning_rate": 1.886061680693263e-06, + "loss": 0.1858, + "step": 17151 + }, + { + "epoch": 2.4219147133578085, + "grad_norm": 2.8740903665219872, + "learning_rate": 1.885170732487074e-06, + "loss": 0.1188, + "step": 17152 + }, + { + "epoch": 2.4220559164077944, + "grad_norm": 2.8416557016412054, + "learning_rate": 1.884279972866012e-06, + "loss": 0.1294, + "step": 17153 + }, + { + "epoch": 2.4221971194577803, + "grad_norm": 3.140858070227539, + "learning_rate": 1.8833894018507758e-06, + "loss": 0.149, + "step": 17154 + }, + { + "epoch": 2.422338322507766, + "grad_norm": 2.968625187251119, + "learning_rate": 1.8824990194620674e-06, + "loss": 0.1491, + "step": 17155 + }, + { + "epoch": 2.422479525557752, + "grad_norm": 3.7300800817213235, + "learning_rate": 1.8816088257205767e-06, + "loss": 0.1601, + "step": 17156 + }, + { + "epoch": 2.422620728607738, + "grad_norm": 2.8000697752699897, + "learning_rate": 1.8807188206469906e-06, + "loss": 0.1014, + "step": 17157 + }, + { + "epoch": 2.422761931657724, + "grad_norm": 3.3486173012522142, + "learning_rate": 1.8798290042619949e-06, + "loss": 0.1849, + "step": 17158 + }, + { + "epoch": 2.4229031347077097, + "grad_norm": 2.46996074011264, + "learning_rate": 1.8789393765862608e-06, + "loss": 0.1152, + "step": 17159 + }, + { + "epoch": 2.4230443377576956, + "grad_norm": 2.38141861270557, + "learning_rate": 1.8780499376404715e-06, + "loss": 0.1094, + "step": 17160 + }, + { + "epoch": 2.4231855408076814, + "grad_norm": 3.7383386722199616, + "learning_rate": 1.8771606874452941e-06, + "loss": 0.1864, + "step": 17161 + }, + { + "epoch": 2.4233267438576673, + "grad_norm": 3.073584992793781, + "learning_rate": 1.8762716260213943e-06, + "loss": 0.1672, + "step": 17162 + }, + { + "epoch": 2.423467946907653, + "grad_norm": 3.5871842393548325, + "learning_rate": 1.875382753389433e-06, + "loss": 0.1584, + "step": 17163 + }, + { + "epoch": 2.423609149957639, + "grad_norm": 3.7824919317541825, + "learning_rate": 1.8744940695700686e-06, + "loss": 0.1532, + "step": 17164 + }, + { + "epoch": 2.423750353007625, + "grad_norm": 3.540866669000487, + "learning_rate": 1.8736055745839522e-06, + "loss": 0.1705, + "step": 17165 + }, + { + "epoch": 2.423891556057611, + "grad_norm": 3.396740317461732, + "learning_rate": 1.8727172684517325e-06, + "loss": 0.1752, + "step": 17166 + }, + { + "epoch": 2.4240327591075967, + "grad_norm": 3.528360243051571, + "learning_rate": 1.8718291511940546e-06, + "loss": 0.1633, + "step": 17167 + }, + { + "epoch": 2.4241739621575826, + "grad_norm": 2.9751861975393497, + "learning_rate": 1.870941222831556e-06, + "loss": 0.1357, + "step": 17168 + }, + { + "epoch": 2.4243151652075685, + "grad_norm": 3.494655786736849, + "learning_rate": 1.8700534833848738e-06, + "loss": 0.1779, + "step": 17169 + }, + { + "epoch": 2.4244563682575544, + "grad_norm": 3.7257536354798173, + "learning_rate": 1.869165932874636e-06, + "loss": 0.1834, + "step": 17170 + }, + { + "epoch": 2.4245975713075403, + "grad_norm": 3.4743758114180783, + "learning_rate": 1.868278571321469e-06, + "loss": 0.1641, + "step": 17171 + }, + { + "epoch": 2.424738774357526, + "grad_norm": 3.074647482287495, + "learning_rate": 1.867391398745999e-06, + "loss": 0.1631, + "step": 17172 + }, + { + "epoch": 2.424879977407512, + "grad_norm": 3.2984405157056957, + "learning_rate": 1.8665044151688404e-06, + "loss": 0.1569, + "step": 17173 + }, + { + "epoch": 2.425021180457498, + "grad_norm": 4.5352064234196705, + "learning_rate": 1.8656176206106059e-06, + "loss": 0.1797, + "step": 17174 + }, + { + "epoch": 2.425162383507484, + "grad_norm": 2.689618237622237, + "learning_rate": 1.8647310150919084e-06, + "loss": 0.1009, + "step": 17175 + }, + { + "epoch": 2.4253035865574697, + "grad_norm": 3.107030592597233, + "learning_rate": 1.863844598633343e-06, + "loss": 0.1751, + "step": 17176 + }, + { + "epoch": 2.4254447896074556, + "grad_norm": 4.6619338439193845, + "learning_rate": 1.8629583712555188e-06, + "loss": 0.2095, + "step": 17177 + }, + { + "epoch": 2.4255859926574415, + "grad_norm": 3.969409285522405, + "learning_rate": 1.8620723329790269e-06, + "loss": 0.1987, + "step": 17178 + }, + { + "epoch": 2.4257271957074273, + "grad_norm": 2.8290457213653912, + "learning_rate": 1.8611864838244598e-06, + "loss": 0.1511, + "step": 17179 + }, + { + "epoch": 2.4258683987574132, + "grad_norm": 2.999653985981247, + "learning_rate": 1.8603008238124043e-06, + "loss": 0.1298, + "step": 17180 + }, + { + "epoch": 2.426009601807399, + "grad_norm": 2.745271204131022, + "learning_rate": 1.8594153529634417e-06, + "loss": 0.1406, + "step": 17181 + }, + { + "epoch": 2.426150804857385, + "grad_norm": 3.130626232421815, + "learning_rate": 1.8585300712981514e-06, + "loss": 0.1735, + "step": 17182 + }, + { + "epoch": 2.426292007907371, + "grad_norm": 2.747524388058323, + "learning_rate": 1.8576449788371065e-06, + "loss": 0.1371, + "step": 17183 + }, + { + "epoch": 2.4264332109573568, + "grad_norm": 4.413331653601289, + "learning_rate": 1.8567600756008753e-06, + "loss": 0.204, + "step": 17184 + }, + { + "epoch": 2.4265744140073426, + "grad_norm": 3.3118489707059426, + "learning_rate": 1.8558753616100223e-06, + "loss": 0.1538, + "step": 17185 + }, + { + "epoch": 2.4267156170573285, + "grad_norm": 3.1554393015322715, + "learning_rate": 1.8549908368851099e-06, + "loss": 0.1634, + "step": 17186 + }, + { + "epoch": 2.4268568201073144, + "grad_norm": 2.913859310193168, + "learning_rate": 1.854106501446693e-06, + "loss": 0.1549, + "step": 17187 + }, + { + "epoch": 2.4269980231573003, + "grad_norm": 3.0406136463002915, + "learning_rate": 1.8532223553153194e-06, + "loss": 0.1603, + "step": 17188 + }, + { + "epoch": 2.427139226207286, + "grad_norm": 2.9229674863759323, + "learning_rate": 1.852338398511544e-06, + "loss": 0.1276, + "step": 17189 + }, + { + "epoch": 2.427280429257272, + "grad_norm": 3.8550741383075775, + "learning_rate": 1.8514546310559044e-06, + "loss": 0.1372, + "step": 17190 + }, + { + "epoch": 2.427421632307258, + "grad_norm": 3.2322720814740085, + "learning_rate": 1.8505710529689402e-06, + "loss": 0.1533, + "step": 17191 + }, + { + "epoch": 2.427562835357244, + "grad_norm": 3.7059883903013, + "learning_rate": 1.8496876642711882e-06, + "loss": 0.1753, + "step": 17192 + }, + { + "epoch": 2.4277040384072297, + "grad_norm": 3.492832784961619, + "learning_rate": 1.8488044649831716e-06, + "loss": 0.1969, + "step": 17193 + }, + { + "epoch": 2.4278452414572156, + "grad_norm": 2.783881213349675, + "learning_rate": 1.8479214551254176e-06, + "loss": 0.1398, + "step": 17194 + }, + { + "epoch": 2.4279864445072015, + "grad_norm": 4.034497505615193, + "learning_rate": 1.84703863471845e-06, + "loss": 0.2461, + "step": 17195 + }, + { + "epoch": 2.4281276475571874, + "grad_norm": 3.468404042266877, + "learning_rate": 1.8461560037827842e-06, + "loss": 0.1881, + "step": 17196 + }, + { + "epoch": 2.4282688506071732, + "grad_norm": 3.430727768869994, + "learning_rate": 1.8452735623389317e-06, + "loss": 0.1905, + "step": 17197 + }, + { + "epoch": 2.428410053657159, + "grad_norm": 3.369458468775003, + "learning_rate": 1.8443913104073984e-06, + "loss": 0.1805, + "step": 17198 + }, + { + "epoch": 2.428551256707145, + "grad_norm": 5.1839203163831895, + "learning_rate": 1.8435092480086902e-06, + "loss": 0.1594, + "step": 17199 + }, + { + "epoch": 2.428692459757131, + "grad_norm": 2.8985353166412, + "learning_rate": 1.842627375163305e-06, + "loss": 0.128, + "step": 17200 + }, + { + "epoch": 2.4288336628071168, + "grad_norm": 3.2380151516382125, + "learning_rate": 1.8417456918917355e-06, + "loss": 0.1574, + "step": 17201 + }, + { + "epoch": 2.4289748658571026, + "grad_norm": 2.912280008433937, + "learning_rate": 1.8408641982144738e-06, + "loss": 0.1502, + "step": 17202 + }, + { + "epoch": 2.4291160689070885, + "grad_norm": 3.0728797262641825, + "learning_rate": 1.8399828941520036e-06, + "loss": 0.138, + "step": 17203 + }, + { + "epoch": 2.4292572719570744, + "grad_norm": 3.123077298687668, + "learning_rate": 1.8391017797248079e-06, + "loss": 0.1147, + "step": 17204 + }, + { + "epoch": 2.4293984750070603, + "grad_norm": 2.6960118934154265, + "learning_rate": 1.8382208549533609e-06, + "loss": 0.1415, + "step": 17205 + }, + { + "epoch": 2.429539678057046, + "grad_norm": 3.4733377429385985, + "learning_rate": 1.8373401198581353e-06, + "loss": 0.1561, + "step": 17206 + }, + { + "epoch": 2.429680881107032, + "grad_norm": 2.664096796477448, + "learning_rate": 1.8364595744596026e-06, + "loss": 0.1304, + "step": 17207 + }, + { + "epoch": 2.429822084157018, + "grad_norm": 2.821024016576151, + "learning_rate": 1.8355792187782228e-06, + "loss": 0.1181, + "step": 17208 + }, + { + "epoch": 2.429963287207004, + "grad_norm": 3.456582404735825, + "learning_rate": 1.8346990528344589e-06, + "loss": 0.1669, + "step": 17209 + }, + { + "epoch": 2.4301044902569897, + "grad_norm": 3.721668412541519, + "learning_rate": 1.8338190766487606e-06, + "loss": 0.151, + "step": 17210 + }, + { + "epoch": 2.4302456933069756, + "grad_norm": 2.904562493566288, + "learning_rate": 1.8329392902415777e-06, + "loss": 0.1385, + "step": 17211 + }, + { + "epoch": 2.4303868963569615, + "grad_norm": 2.718114041907725, + "learning_rate": 1.8320596936333613e-06, + "loss": 0.1397, + "step": 17212 + }, + { + "epoch": 2.4305280994069474, + "grad_norm": 3.471337090174425, + "learning_rate": 1.8311802868445494e-06, + "loss": 0.159, + "step": 17213 + }, + { + "epoch": 2.4306693024569332, + "grad_norm": 3.087029411894934, + "learning_rate": 1.8303010698955803e-06, + "loss": 0.1577, + "step": 17214 + }, + { + "epoch": 2.430810505506919, + "grad_norm": 2.631387592854345, + "learning_rate": 1.8294220428068865e-06, + "loss": 0.119, + "step": 17215 + }, + { + "epoch": 2.430951708556905, + "grad_norm": 3.0708274456448463, + "learning_rate": 1.828543205598895e-06, + "loss": 0.1194, + "step": 17216 + }, + { + "epoch": 2.431092911606891, + "grad_norm": 3.8797698660103297, + "learning_rate": 1.8276645582920306e-06, + "loss": 0.2223, + "step": 17217 + }, + { + "epoch": 2.4312341146568768, + "grad_norm": 4.0951883884088245, + "learning_rate": 1.8267861009067124e-06, + "loss": 0.1936, + "step": 17218 + }, + { + "epoch": 2.4313753177068627, + "grad_norm": 3.8315254373504364, + "learning_rate": 1.8259078334633561e-06, + "loss": 0.1669, + "step": 17219 + }, + { + "epoch": 2.4315165207568485, + "grad_norm": 2.7585148019647674, + "learning_rate": 1.8250297559823716e-06, + "loss": 0.1294, + "step": 17220 + }, + { + "epoch": 2.4316577238068344, + "grad_norm": 3.00863625472103, + "learning_rate": 1.8241518684841642e-06, + "loss": 0.1505, + "step": 17221 + }, + { + "epoch": 2.4317989268568203, + "grad_norm": 3.5954465939841986, + "learning_rate": 1.8232741709891376e-06, + "loss": 0.1718, + "step": 17222 + }, + { + "epoch": 2.431940129906806, + "grad_norm": 3.218723743168936, + "learning_rate": 1.822396663517685e-06, + "loss": 0.1546, + "step": 17223 + }, + { + "epoch": 2.432081332956792, + "grad_norm": 3.4682573013106293, + "learning_rate": 1.8215193460902047e-06, + "loss": 0.1348, + "step": 17224 + }, + { + "epoch": 2.432222536006778, + "grad_norm": 3.5527284163116355, + "learning_rate": 1.8206422187270823e-06, + "loss": 0.1472, + "step": 17225 + }, + { + "epoch": 2.432363739056764, + "grad_norm": 2.728546326426804, + "learning_rate": 1.8197652814487054e-06, + "loss": 0.1324, + "step": 17226 + }, + { + "epoch": 2.4325049421067497, + "grad_norm": 2.5518412629950653, + "learning_rate": 1.8188885342754481e-06, + "loss": 0.1211, + "step": 17227 + }, + { + "epoch": 2.4326461451567356, + "grad_norm": 3.32274652476081, + "learning_rate": 1.818011977227686e-06, + "loss": 0.1583, + "step": 17228 + }, + { + "epoch": 2.4327873482067215, + "grad_norm": 3.1013064180302736, + "learning_rate": 1.8171356103257942e-06, + "loss": 0.1183, + "step": 17229 + }, + { + "epoch": 2.4329285512567074, + "grad_norm": 3.43214513386801, + "learning_rate": 1.8162594335901363e-06, + "loss": 0.1508, + "step": 17230 + }, + { + "epoch": 2.4330697543066933, + "grad_norm": 3.351763724303904, + "learning_rate": 1.8153834470410758e-06, + "loss": 0.1589, + "step": 17231 + }, + { + "epoch": 2.433210957356679, + "grad_norm": 3.337588406520045, + "learning_rate": 1.814507650698969e-06, + "loss": 0.1429, + "step": 17232 + }, + { + "epoch": 2.433352160406665, + "grad_norm": 3.6443654478298524, + "learning_rate": 1.8136320445841693e-06, + "loss": 0.1691, + "step": 17233 + }, + { + "epoch": 2.433493363456651, + "grad_norm": 3.1130296443172876, + "learning_rate": 1.812756628717025e-06, + "loss": 0.1461, + "step": 17234 + }, + { + "epoch": 2.433634566506637, + "grad_norm": 3.331029963570307, + "learning_rate": 1.8118814031178822e-06, + "loss": 0.09, + "step": 17235 + }, + { + "epoch": 2.4337757695566227, + "grad_norm": 3.222937538724706, + "learning_rate": 1.8110063678070778e-06, + "loss": 0.1539, + "step": 17236 + }, + { + "epoch": 2.433916972606608, + "grad_norm": 3.3823771127924647, + "learning_rate": 1.8101315228049498e-06, + "loss": 0.174, + "step": 17237 + }, + { + "epoch": 2.434058175656594, + "grad_norm": 2.917301796009687, + "learning_rate": 1.809256868131828e-06, + "loss": 0.1389, + "step": 17238 + }, + { + "epoch": 2.43419937870658, + "grad_norm": 3.0951001755624943, + "learning_rate": 1.80838240380804e-06, + "loss": 0.137, + "step": 17239 + }, + { + "epoch": 2.4343405817565658, + "grad_norm": 2.547149511365408, + "learning_rate": 1.8075081298539032e-06, + "loss": 0.138, + "step": 17240 + }, + { + "epoch": 2.4344817848065516, + "grad_norm": 2.6507846007217126, + "learning_rate": 1.8066340462897435e-06, + "loss": 0.1288, + "step": 17241 + }, + { + "epoch": 2.4346229878565375, + "grad_norm": 3.479158426646564, + "learning_rate": 1.8057601531358693e-06, + "loss": 0.1835, + "step": 17242 + }, + { + "epoch": 2.4347641909065234, + "grad_norm": 2.520138431855701, + "learning_rate": 1.804886450412593e-06, + "loss": 0.102, + "step": 17243 + }, + { + "epoch": 2.4349053939565093, + "grad_norm": 3.116229566631311, + "learning_rate": 1.8040129381402137e-06, + "loss": 0.161, + "step": 17244 + }, + { + "epoch": 2.435046597006495, + "grad_norm": 2.6524300149918334, + "learning_rate": 1.8031396163390314e-06, + "loss": 0.1146, + "step": 17245 + }, + { + "epoch": 2.435187800056481, + "grad_norm": 3.0850634330095303, + "learning_rate": 1.802266485029347e-06, + "loss": 0.1238, + "step": 17246 + }, + { + "epoch": 2.435329003106467, + "grad_norm": 2.738934147477456, + "learning_rate": 1.8013935442314502e-06, + "loss": 0.1182, + "step": 17247 + }, + { + "epoch": 2.435470206156453, + "grad_norm": 2.9747631240238106, + "learning_rate": 1.8005207939656255e-06, + "loss": 0.1516, + "step": 17248 + }, + { + "epoch": 2.4356114092064387, + "grad_norm": 3.236784471974631, + "learning_rate": 1.799648234252157e-06, + "loss": 0.15, + "step": 17249 + }, + { + "epoch": 2.4357526122564246, + "grad_norm": 3.5388537161929357, + "learning_rate": 1.7987758651113218e-06, + "loss": 0.208, + "step": 17250 + }, + { + "epoch": 2.4358938153064105, + "grad_norm": 3.1778392936440896, + "learning_rate": 1.7979036865633949e-06, + "loss": 0.1536, + "step": 17251 + }, + { + "epoch": 2.4360350183563964, + "grad_norm": 3.280262454234508, + "learning_rate": 1.797031698628643e-06, + "loss": 0.1665, + "step": 17252 + }, + { + "epoch": 2.4361762214063822, + "grad_norm": 2.9633046066969797, + "learning_rate": 1.7961599013273312e-06, + "loss": 0.1669, + "step": 17253 + }, + { + "epoch": 2.436317424456368, + "grad_norm": 3.376633298419689, + "learning_rate": 1.7952882946797212e-06, + "loss": 0.1579, + "step": 17254 + }, + { + "epoch": 2.436458627506354, + "grad_norm": 3.540608445739577, + "learning_rate": 1.7944168787060678e-06, + "loss": 0.1841, + "step": 17255 + }, + { + "epoch": 2.43659983055634, + "grad_norm": 3.6602989971620845, + "learning_rate": 1.7935456534266227e-06, + "loss": 0.1215, + "step": 17256 + }, + { + "epoch": 2.4367410336063258, + "grad_norm": 2.9951540927230482, + "learning_rate": 1.7926746188616295e-06, + "loss": 0.1636, + "step": 17257 + }, + { + "epoch": 2.4368822366563117, + "grad_norm": 3.20566298717064, + "learning_rate": 1.7918037750313366e-06, + "loss": 0.1127, + "step": 17258 + }, + { + "epoch": 2.4370234397062975, + "grad_norm": 4.312635640100855, + "learning_rate": 1.7909331219559788e-06, + "loss": 0.1909, + "step": 17259 + }, + { + "epoch": 2.4371646427562834, + "grad_norm": 3.4937181498339553, + "learning_rate": 1.7900626596557924e-06, + "loss": 0.1574, + "step": 17260 + }, + { + "epoch": 2.4373058458062693, + "grad_norm": 3.594099875363766, + "learning_rate": 1.7891923881510021e-06, + "loss": 0.141, + "step": 17261 + }, + { + "epoch": 2.437447048856255, + "grad_norm": 3.476415690439647, + "learning_rate": 1.7883223074618316e-06, + "loss": 0.1913, + "step": 17262 + }, + { + "epoch": 2.437588251906241, + "grad_norm": 2.8753806750818494, + "learning_rate": 1.7874524176085073e-06, + "loss": 0.1406, + "step": 17263 + }, + { + "epoch": 2.437729454956227, + "grad_norm": 3.669772299737988, + "learning_rate": 1.7865827186112429e-06, + "loss": 0.1502, + "step": 17264 + }, + { + "epoch": 2.437870658006213, + "grad_norm": 3.283253555907916, + "learning_rate": 1.7857132104902474e-06, + "loss": 0.1391, + "step": 17265 + }, + { + "epoch": 2.4380118610561987, + "grad_norm": 3.789568305936943, + "learning_rate": 1.7848438932657309e-06, + "loss": 0.1876, + "step": 17266 + }, + { + "epoch": 2.4381530641061846, + "grad_norm": 3.2811100720823476, + "learning_rate": 1.7839747669578932e-06, + "loss": 0.1555, + "step": 17267 + }, + { + "epoch": 2.4382942671561705, + "grad_norm": 2.9257847495670366, + "learning_rate": 1.7831058315869343e-06, + "loss": 0.1026, + "step": 17268 + }, + { + "epoch": 2.4384354702061564, + "grad_norm": 3.1288573534989403, + "learning_rate": 1.7822370871730465e-06, + "loss": 0.1329, + "step": 17269 + }, + { + "epoch": 2.4385766732561422, + "grad_norm": 2.6235434089535663, + "learning_rate": 1.7813685337364205e-06, + "loss": 0.1124, + "step": 17270 + }, + { + "epoch": 2.438717876306128, + "grad_norm": 2.8432602347551716, + "learning_rate": 1.7805001712972415e-06, + "loss": 0.1645, + "step": 17271 + }, + { + "epoch": 2.438859079356114, + "grad_norm": 3.3519059676668106, + "learning_rate": 1.7796319998756872e-06, + "loss": 0.1204, + "step": 17272 + }, + { + "epoch": 2.4390002824061, + "grad_norm": 2.780088035758387, + "learning_rate": 1.7787640194919354e-06, + "loss": 0.1148, + "step": 17273 + }, + { + "epoch": 2.439141485456086, + "grad_norm": 3.0285458365226745, + "learning_rate": 1.777896230166155e-06, + "loss": 0.1228, + "step": 17274 + }, + { + "epoch": 2.4392826885060717, + "grad_norm": 2.8521131343331687, + "learning_rate": 1.7770286319185182e-06, + "loss": 0.1074, + "step": 17275 + }, + { + "epoch": 2.4394238915560575, + "grad_norm": 3.1365560098363408, + "learning_rate": 1.776161224769185e-06, + "loss": 0.1524, + "step": 17276 + }, + { + "epoch": 2.4395650946060434, + "grad_norm": 3.0548683092830284, + "learning_rate": 1.7752940087383153e-06, + "loss": 0.141, + "step": 17277 + }, + { + "epoch": 2.4397062976560293, + "grad_norm": 3.0023053706765404, + "learning_rate": 1.774426983846058e-06, + "loss": 0.1253, + "step": 17278 + }, + { + "epoch": 2.439847500706015, + "grad_norm": 2.870947198062366, + "learning_rate": 1.7735601501125632e-06, + "loss": 0.1358, + "step": 17279 + }, + { + "epoch": 2.439988703756001, + "grad_norm": 3.3658243695686645, + "learning_rate": 1.7726935075579798e-06, + "loss": 0.1525, + "step": 17280 + }, + { + "epoch": 2.440129906805987, + "grad_norm": 3.2900338703543555, + "learning_rate": 1.7718270562024464e-06, + "loss": 0.1654, + "step": 17281 + }, + { + "epoch": 2.440271109855973, + "grad_norm": 2.98088213777779, + "learning_rate": 1.770960796066099e-06, + "loss": 0.1282, + "step": 17282 + }, + { + "epoch": 2.4404123129059587, + "grad_norm": 3.2657632121822813, + "learning_rate": 1.7700947271690693e-06, + "loss": 0.1813, + "step": 17283 + }, + { + "epoch": 2.4405535159559446, + "grad_norm": 3.6475264514872676, + "learning_rate": 1.7692288495314836e-06, + "loss": 0.1353, + "step": 17284 + }, + { + "epoch": 2.4406947190059305, + "grad_norm": 3.0980042599562228, + "learning_rate": 1.7683631631734643e-06, + "loss": 0.1562, + "step": 17285 + }, + { + "epoch": 2.4408359220559164, + "grad_norm": 2.5788459697367854, + "learning_rate": 1.7674976681151302e-06, + "loss": 0.1092, + "step": 17286 + }, + { + "epoch": 2.4409771251059023, + "grad_norm": 3.2534962048122256, + "learning_rate": 1.7666323643765947e-06, + "loss": 0.1337, + "step": 17287 + }, + { + "epoch": 2.441118328155888, + "grad_norm": 2.975086146586491, + "learning_rate": 1.765767251977969e-06, + "loss": 0.1562, + "step": 17288 + }, + { + "epoch": 2.441259531205874, + "grad_norm": 3.4665949999230183, + "learning_rate": 1.7649023309393543e-06, + "loss": 0.1439, + "step": 17289 + }, + { + "epoch": 2.44140073425586, + "grad_norm": 3.0987555210675866, + "learning_rate": 1.7640376012808536e-06, + "loss": 0.1311, + "step": 17290 + }, + { + "epoch": 2.441541937305846, + "grad_norm": 3.3529874140203204, + "learning_rate": 1.7631730630225609e-06, + "loss": 0.1617, + "step": 17291 + }, + { + "epoch": 2.4416831403558317, + "grad_norm": 2.231268159980494, + "learning_rate": 1.7623087161845699e-06, + "loss": 0.0914, + "step": 17292 + }, + { + "epoch": 2.4418243434058176, + "grad_norm": 3.708674345259756, + "learning_rate": 1.7614445607869667e-06, + "loss": 0.1833, + "step": 17293 + }, + { + "epoch": 2.4419655464558034, + "grad_norm": 3.7619961130611914, + "learning_rate": 1.760580596849838e-06, + "loss": 0.2044, + "step": 17294 + }, + { + "epoch": 2.4421067495057893, + "grad_norm": 2.81402149234809, + "learning_rate": 1.7597168243932538e-06, + "loss": 0.1158, + "step": 17295 + }, + { + "epoch": 2.442247952555775, + "grad_norm": 2.6094826557838826, + "learning_rate": 1.758853243437293e-06, + "loss": 0.1044, + "step": 17296 + }, + { + "epoch": 2.442389155605761, + "grad_norm": 3.5392231981004905, + "learning_rate": 1.7579898540020202e-06, + "loss": 0.174, + "step": 17297 + }, + { + "epoch": 2.442530358655747, + "grad_norm": 3.281645220392604, + "learning_rate": 1.7571266561075073e-06, + "loss": 0.1522, + "step": 17298 + }, + { + "epoch": 2.442671561705733, + "grad_norm": 3.5545665440754473, + "learning_rate": 1.7562636497738105e-06, + "loss": 0.1578, + "step": 17299 + }, + { + "epoch": 2.4428127647557187, + "grad_norm": 3.6124390028532765, + "learning_rate": 1.7554008350209862e-06, + "loss": 0.1782, + "step": 17300 + }, + { + "epoch": 2.4429539678057046, + "grad_norm": 3.376967388581666, + "learning_rate": 1.7545382118690868e-06, + "loss": 0.1737, + "step": 17301 + }, + { + "epoch": 2.4430951708556905, + "grad_norm": 2.396380928461803, + "learning_rate": 1.7536757803381576e-06, + "loss": 0.1269, + "step": 17302 + }, + { + "epoch": 2.4432363739056764, + "grad_norm": 4.015094003367481, + "learning_rate": 1.7528135404482415e-06, + "loss": 0.1909, + "step": 17303 + }, + { + "epoch": 2.4433775769556623, + "grad_norm": 3.7380352650518804, + "learning_rate": 1.751951492219378e-06, + "loss": 0.1812, + "step": 17304 + }, + { + "epoch": 2.443518780005648, + "grad_norm": 3.3925321512326114, + "learning_rate": 1.751089635671599e-06, + "loss": 0.1324, + "step": 17305 + }, + { + "epoch": 2.443659983055634, + "grad_norm": 3.499209158994362, + "learning_rate": 1.750227970824936e-06, + "loss": 0.1801, + "step": 17306 + }, + { + "epoch": 2.44380118610562, + "grad_norm": 2.6418144982835408, + "learning_rate": 1.7493664976994106e-06, + "loss": 0.1011, + "step": 17307 + }, + { + "epoch": 2.443942389155606, + "grad_norm": 3.986539252847985, + "learning_rate": 1.7485052163150452e-06, + "loss": 0.1941, + "step": 17308 + }, + { + "epoch": 2.4440835922055917, + "grad_norm": 3.414155276357219, + "learning_rate": 1.747644126691852e-06, + "loss": 0.1503, + "step": 17309 + }, + { + "epoch": 2.4442247952555776, + "grad_norm": 3.3628508066069673, + "learning_rate": 1.746783228849851e-06, + "loss": 0.1258, + "step": 17310 + }, + { + "epoch": 2.4443659983055634, + "grad_norm": 2.8227059519542173, + "learning_rate": 1.7459225228090404e-06, + "loss": 0.1205, + "step": 17311 + }, + { + "epoch": 2.4445072013555493, + "grad_norm": 2.559181681041187, + "learning_rate": 1.7450620085894255e-06, + "loss": 0.1121, + "step": 17312 + }, + { + "epoch": 2.444648404405535, + "grad_norm": 3.1930315449931634, + "learning_rate": 1.7442016862110056e-06, + "loss": 0.1445, + "step": 17313 + }, + { + "epoch": 2.444789607455521, + "grad_norm": 2.9633648016624665, + "learning_rate": 1.7433415556937693e-06, + "loss": 0.1164, + "step": 17314 + }, + { + "epoch": 2.444930810505507, + "grad_norm": 3.062864372879908, + "learning_rate": 1.742481617057713e-06, + "loss": 0.1084, + "step": 17315 + }, + { + "epoch": 2.445072013555493, + "grad_norm": 3.0900978306649876, + "learning_rate": 1.741621870322817e-06, + "loss": 0.1306, + "step": 17316 + }, + { + "epoch": 2.4452132166054787, + "grad_norm": 3.743581038086917, + "learning_rate": 1.7407623155090635e-06, + "loss": 0.187, + "step": 17317 + }, + { + "epoch": 2.4453544196554646, + "grad_norm": 3.3986200457026, + "learning_rate": 1.7399029526364254e-06, + "loss": 0.1624, + "step": 17318 + }, + { + "epoch": 2.4454956227054505, + "grad_norm": 3.4400475745889585, + "learning_rate": 1.7390437817248763e-06, + "loss": 0.1446, + "step": 17319 + }, + { + "epoch": 2.4456368257554364, + "grad_norm": 3.7858254244640084, + "learning_rate": 1.7381848027943815e-06, + "loss": 0.1852, + "step": 17320 + }, + { + "epoch": 2.4457780288054223, + "grad_norm": 2.746608018793526, + "learning_rate": 1.7373260158649042e-06, + "loss": 0.1463, + "step": 17321 + }, + { + "epoch": 2.445919231855408, + "grad_norm": 4.19525552710215, + "learning_rate": 1.7364674209564025e-06, + "loss": 0.1893, + "step": 17322 + }, + { + "epoch": 2.446060434905394, + "grad_norm": 3.0730638725818897, + "learning_rate": 1.7356090180888286e-06, + "loss": 0.1006, + "step": 17323 + }, + { + "epoch": 2.44620163795538, + "grad_norm": 3.5254080261844942, + "learning_rate": 1.7347508072821317e-06, + "loss": 0.133, + "step": 17324 + }, + { + "epoch": 2.446342841005366, + "grad_norm": 3.7564256367011954, + "learning_rate": 1.7338927885562573e-06, + "loss": 0.1798, + "step": 17325 + }, + { + "epoch": 2.4464840440553517, + "grad_norm": 3.541625572551512, + "learning_rate": 1.7330349619311415e-06, + "loss": 0.136, + "step": 17326 + }, + { + "epoch": 2.4466252471053376, + "grad_norm": 4.373165131446355, + "learning_rate": 1.7321773274267284e-06, + "loss": 0.178, + "step": 17327 + }, + { + "epoch": 2.4467664501553235, + "grad_norm": 3.4552968595193008, + "learning_rate": 1.731319885062941e-06, + "loss": 0.1643, + "step": 17328 + }, + { + "epoch": 2.4469076532053093, + "grad_norm": 3.365486453386146, + "learning_rate": 1.7304626348597075e-06, + "loss": 0.169, + "step": 17329 + }, + { + "epoch": 2.4470488562552952, + "grad_norm": 3.837009608908741, + "learning_rate": 1.7296055768369524e-06, + "loss": 0.1628, + "step": 17330 + }, + { + "epoch": 2.447190059305281, + "grad_norm": 2.460743028036989, + "learning_rate": 1.7287487110145896e-06, + "loss": 0.0942, + "step": 17331 + }, + { + "epoch": 2.447331262355267, + "grad_norm": 2.9450621809494755, + "learning_rate": 1.7278920374125362e-06, + "loss": 0.1618, + "step": 17332 + }, + { + "epoch": 2.447472465405253, + "grad_norm": 2.6230917997613643, + "learning_rate": 1.7270355560506991e-06, + "loss": 0.149, + "step": 17333 + }, + { + "epoch": 2.4476136684552388, + "grad_norm": 3.1794081212748866, + "learning_rate": 1.726179266948984e-06, + "loss": 0.1726, + "step": 17334 + }, + { + "epoch": 2.4477548715052246, + "grad_norm": 3.8040983387830605, + "learning_rate": 1.7253231701272887e-06, + "loss": 0.1858, + "step": 17335 + }, + { + "epoch": 2.4478960745552105, + "grad_norm": 2.630983220948977, + "learning_rate": 1.7244672656055105e-06, + "loss": 0.0998, + "step": 17336 + }, + { + "epoch": 2.4480372776051964, + "grad_norm": 3.817498410196405, + "learning_rate": 1.7236115534035381e-06, + "loss": 0.1901, + "step": 17337 + }, + { + "epoch": 2.4481784806551823, + "grad_norm": 2.752501333448148, + "learning_rate": 1.7227560335412597e-06, + "loss": 0.1228, + "step": 17338 + }, + { + "epoch": 2.448319683705168, + "grad_norm": 3.687596708631765, + "learning_rate": 1.721900706038555e-06, + "loss": 0.1759, + "step": 17339 + }, + { + "epoch": 2.448460886755154, + "grad_norm": 2.8532413124648994, + "learning_rate": 1.721045570915304e-06, + "loss": 0.1182, + "step": 17340 + }, + { + "epoch": 2.44860208980514, + "grad_norm": 3.061332123374655, + "learning_rate": 1.7201906281913784e-06, + "loss": 0.1355, + "step": 17341 + }, + { + "epoch": 2.448743292855126, + "grad_norm": 3.0671755286243885, + "learning_rate": 1.7193358778866464e-06, + "loss": 0.1573, + "step": 17342 + }, + { + "epoch": 2.4488844959051117, + "grad_norm": 3.1513989247936163, + "learning_rate": 1.7184813200209704e-06, + "loss": 0.1589, + "step": 17343 + }, + { + "epoch": 2.4490256989550976, + "grad_norm": 3.686118025013961, + "learning_rate": 1.7176269546142166e-06, + "loss": 0.1826, + "step": 17344 + }, + { + "epoch": 2.4491669020050835, + "grad_norm": 3.048702265504464, + "learning_rate": 1.7167727816862333e-06, + "loss": 0.118, + "step": 17345 + }, + { + "epoch": 2.4493081050550694, + "grad_norm": 3.1665239453554648, + "learning_rate": 1.715918801256874e-06, + "loss": 0.1592, + "step": 17346 + }, + { + "epoch": 2.4494493081050552, + "grad_norm": 3.3856624559500195, + "learning_rate": 1.7150650133459835e-06, + "loss": 0.1502, + "step": 17347 + }, + { + "epoch": 2.449590511155041, + "grad_norm": 2.6074767780809345, + "learning_rate": 1.7142114179734004e-06, + "loss": 0.1553, + "step": 17348 + }, + { + "epoch": 2.449731714205027, + "grad_norm": 3.98630562842716, + "learning_rate": 1.7133580151589701e-06, + "loss": 0.2099, + "step": 17349 + }, + { + "epoch": 2.449872917255013, + "grad_norm": 3.8054557540188942, + "learning_rate": 1.712504804922519e-06, + "loss": 0.1779, + "step": 17350 + }, + { + "epoch": 2.4500141203049988, + "grad_norm": 3.3041151327987324, + "learning_rate": 1.7116517872838788e-06, + "loss": 0.1367, + "step": 17351 + }, + { + "epoch": 2.4501553233549846, + "grad_norm": 3.1424163518918946, + "learning_rate": 1.7107989622628706e-06, + "loss": 0.1524, + "step": 17352 + }, + { + "epoch": 2.4502965264049705, + "grad_norm": 3.343362136161125, + "learning_rate": 1.709946329879315e-06, + "loss": 0.1242, + "step": 17353 + }, + { + "epoch": 2.4504377294549564, + "grad_norm": 2.5968900350226694, + "learning_rate": 1.7090938901530264e-06, + "loss": 0.1208, + "step": 17354 + }, + { + "epoch": 2.4505789325049423, + "grad_norm": 3.0575961656830377, + "learning_rate": 1.708241643103815e-06, + "loss": 0.128, + "step": 17355 + }, + { + "epoch": 2.450720135554928, + "grad_norm": 3.2469591216091374, + "learning_rate": 1.7073895887514869e-06, + "loss": 0.1167, + "step": 17356 + }, + { + "epoch": 2.4508613386049136, + "grad_norm": 3.7061651484360527, + "learning_rate": 1.7065377271158434e-06, + "loss": 0.1929, + "step": 17357 + }, + { + "epoch": 2.4510025416548995, + "grad_norm": 3.9234229253414044, + "learning_rate": 1.7056860582166823e-06, + "loss": 0.1812, + "step": 17358 + }, + { + "epoch": 2.4511437447048854, + "grad_norm": 3.036486746602459, + "learning_rate": 1.7048345820737944e-06, + "loss": 0.147, + "step": 17359 + }, + { + "epoch": 2.4512849477548713, + "grad_norm": 3.2581255430568192, + "learning_rate": 1.703983298706966e-06, + "loss": 0.1531, + "step": 17360 + }, + { + "epoch": 2.451426150804857, + "grad_norm": 4.022350787830313, + "learning_rate": 1.703132208135988e-06, + "loss": 0.1855, + "step": 17361 + }, + { + "epoch": 2.451567353854843, + "grad_norm": 2.743292867619651, + "learning_rate": 1.7022813103806324e-06, + "loss": 0.1248, + "step": 17362 + }, + { + "epoch": 2.451708556904829, + "grad_norm": 3.5101056139063993, + "learning_rate": 1.7014306054606744e-06, + "loss": 0.1722, + "step": 17363 + }, + { + "epoch": 2.451849759954815, + "grad_norm": 3.2745120995800137, + "learning_rate": 1.700580093395886e-06, + "loss": 0.1512, + "step": 17364 + }, + { + "epoch": 2.4519909630048007, + "grad_norm": 2.818151591381503, + "learning_rate": 1.6997297742060293e-06, + "loss": 0.1441, + "step": 17365 + }, + { + "epoch": 2.4521321660547866, + "grad_norm": 3.148566655665294, + "learning_rate": 1.6988796479108716e-06, + "loss": 0.1411, + "step": 17366 + }, + { + "epoch": 2.4522733691047724, + "grad_norm": 2.845156259539625, + "learning_rate": 1.698029714530165e-06, + "loss": 0.1481, + "step": 17367 + }, + { + "epoch": 2.4524145721547583, + "grad_norm": 3.4530014071361252, + "learning_rate": 1.6971799740836625e-06, + "loss": 0.2038, + "step": 17368 + }, + { + "epoch": 2.452555775204744, + "grad_norm": 2.4867796448882693, + "learning_rate": 1.6963304265911106e-06, + "loss": 0.1152, + "step": 17369 + }, + { + "epoch": 2.45269697825473, + "grad_norm": 2.844120383077472, + "learning_rate": 1.6954810720722581e-06, + "loss": 0.125, + "step": 17370 + }, + { + "epoch": 2.452838181304716, + "grad_norm": 4.15629160416572, + "learning_rate": 1.6946319105468324e-06, + "loss": 0.1947, + "step": 17371 + }, + { + "epoch": 2.452979384354702, + "grad_norm": 3.0913270672018447, + "learning_rate": 1.6937829420345775e-06, + "loss": 0.1546, + "step": 17372 + }, + { + "epoch": 2.4531205874046877, + "grad_norm": 3.504386294409317, + "learning_rate": 1.6929341665552201e-06, + "loss": 0.1456, + "step": 17373 + }, + { + "epoch": 2.4532617904546736, + "grad_norm": 4.456315651321824, + "learning_rate": 1.6920855841284844e-06, + "loss": 0.214, + "step": 17374 + }, + { + "epoch": 2.4534029935046595, + "grad_norm": 3.1676527952522533, + "learning_rate": 1.6912371947740924e-06, + "loss": 0.138, + "step": 17375 + }, + { + "epoch": 2.4535441965546454, + "grad_norm": 3.1685429172347663, + "learning_rate": 1.6903889985117594e-06, + "loss": 0.1414, + "step": 17376 + }, + { + "epoch": 2.4536853996046313, + "grad_norm": 3.142995656681546, + "learning_rate": 1.6895409953611952e-06, + "loss": 0.1471, + "step": 17377 + }, + { + "epoch": 2.453826602654617, + "grad_norm": 3.2184639539462343, + "learning_rate": 1.688693185342114e-06, + "loss": 0.1742, + "step": 17378 + }, + { + "epoch": 2.453967805704603, + "grad_norm": 2.8140775923100465, + "learning_rate": 1.6878455684742113e-06, + "loss": 0.1138, + "step": 17379 + }, + { + "epoch": 2.454109008754589, + "grad_norm": 2.665841601174925, + "learning_rate": 1.6869981447771876e-06, + "loss": 0.1276, + "step": 17380 + }, + { + "epoch": 2.454250211804575, + "grad_norm": 3.064872193522012, + "learning_rate": 1.6861509142707376e-06, + "loss": 0.1534, + "step": 17381 + }, + { + "epoch": 2.4543914148545607, + "grad_norm": 3.199340274873859, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.1438, + "step": 17382 + }, + { + "epoch": 2.4545326179045466, + "grad_norm": 2.8324053774688305, + "learning_rate": 1.6844570329083066e-06, + "loss": 0.1394, + "step": 17383 + }, + { + "epoch": 2.4546738209545325, + "grad_norm": 2.8735502244207094, + "learning_rate": 1.6836103820916926e-06, + "loss": 0.1288, + "step": 17384 + }, + { + "epoch": 2.4548150240045183, + "grad_norm": 5.835297541494395, + "learning_rate": 1.6827639245443817e-06, + "loss": 0.1403, + "step": 17385 + }, + { + "epoch": 2.4549562270545042, + "grad_norm": 3.1340875889961657, + "learning_rate": 1.6819176602860454e-06, + "loss": 0.1627, + "step": 17386 + }, + { + "epoch": 2.45509743010449, + "grad_norm": 2.6592661536903, + "learning_rate": 1.6810715893363539e-06, + "loss": 0.1094, + "step": 17387 + }, + { + "epoch": 2.455238633154476, + "grad_norm": 3.453254714824574, + "learning_rate": 1.68022571171496e-06, + "loss": 0.1702, + "step": 17388 + }, + { + "epoch": 2.455379836204462, + "grad_norm": 3.482589259047617, + "learning_rate": 1.6793800274415295e-06, + "loss": 0.199, + "step": 17389 + }, + { + "epoch": 2.4555210392544478, + "grad_norm": 3.234346344020309, + "learning_rate": 1.6785345365357153e-06, + "loss": 0.1439, + "step": 17390 + }, + { + "epoch": 2.4556622423044336, + "grad_norm": 2.721238465395554, + "learning_rate": 1.677689239017164e-06, + "loss": 0.1068, + "step": 17391 + }, + { + "epoch": 2.4558034453544195, + "grad_norm": 3.3437502318949375, + "learning_rate": 1.6768441349055197e-06, + "loss": 0.1422, + "step": 17392 + }, + { + "epoch": 2.4559446484044054, + "grad_norm": 3.273245391922852, + "learning_rate": 1.6759992242204247e-06, + "loss": 0.135, + "step": 17393 + }, + { + "epoch": 2.4560858514543913, + "grad_norm": 3.3621167729868, + "learning_rate": 1.675154506981509e-06, + "loss": 0.1809, + "step": 17394 + }, + { + "epoch": 2.456227054504377, + "grad_norm": 2.740774260966187, + "learning_rate": 1.674309983208413e-06, + "loss": 0.1205, + "step": 17395 + }, + { + "epoch": 2.456368257554363, + "grad_norm": 2.798120709908191, + "learning_rate": 1.673465652920755e-06, + "loss": 0.135, + "step": 17396 + }, + { + "epoch": 2.456509460604349, + "grad_norm": 2.7445460921659293, + "learning_rate": 1.6726215161381598e-06, + "loss": 0.1076, + "step": 17397 + }, + { + "epoch": 2.456650663654335, + "grad_norm": 3.2006709477414477, + "learning_rate": 1.6717775728802432e-06, + "loss": 0.148, + "step": 17398 + }, + { + "epoch": 2.4567918667043207, + "grad_norm": 4.764041781983336, + "learning_rate": 1.6709338231666194e-06, + "loss": 0.1354, + "step": 17399 + }, + { + "epoch": 2.4569330697543066, + "grad_norm": 3.523281515072695, + "learning_rate": 1.670090267016895e-06, + "loss": 0.1293, + "step": 17400 + }, + { + "epoch": 2.4570742728042925, + "grad_norm": 3.4260206525199344, + "learning_rate": 1.6692469044506765e-06, + "loss": 0.2088, + "step": 17401 + }, + { + "epoch": 2.4572154758542784, + "grad_norm": 2.927362873414556, + "learning_rate": 1.6684037354875626e-06, + "loss": 0.1731, + "step": 17402 + }, + { + "epoch": 2.4573566789042642, + "grad_norm": 3.4462600511058668, + "learning_rate": 1.6675607601471477e-06, + "loss": 0.1698, + "step": 17403 + }, + { + "epoch": 2.45749788195425, + "grad_norm": 3.5040878839990452, + "learning_rate": 1.6667179784490251e-06, + "loss": 0.1615, + "step": 17404 + }, + { + "epoch": 2.457639085004236, + "grad_norm": 2.8401030768069924, + "learning_rate": 1.6658753904127734e-06, + "loss": 0.1334, + "step": 17405 + }, + { + "epoch": 2.457780288054222, + "grad_norm": 2.412833641588675, + "learning_rate": 1.6650329960579792e-06, + "loss": 0.1113, + "step": 17406 + }, + { + "epoch": 2.4579214911042078, + "grad_norm": 2.5075717106421602, + "learning_rate": 1.6641907954042203e-06, + "loss": 0.1132, + "step": 17407 + }, + { + "epoch": 2.4580626941541937, + "grad_norm": 2.8115048756542302, + "learning_rate": 1.6633487884710663e-06, + "loss": 0.1251, + "step": 17408 + }, + { + "epoch": 2.4582038972041795, + "grad_norm": 3.6483814761494178, + "learning_rate": 1.6625069752780864e-06, + "loss": 0.1161, + "step": 17409 + }, + { + "epoch": 2.4583451002541654, + "grad_norm": 2.50306666117014, + "learning_rate": 1.6616653558448437e-06, + "loss": 0.1275, + "step": 17410 + }, + { + "epoch": 2.4584863033041513, + "grad_norm": 2.753911871622546, + "learning_rate": 1.660823930190897e-06, + "loss": 0.1371, + "step": 17411 + }, + { + "epoch": 2.458627506354137, + "grad_norm": 3.627864550231711, + "learning_rate": 1.6599826983358002e-06, + "loss": 0.1569, + "step": 17412 + }, + { + "epoch": 2.458768709404123, + "grad_norm": 2.8687432752209454, + "learning_rate": 1.659141660299105e-06, + "loss": 0.1281, + "step": 17413 + }, + { + "epoch": 2.458909912454109, + "grad_norm": 3.1283613024557155, + "learning_rate": 1.6583008161003544e-06, + "loss": 0.1517, + "step": 17414 + }, + { + "epoch": 2.459051115504095, + "grad_norm": 3.597307663349181, + "learning_rate": 1.6574601657590904e-06, + "loss": 0.1568, + "step": 17415 + }, + { + "epoch": 2.4591923185540807, + "grad_norm": 3.6533673444485966, + "learning_rate": 1.65661970929485e-06, + "loss": 0.1682, + "step": 17416 + }, + { + "epoch": 2.4593335216040666, + "grad_norm": 2.758510221765672, + "learning_rate": 1.6557794467271616e-06, + "loss": 0.1207, + "step": 17417 + }, + { + "epoch": 2.4594747246540525, + "grad_norm": 3.6437207311166606, + "learning_rate": 1.6549393780755574e-06, + "loss": 0.1728, + "step": 17418 + }, + { + "epoch": 2.4596159277040384, + "grad_norm": 4.259301713743304, + "learning_rate": 1.6540995033595587e-06, + "loss": 0.218, + "step": 17419 + }, + { + "epoch": 2.4597571307540242, + "grad_norm": 2.83964089028987, + "learning_rate": 1.653259822598683e-06, + "loss": 0.1267, + "step": 17420 + }, + { + "epoch": 2.45989833380401, + "grad_norm": 3.024499319090894, + "learning_rate": 1.652420335812447e-06, + "loss": 0.1561, + "step": 17421 + }, + { + "epoch": 2.460039536853996, + "grad_norm": 4.690077713236752, + "learning_rate": 1.6515810430203516e-06, + "loss": 0.2362, + "step": 17422 + }, + { + "epoch": 2.460180739903982, + "grad_norm": 2.699324303351017, + "learning_rate": 1.6507419442419103e-06, + "loss": 0.1045, + "step": 17423 + }, + { + "epoch": 2.4603219429539678, + "grad_norm": 3.2266939451434853, + "learning_rate": 1.649903039496621e-06, + "loss": 0.132, + "step": 17424 + }, + { + "epoch": 2.4604631460039537, + "grad_norm": 2.48723441117315, + "learning_rate": 1.6490643288039776e-06, + "loss": 0.1094, + "step": 17425 + }, + { + "epoch": 2.4606043490539395, + "grad_norm": 3.436449014421852, + "learning_rate": 1.6482258121834737e-06, + "loss": 0.1971, + "step": 17426 + }, + { + "epoch": 2.4607455521039254, + "grad_norm": 2.2810347251519265, + "learning_rate": 1.647387489654595e-06, + "loss": 0.1235, + "step": 17427 + }, + { + "epoch": 2.4608867551539113, + "grad_norm": 2.706304753585215, + "learning_rate": 1.6465493612368233e-06, + "loss": 0.1629, + "step": 17428 + }, + { + "epoch": 2.461027958203897, + "grad_norm": 4.066951427566559, + "learning_rate": 1.6457114269496378e-06, + "loss": 0.1985, + "step": 17429 + }, + { + "epoch": 2.461169161253883, + "grad_norm": 2.997228562186128, + "learning_rate": 1.6448736868125093e-06, + "loss": 0.1414, + "step": 17430 + }, + { + "epoch": 2.461310364303869, + "grad_norm": 2.815938641093809, + "learning_rate": 1.644036140844909e-06, + "loss": 0.1428, + "step": 17431 + }, + { + "epoch": 2.461451567353855, + "grad_norm": 2.4876020556754788, + "learning_rate": 1.6431987890663004e-06, + "loss": 0.1079, + "step": 17432 + }, + { + "epoch": 2.4615927704038407, + "grad_norm": 3.0003607973990327, + "learning_rate": 1.6423616314961421e-06, + "loss": 0.157, + "step": 17433 + }, + { + "epoch": 2.4617339734538266, + "grad_norm": 3.078283265163677, + "learning_rate": 1.6415246681538887e-06, + "loss": 0.1465, + "step": 17434 + }, + { + "epoch": 2.4618751765038125, + "grad_norm": 3.60747833432939, + "learning_rate": 1.6406878990589936e-06, + "loss": 0.1763, + "step": 17435 + }, + { + "epoch": 2.4620163795537984, + "grad_norm": 2.846443610800276, + "learning_rate": 1.6398513242309022e-06, + "loss": 0.1464, + "step": 17436 + }, + { + "epoch": 2.4621575826037843, + "grad_norm": 3.5025215706468185, + "learning_rate": 1.6390149436890556e-06, + "loss": 0.1474, + "step": 17437 + }, + { + "epoch": 2.46229878565377, + "grad_norm": 2.847339648816644, + "learning_rate": 1.638178757452894e-06, + "loss": 0.1251, + "step": 17438 + }, + { + "epoch": 2.462439988703756, + "grad_norm": 3.620187607031835, + "learning_rate": 1.6373427655418406e-06, + "loss": 0.2069, + "step": 17439 + }, + { + "epoch": 2.462581191753742, + "grad_norm": 2.309314331741859, + "learning_rate": 1.6365069679753331e-06, + "loss": 0.1044, + "step": 17440 + }, + { + "epoch": 2.462722394803728, + "grad_norm": 3.9836255030289163, + "learning_rate": 1.6356713647727917e-06, + "loss": 0.133, + "step": 17441 + }, + { + "epoch": 2.4628635978537137, + "grad_norm": 3.465757345704433, + "learning_rate": 1.6348359559536353e-06, + "loss": 0.1477, + "step": 17442 + }, + { + "epoch": 2.4630048009036996, + "grad_norm": 2.8996930796735496, + "learning_rate": 1.6340007415372783e-06, + "loss": 0.1587, + "step": 17443 + }, + { + "epoch": 2.4631460039536854, + "grad_norm": 3.0104116705945203, + "learning_rate": 1.6331657215431319e-06, + "loss": 0.1524, + "step": 17444 + }, + { + "epoch": 2.4632872070036713, + "grad_norm": 2.8736356532574887, + "learning_rate": 1.6323308959906003e-06, + "loss": 0.1642, + "step": 17445 + }, + { + "epoch": 2.463428410053657, + "grad_norm": 3.4576007428280096, + "learning_rate": 1.631496264899085e-06, + "loss": 0.1835, + "step": 17446 + }, + { + "epoch": 2.463569613103643, + "grad_norm": 3.4990671082834006, + "learning_rate": 1.6306618282879816e-06, + "loss": 0.1589, + "step": 17447 + }, + { + "epoch": 2.463710816153629, + "grad_norm": 2.968471656972759, + "learning_rate": 1.6298275861766843e-06, + "loss": 0.1614, + "step": 17448 + }, + { + "epoch": 2.463852019203615, + "grad_norm": 4.266959644294783, + "learning_rate": 1.628993538584578e-06, + "loss": 0.209, + "step": 17449 + }, + { + "epoch": 2.4639932222536007, + "grad_norm": 4.064974056591121, + "learning_rate": 1.6281596855310478e-06, + "loss": 0.1868, + "step": 17450 + }, + { + "epoch": 2.4641344253035866, + "grad_norm": 3.3614911273216883, + "learning_rate": 1.6273260270354673e-06, + "loss": 0.1439, + "step": 17451 + }, + { + "epoch": 2.4642756283535725, + "grad_norm": 3.1364421141816767, + "learning_rate": 1.626492563117217e-06, + "loss": 0.1389, + "step": 17452 + }, + { + "epoch": 2.4644168314035584, + "grad_norm": 3.8346860564277856, + "learning_rate": 1.625659293795664e-06, + "loss": 0.1664, + "step": 17453 + }, + { + "epoch": 2.4645580344535443, + "grad_norm": 3.020833602504063, + "learning_rate": 1.624826219090172e-06, + "loss": 0.1274, + "step": 17454 + }, + { + "epoch": 2.46469923750353, + "grad_norm": 3.9153079557253463, + "learning_rate": 1.6239933390201034e-06, + "loss": 0.2021, + "step": 17455 + }, + { + "epoch": 2.464840440553516, + "grad_norm": 4.13314547113519, + "learning_rate": 1.6231606536048083e-06, + "loss": 0.1687, + "step": 17456 + }, + { + "epoch": 2.464981643603502, + "grad_norm": 3.4186927060255474, + "learning_rate": 1.6223281628636433e-06, + "loss": 0.1703, + "step": 17457 + }, + { + "epoch": 2.465122846653488, + "grad_norm": 3.9303225526105448, + "learning_rate": 1.6214958668159552e-06, + "loss": 0.1692, + "step": 17458 + }, + { + "epoch": 2.4652640497034737, + "grad_norm": 3.0690573155983336, + "learning_rate": 1.6206637654810842e-06, + "loss": 0.1348, + "step": 17459 + }, + { + "epoch": 2.4654052527534596, + "grad_norm": 3.4103732079610047, + "learning_rate": 1.619831858878368e-06, + "loss": 0.1613, + "step": 17460 + }, + { + "epoch": 2.4655464558034454, + "grad_norm": 2.782586447504867, + "learning_rate": 1.6190001470271399e-06, + "loss": 0.1281, + "step": 17461 + }, + { + "epoch": 2.4656876588534313, + "grad_norm": 3.9539585873468046, + "learning_rate": 1.6181686299467303e-06, + "loss": 0.1439, + "step": 17462 + }, + { + "epoch": 2.465828861903417, + "grad_norm": 3.247244987904383, + "learning_rate": 1.6173373076564614e-06, + "loss": 0.1172, + "step": 17463 + }, + { + "epoch": 2.465970064953403, + "grad_norm": 2.9650938131173428, + "learning_rate": 1.6165061801756531e-06, + "loss": 0.1285, + "step": 17464 + }, + { + "epoch": 2.466111268003389, + "grad_norm": 2.580495917000253, + "learning_rate": 1.6156752475236205e-06, + "loss": 0.0995, + "step": 17465 + }, + { + "epoch": 2.466252471053375, + "grad_norm": 3.6374492339703175, + "learning_rate": 1.614844509719674e-06, + "loss": 0.21, + "step": 17466 + }, + { + "epoch": 2.4663936741033607, + "grad_norm": 2.839127941007601, + "learning_rate": 1.61401396678312e-06, + "loss": 0.1518, + "step": 17467 + }, + { + "epoch": 2.4665348771533466, + "grad_norm": 2.7396576525735816, + "learning_rate": 1.6131836187332584e-06, + "loss": 0.1582, + "step": 17468 + }, + { + "epoch": 2.4666760802033325, + "grad_norm": 3.8177171795370146, + "learning_rate": 1.6123534655893891e-06, + "loss": 0.2111, + "step": 17469 + }, + { + "epoch": 2.4668172832533184, + "grad_norm": 3.115372881842822, + "learning_rate": 1.6115235073708024e-06, + "loss": 0.1188, + "step": 17470 + }, + { + "epoch": 2.4669584863033043, + "grad_norm": 8.939925924167861, + "learning_rate": 1.6106937440967897e-06, + "loss": 0.168, + "step": 17471 + }, + { + "epoch": 2.46709968935329, + "grad_norm": 2.4191097076930053, + "learning_rate": 1.6098641757866285e-06, + "loss": 0.1234, + "step": 17472 + }, + { + "epoch": 2.467240892403276, + "grad_norm": 3.459741475404155, + "learning_rate": 1.6090348024596014e-06, + "loss": 0.1623, + "step": 17473 + }, + { + "epoch": 2.467382095453262, + "grad_norm": 3.732640680980925, + "learning_rate": 1.6082056241349787e-06, + "loss": 0.1334, + "step": 17474 + }, + { + "epoch": 2.467523298503248, + "grad_norm": 3.486616431068132, + "learning_rate": 1.6073766408320356e-06, + "loss": 0.1755, + "step": 17475 + }, + { + "epoch": 2.4676645015532337, + "grad_norm": 3.3786364555315616, + "learning_rate": 1.606547852570034e-06, + "loss": 0.1769, + "step": 17476 + }, + { + "epoch": 2.4678057046032196, + "grad_norm": 3.222997775248464, + "learning_rate": 1.605719259368237e-06, + "loss": 0.1162, + "step": 17477 + }, + { + "epoch": 2.4679469076532055, + "grad_norm": 3.409950238763562, + "learning_rate": 1.604890861245898e-06, + "loss": 0.1606, + "step": 17478 + }, + { + "epoch": 2.4680881107031913, + "grad_norm": 3.1805890242857164, + "learning_rate": 1.6040626582222706e-06, + "loss": 0.1673, + "step": 17479 + }, + { + "epoch": 2.4682293137531772, + "grad_norm": 2.785899036729417, + "learning_rate": 1.6032346503166007e-06, + "loss": 0.099, + "step": 17480 + }, + { + "epoch": 2.468370516803163, + "grad_norm": 2.7751807296388615, + "learning_rate": 1.6024068375481316e-06, + "loss": 0.1507, + "step": 17481 + }, + { + "epoch": 2.468511719853149, + "grad_norm": 3.1638597997118376, + "learning_rate": 1.6015792199361003e-06, + "loss": 0.1095, + "step": 17482 + }, + { + "epoch": 2.468652922903135, + "grad_norm": 3.4051973491271985, + "learning_rate": 1.6007517974997411e-06, + "loss": 0.2028, + "step": 17483 + }, + { + "epoch": 2.4687941259531208, + "grad_norm": 2.9150151152812587, + "learning_rate": 1.5999245702582833e-06, + "loss": 0.1532, + "step": 17484 + }, + { + "epoch": 2.4689353290031066, + "grad_norm": 3.341212384410382, + "learning_rate": 1.5990975382309503e-06, + "loss": 0.1778, + "step": 17485 + }, + { + "epoch": 2.4690765320530925, + "grad_norm": 3.635493155859683, + "learning_rate": 1.5982707014369603e-06, + "loss": 0.2279, + "step": 17486 + }, + { + "epoch": 2.4692177351030784, + "grad_norm": 2.8349561273788506, + "learning_rate": 1.5974440598955332e-06, + "loss": 0.1595, + "step": 17487 + }, + { + "epoch": 2.4693589381530643, + "grad_norm": 3.1000972300431178, + "learning_rate": 1.5966176136258794e-06, + "loss": 0.1504, + "step": 17488 + }, + { + "epoch": 2.46950014120305, + "grad_norm": 3.340010291144055, + "learning_rate": 1.595791362647201e-06, + "loss": 0.123, + "step": 17489 + }, + { + "epoch": 2.469641344253036, + "grad_norm": 4.185348719790143, + "learning_rate": 1.5949653069787018e-06, + "loss": 0.219, + "step": 17490 + }, + { + "epoch": 2.469782547303022, + "grad_norm": 2.869577011066909, + "learning_rate": 1.5941394466395766e-06, + "loss": 0.1534, + "step": 17491 + }, + { + "epoch": 2.469923750353008, + "grad_norm": 2.452904297489696, + "learning_rate": 1.5933137816490229e-06, + "loss": 0.1321, + "step": 17492 + }, + { + "epoch": 2.4700649534029937, + "grad_norm": 2.715586916934936, + "learning_rate": 1.5924883120262258e-06, + "loss": 0.1322, + "step": 17493 + }, + { + "epoch": 2.4702061564529796, + "grad_norm": 3.170999805839384, + "learning_rate": 1.5916630377903696e-06, + "loss": 0.1385, + "step": 17494 + }, + { + "epoch": 2.4703473595029655, + "grad_norm": 2.8094828549000512, + "learning_rate": 1.5908379589606338e-06, + "loss": 0.1431, + "step": 17495 + }, + { + "epoch": 2.4704885625529514, + "grad_norm": 3.062954453590655, + "learning_rate": 1.5900130755561916e-06, + "loss": 0.1447, + "step": 17496 + }, + { + "epoch": 2.4706297656029372, + "grad_norm": 3.3837366998767857, + "learning_rate": 1.5891883875962132e-06, + "loss": 0.1411, + "step": 17497 + }, + { + "epoch": 2.470770968652923, + "grad_norm": 3.2222464112601332, + "learning_rate": 1.588363895099866e-06, + "loss": 0.1517, + "step": 17498 + }, + { + "epoch": 2.470912171702909, + "grad_norm": 2.4772060577729, + "learning_rate": 1.5875395980863073e-06, + "loss": 0.1217, + "step": 17499 + }, + { + "epoch": 2.471053374752895, + "grad_norm": 3.4423324885946114, + "learning_rate": 1.5867154965746956e-06, + "loss": 0.1508, + "step": 17500 + }, + { + "epoch": 2.4711945778028808, + "grad_norm": 3.0374894961655245, + "learning_rate": 1.585891590584183e-06, + "loss": 0.1324, + "step": 17501 + }, + { + "epoch": 2.4713357808528666, + "grad_norm": 3.118968647874877, + "learning_rate": 1.585067880133916e-06, + "loss": 0.1307, + "step": 17502 + }, + { + "epoch": 2.4714769839028525, + "grad_norm": 3.4277920608675974, + "learning_rate": 1.584244365243035e-06, + "loss": 0.1552, + "step": 17503 + }, + { + "epoch": 2.4716181869528384, + "grad_norm": 2.5911953714514393, + "learning_rate": 1.583421045930682e-06, + "loss": 0.1153, + "step": 17504 + }, + { + "epoch": 2.4717593900028243, + "grad_norm": 2.7895566997725467, + "learning_rate": 1.5825979222159925e-06, + "loss": 0.1226, + "step": 17505 + }, + { + "epoch": 2.47190059305281, + "grad_norm": 3.9314562349301037, + "learning_rate": 1.5817749941180893e-06, + "loss": 0.159, + "step": 17506 + }, + { + "epoch": 2.472041796102796, + "grad_norm": 2.8682109870292685, + "learning_rate": 1.5809522616560991e-06, + "loss": 0.1511, + "step": 17507 + }, + { + "epoch": 2.472182999152782, + "grad_norm": 2.4711040950046232, + "learning_rate": 1.580129724849141e-06, + "loss": 0.1005, + "step": 17508 + }, + { + "epoch": 2.472324202202768, + "grad_norm": 3.086285901791688, + "learning_rate": 1.579307383716333e-06, + "loss": 0.1231, + "step": 17509 + }, + { + "epoch": 2.4724654052527533, + "grad_norm": 3.322237216498263, + "learning_rate": 1.5784852382767856e-06, + "loss": 0.1322, + "step": 17510 + }, + { + "epoch": 2.472606608302739, + "grad_norm": 3.068353430211476, + "learning_rate": 1.5776632885496045e-06, + "loss": 0.1342, + "step": 17511 + }, + { + "epoch": 2.472747811352725, + "grad_norm": 3.3780245109484093, + "learning_rate": 1.5768415345538911e-06, + "loss": 0.1665, + "step": 17512 + }, + { + "epoch": 2.472889014402711, + "grad_norm": 3.5982552310028706, + "learning_rate": 1.5760199763087425e-06, + "loss": 0.165, + "step": 17513 + }, + { + "epoch": 2.473030217452697, + "grad_norm": 3.1870990774202945, + "learning_rate": 1.575198613833252e-06, + "loss": 0.1308, + "step": 17514 + }, + { + "epoch": 2.4731714205026827, + "grad_norm": 2.789887818065228, + "learning_rate": 1.5743774471465068e-06, + "loss": 0.128, + "step": 17515 + }, + { + "epoch": 2.4733126235526686, + "grad_norm": 3.0491998200312262, + "learning_rate": 1.5735564762675914e-06, + "loss": 0.1275, + "step": 17516 + }, + { + "epoch": 2.4734538266026544, + "grad_norm": 3.3299892960656403, + "learning_rate": 1.572735701215584e-06, + "loss": 0.147, + "step": 17517 + }, + { + "epoch": 2.4735950296526403, + "grad_norm": 3.4027546741820673, + "learning_rate": 1.5719151220095596e-06, + "loss": 0.1351, + "step": 17518 + }, + { + "epoch": 2.473736232702626, + "grad_norm": 2.9995565937027644, + "learning_rate": 1.5710947386685881e-06, + "loss": 0.1043, + "step": 17519 + }, + { + "epoch": 2.473877435752612, + "grad_norm": 2.9120239310611793, + "learning_rate": 1.5702745512117323e-06, + "loss": 0.131, + "step": 17520 + }, + { + "epoch": 2.474018638802598, + "grad_norm": 3.2943396028712555, + "learning_rate": 1.569454559658058e-06, + "loss": 0.1189, + "step": 17521 + }, + { + "epoch": 2.474159841852584, + "grad_norm": 3.7027942697781993, + "learning_rate": 1.5686347640266208e-06, + "loss": 0.1727, + "step": 17522 + }, + { + "epoch": 2.4743010449025697, + "grad_norm": 3.3277307577766115, + "learning_rate": 1.5678151643364692e-06, + "loss": 0.149, + "step": 17523 + }, + { + "epoch": 2.4744422479525556, + "grad_norm": 2.552802389575025, + "learning_rate": 1.56699576060665e-06, + "loss": 0.1467, + "step": 17524 + }, + { + "epoch": 2.4745834510025415, + "grad_norm": 3.0529331249628413, + "learning_rate": 1.5661765528562057e-06, + "loss": 0.156, + "step": 17525 + }, + { + "epoch": 2.4747246540525274, + "grad_norm": 3.1889106045306086, + "learning_rate": 1.5653575411041788e-06, + "loss": 0.1381, + "step": 17526 + }, + { + "epoch": 2.4748658571025133, + "grad_norm": 3.2774968889926073, + "learning_rate": 1.5645387253695998e-06, + "loss": 0.1592, + "step": 17527 + }, + { + "epoch": 2.475007060152499, + "grad_norm": 2.76625527204261, + "learning_rate": 1.5637201056714967e-06, + "loss": 0.1408, + "step": 17528 + }, + { + "epoch": 2.475148263202485, + "grad_norm": 2.8575636908666704, + "learning_rate": 1.5629016820288966e-06, + "loss": 0.1597, + "step": 17529 + }, + { + "epoch": 2.475289466252471, + "grad_norm": 3.3435659455285394, + "learning_rate": 1.5620834544608166e-06, + "loss": 0.1193, + "step": 17530 + }, + { + "epoch": 2.475430669302457, + "grad_norm": 3.197004504653835, + "learning_rate": 1.5612654229862734e-06, + "loss": 0.1353, + "step": 17531 + }, + { + "epoch": 2.4755718723524427, + "grad_norm": 3.4938399935295466, + "learning_rate": 1.5604475876242775e-06, + "loss": 0.1235, + "step": 17532 + }, + { + "epoch": 2.4757130754024286, + "grad_norm": 2.808277216274006, + "learning_rate": 1.5596299483938348e-06, + "loss": 0.1204, + "step": 17533 + }, + { + "epoch": 2.4758542784524145, + "grad_norm": 3.7501660756823747, + "learning_rate": 1.558812505313947e-06, + "loss": 0.1465, + "step": 17534 + }, + { + "epoch": 2.4759954815024003, + "grad_norm": 3.5110235989917253, + "learning_rate": 1.5579952584036117e-06, + "loss": 0.1502, + "step": 17535 + }, + { + "epoch": 2.4761366845523862, + "grad_norm": 2.6060063753929734, + "learning_rate": 1.5571782076818197e-06, + "loss": 0.1123, + "step": 17536 + }, + { + "epoch": 2.476277887602372, + "grad_norm": 3.9335809926105876, + "learning_rate": 1.556361353167558e-06, + "loss": 0.1805, + "step": 17537 + }, + { + "epoch": 2.476419090652358, + "grad_norm": 3.346129541368894, + "learning_rate": 1.5555446948798147e-06, + "loss": 0.1285, + "step": 17538 + }, + { + "epoch": 2.476560293702344, + "grad_norm": 2.812795225340383, + "learning_rate": 1.5547282328375678e-06, + "loss": 0.1395, + "step": 17539 + }, + { + "epoch": 2.4767014967523298, + "grad_norm": 3.703814615676757, + "learning_rate": 1.553911967059788e-06, + "loss": 0.1733, + "step": 17540 + }, + { + "epoch": 2.4768426998023156, + "grad_norm": 2.586212251506088, + "learning_rate": 1.5530958975654454e-06, + "loss": 0.1323, + "step": 17541 + }, + { + "epoch": 2.4769839028523015, + "grad_norm": 3.5477150890357776, + "learning_rate": 1.5522800243735037e-06, + "loss": 0.1421, + "step": 17542 + }, + { + "epoch": 2.4771251059022874, + "grad_norm": 3.7601379108465065, + "learning_rate": 1.551464347502929e-06, + "loss": 0.1491, + "step": 17543 + }, + { + "epoch": 2.4772663089522733, + "grad_norm": 3.8945412677106686, + "learning_rate": 1.5506488669726738e-06, + "loss": 0.1672, + "step": 17544 + }, + { + "epoch": 2.477407512002259, + "grad_norm": 3.0989518214987744, + "learning_rate": 1.5498335828016909e-06, + "loss": 0.1587, + "step": 17545 + }, + { + "epoch": 2.477548715052245, + "grad_norm": 5.255567504835095, + "learning_rate": 1.549018495008925e-06, + "loss": 0.1956, + "step": 17546 + }, + { + "epoch": 2.477689918102231, + "grad_norm": 5.168274462591074, + "learning_rate": 1.5482036036133197e-06, + "loss": 0.1355, + "step": 17547 + }, + { + "epoch": 2.477831121152217, + "grad_norm": 2.927846773749783, + "learning_rate": 1.5473889086338134e-06, + "loss": 0.115, + "step": 17548 + }, + { + "epoch": 2.4779723242022027, + "grad_norm": 2.7801101330000138, + "learning_rate": 1.5465744100893377e-06, + "loss": 0.0779, + "step": 17549 + }, + { + "epoch": 2.4781135272521886, + "grad_norm": 2.8953702455430776, + "learning_rate": 1.5457601079988226e-06, + "loss": 0.1168, + "step": 17550 + }, + { + "epoch": 2.4782547303021745, + "grad_norm": 3.3148804331646837, + "learning_rate": 1.5449460023811913e-06, + "loss": 0.1664, + "step": 17551 + }, + { + "epoch": 2.4783959333521604, + "grad_norm": 4.025901712913785, + "learning_rate": 1.5441320932553627e-06, + "loss": 0.1633, + "step": 17552 + }, + { + "epoch": 2.4785371364021462, + "grad_norm": 3.0077367367990178, + "learning_rate": 1.543318380640253e-06, + "loss": 0.1417, + "step": 17553 + }, + { + "epoch": 2.478678339452132, + "grad_norm": 2.3914597141889855, + "learning_rate": 1.5425048645547703e-06, + "loss": 0.0908, + "step": 17554 + }, + { + "epoch": 2.478819542502118, + "grad_norm": 3.578360414352736, + "learning_rate": 1.5416915450178238e-06, + "loss": 0.169, + "step": 17555 + }, + { + "epoch": 2.478960745552104, + "grad_norm": 3.2555868666849253, + "learning_rate": 1.5408784220483152e-06, + "loss": 0.1755, + "step": 17556 + }, + { + "epoch": 2.4791019486020898, + "grad_norm": 3.095855915544936, + "learning_rate": 1.5400654956651362e-06, + "loss": 0.1584, + "step": 17557 + }, + { + "epoch": 2.4792431516520756, + "grad_norm": 3.1106867032734837, + "learning_rate": 1.5392527658871813e-06, + "loss": 0.108, + "step": 17558 + }, + { + "epoch": 2.4793843547020615, + "grad_norm": 2.3079200992962345, + "learning_rate": 1.538440232733337e-06, + "loss": 0.1153, + "step": 17559 + }, + { + "epoch": 2.4795255577520474, + "grad_norm": 3.98520952609747, + "learning_rate": 1.537627896222489e-06, + "loss": 0.1893, + "step": 17560 + }, + { + "epoch": 2.4796667608020333, + "grad_norm": 3.1316306142290364, + "learning_rate": 1.5368157563735142e-06, + "loss": 0.1599, + "step": 17561 + }, + { + "epoch": 2.479807963852019, + "grad_norm": 3.5687148132743958, + "learning_rate": 1.5360038132052869e-06, + "loss": 0.1529, + "step": 17562 + }, + { + "epoch": 2.479949166902005, + "grad_norm": 3.016658880577094, + "learning_rate": 1.5351920667366749e-06, + "loss": 0.1272, + "step": 17563 + }, + { + "epoch": 2.480090369951991, + "grad_norm": 4.533544539336306, + "learning_rate": 1.5343805169865434e-06, + "loss": 0.1952, + "step": 17564 + }, + { + "epoch": 2.480231573001977, + "grad_norm": 3.0762225497191684, + "learning_rate": 1.5335691639737528e-06, + "loss": 0.1605, + "step": 17565 + }, + { + "epoch": 2.4803727760519627, + "grad_norm": 3.0325499461721654, + "learning_rate": 1.5327580077171589e-06, + "loss": 0.1316, + "step": 17566 + }, + { + "epoch": 2.4805139791019486, + "grad_norm": 2.955595019961034, + "learning_rate": 1.5319470482356125e-06, + "loss": 0.1437, + "step": 17567 + }, + { + "epoch": 2.4806551821519345, + "grad_norm": 2.939285281850652, + "learning_rate": 1.5311362855479584e-06, + "loss": 0.1402, + "step": 17568 + }, + { + "epoch": 2.4807963852019204, + "grad_norm": 3.425360283311531, + "learning_rate": 1.5303257196730403e-06, + "loss": 0.1489, + "step": 17569 + }, + { + "epoch": 2.4809375882519062, + "grad_norm": 3.7449174537883, + "learning_rate": 1.5295153506296944e-06, + "loss": 0.1707, + "step": 17570 + }, + { + "epoch": 2.481078791301892, + "grad_norm": 2.7661406994854625, + "learning_rate": 1.5287051784367524e-06, + "loss": 0.1328, + "step": 17571 + }, + { + "epoch": 2.481219994351878, + "grad_norm": 3.3193693787937595, + "learning_rate": 1.5278952031130445e-06, + "loss": 0.1169, + "step": 17572 + }, + { + "epoch": 2.481361197401864, + "grad_norm": 3.5496135187085294, + "learning_rate": 1.527085424677397e-06, + "loss": 0.1616, + "step": 17573 + }, + { + "epoch": 2.4815024004518498, + "grad_norm": 2.671518400695353, + "learning_rate": 1.5262758431486213e-06, + "loss": 0.12, + "step": 17574 + }, + { + "epoch": 2.4816436035018357, + "grad_norm": 2.958518437444626, + "learning_rate": 1.525466458545536e-06, + "loss": 0.1131, + "step": 17575 + }, + { + "epoch": 2.4817848065518215, + "grad_norm": 2.61450492102859, + "learning_rate": 1.524657270886951e-06, + "loss": 0.1321, + "step": 17576 + }, + { + "epoch": 2.4819260096018074, + "grad_norm": 3.1517575614491427, + "learning_rate": 1.5238482801916676e-06, + "loss": 0.1464, + "step": 17577 + }, + { + "epoch": 2.4820672126517933, + "grad_norm": 3.569853137524002, + "learning_rate": 1.5230394864784925e-06, + "loss": 0.158, + "step": 17578 + }, + { + "epoch": 2.482208415701779, + "grad_norm": 2.621176894006861, + "learning_rate": 1.5222308897662185e-06, + "loss": 0.1244, + "step": 17579 + }, + { + "epoch": 2.482349618751765, + "grad_norm": 3.7318544841751398, + "learning_rate": 1.5214224900736375e-06, + "loss": 0.1623, + "step": 17580 + }, + { + "epoch": 2.482490821801751, + "grad_norm": 3.0417534735483613, + "learning_rate": 1.5206142874195362e-06, + "loss": 0.131, + "step": 17581 + }, + { + "epoch": 2.482632024851737, + "grad_norm": 3.2938696808151655, + "learning_rate": 1.5198062818226967e-06, + "loss": 0.1266, + "step": 17582 + }, + { + "epoch": 2.4827732279017227, + "grad_norm": 3.1852431550670084, + "learning_rate": 1.518998473301897e-06, + "loss": 0.1202, + "step": 17583 + }, + { + "epoch": 2.4829144309517086, + "grad_norm": 4.0581561771278665, + "learning_rate": 1.5181908618759101e-06, + "loss": 0.1659, + "step": 17584 + }, + { + "epoch": 2.4830556340016945, + "grad_norm": 3.1108744921911677, + "learning_rate": 1.5173834475635042e-06, + "loss": 0.1217, + "step": 17585 + }, + { + "epoch": 2.4831968370516804, + "grad_norm": 3.264317306737899, + "learning_rate": 1.5165762303834442e-06, + "loss": 0.1156, + "step": 17586 + }, + { + "epoch": 2.4833380401016663, + "grad_norm": 3.2798832204496637, + "learning_rate": 1.5157692103544884e-06, + "loss": 0.1505, + "step": 17587 + }, + { + "epoch": 2.483479243151652, + "grad_norm": 3.091248537062963, + "learning_rate": 1.5149623874953922e-06, + "loss": 0.141, + "step": 17588 + }, + { + "epoch": 2.483620446201638, + "grad_norm": 3.549769446011815, + "learning_rate": 1.5141557618249036e-06, + "loss": 0.182, + "step": 17589 + }, + { + "epoch": 2.483761649251624, + "grad_norm": 2.6999325024117535, + "learning_rate": 1.5133493333617755e-06, + "loss": 0.1135, + "step": 17590 + }, + { + "epoch": 2.48390285230161, + "grad_norm": 3.102197807182706, + "learning_rate": 1.5125431021247406e-06, + "loss": 0.1644, + "step": 17591 + }, + { + "epoch": 2.4840440553515957, + "grad_norm": 3.105625791971736, + "learning_rate": 1.5117370681325393e-06, + "loss": 0.1785, + "step": 17592 + }, + { + "epoch": 2.4841852584015816, + "grad_norm": 3.3425189850768042, + "learning_rate": 1.5109312314039027e-06, + "loss": 0.1289, + "step": 17593 + }, + { + "epoch": 2.4843264614515674, + "grad_norm": 3.664054501402457, + "learning_rate": 1.5101255919575552e-06, + "loss": 0.163, + "step": 17594 + }, + { + "epoch": 2.4844676645015533, + "grad_norm": 3.1671716217886168, + "learning_rate": 1.5093201498122246e-06, + "loss": 0.1712, + "step": 17595 + }, + { + "epoch": 2.484608867551539, + "grad_norm": 3.1665732967678952, + "learning_rate": 1.5085149049866277e-06, + "loss": 0.1081, + "step": 17596 + }, + { + "epoch": 2.484750070601525, + "grad_norm": 4.158963507598375, + "learning_rate": 1.5077098574994763e-06, + "loss": 0.2032, + "step": 17597 + }, + { + "epoch": 2.484891273651511, + "grad_norm": 3.375506140054457, + "learning_rate": 1.5069050073694813e-06, + "loss": 0.1293, + "step": 17598 + }, + { + "epoch": 2.485032476701497, + "grad_norm": 3.0920788624373072, + "learning_rate": 1.5061003546153452e-06, + "loss": 0.112, + "step": 17599 + }, + { + "epoch": 2.4851736797514827, + "grad_norm": 3.3849232947374905, + "learning_rate": 1.5052958992557687e-06, + "loss": 0.1539, + "step": 17600 + }, + { + "epoch": 2.4853148828014686, + "grad_norm": 3.486714769602391, + "learning_rate": 1.5044916413094478e-06, + "loss": 0.132, + "step": 17601 + }, + { + "epoch": 2.4854560858514545, + "grad_norm": 3.5646193416498027, + "learning_rate": 1.5036875807950712e-06, + "loss": 0.1777, + "step": 17602 + }, + { + "epoch": 2.4855972889014404, + "grad_norm": 2.626828645203138, + "learning_rate": 1.502883717731326e-06, + "loss": 0.0978, + "step": 17603 + }, + { + "epoch": 2.4857384919514263, + "grad_norm": 2.8047197023968526, + "learning_rate": 1.502080052136894e-06, + "loss": 0.1239, + "step": 17604 + }, + { + "epoch": 2.485879695001412, + "grad_norm": 3.2551043324515128, + "learning_rate": 1.5012765840304522e-06, + "loss": 0.1446, + "step": 17605 + }, + { + "epoch": 2.486020898051398, + "grad_norm": 2.7173452193609067, + "learning_rate": 1.5004733134306692e-06, + "loss": 0.1243, + "step": 17606 + }, + { + "epoch": 2.486162101101384, + "grad_norm": 2.5948833901245334, + "learning_rate": 1.4996702403562202e-06, + "loss": 0.1089, + "step": 17607 + }, + { + "epoch": 2.48630330415137, + "grad_norm": 2.9663052104352388, + "learning_rate": 1.4988673648257624e-06, + "loss": 0.1435, + "step": 17608 + }, + { + "epoch": 2.4864445072013557, + "grad_norm": 2.8957222292630442, + "learning_rate": 1.498064686857954e-06, + "loss": 0.1409, + "step": 17609 + }, + { + "epoch": 2.4865857102513416, + "grad_norm": 3.3490107451707085, + "learning_rate": 1.4972622064714515e-06, + "loss": 0.1513, + "step": 17610 + }, + { + "epoch": 2.4867269133013274, + "grad_norm": 3.3136083917892876, + "learning_rate": 1.496459923684902e-06, + "loss": 0.1467, + "step": 17611 + }, + { + "epoch": 2.4868681163513133, + "grad_norm": 2.8165355954495537, + "learning_rate": 1.495657838516953e-06, + "loss": 0.0933, + "step": 17612 + }, + { + "epoch": 2.487009319401299, + "grad_norm": 3.683759204585895, + "learning_rate": 1.4948559509862426e-06, + "loss": 0.1759, + "step": 17613 + }, + { + "epoch": 2.487150522451285, + "grad_norm": 3.4138961903378657, + "learning_rate": 1.4940542611114073e-06, + "loss": 0.1583, + "step": 17614 + }, + { + "epoch": 2.487291725501271, + "grad_norm": 3.735169473794392, + "learning_rate": 1.4932527689110764e-06, + "loss": 0.1832, + "step": 17615 + }, + { + "epoch": 2.487432928551257, + "grad_norm": 3.2170959087045548, + "learning_rate": 1.4924514744038787e-06, + "loss": 0.1477, + "step": 17616 + }, + { + "epoch": 2.4875741316012427, + "grad_norm": 2.4690067280094934, + "learning_rate": 1.4916503776084345e-06, + "loss": 0.116, + "step": 17617 + }, + { + "epoch": 2.4877153346512286, + "grad_norm": 3.2862290320364846, + "learning_rate": 1.4908494785433603e-06, + "loss": 0.1946, + "step": 17618 + }, + { + "epoch": 2.4878565377012145, + "grad_norm": 3.6546785571413523, + "learning_rate": 1.490048777227271e-06, + "loss": 0.1559, + "step": 17619 + }, + { + "epoch": 2.4879977407512004, + "grad_norm": 3.4036193841056077, + "learning_rate": 1.4892482736787717e-06, + "loss": 0.171, + "step": 17620 + }, + { + "epoch": 2.4881389438011863, + "grad_norm": 2.711937421368551, + "learning_rate": 1.4884479679164664e-06, + "loss": 0.1214, + "step": 17621 + }, + { + "epoch": 2.488280146851172, + "grad_norm": 3.3735292347676684, + "learning_rate": 1.487647859958956e-06, + "loss": 0.1665, + "step": 17622 + }, + { + "epoch": 2.488421349901158, + "grad_norm": 2.752707937336635, + "learning_rate": 1.48684794982483e-06, + "loss": 0.134, + "step": 17623 + }, + { + "epoch": 2.488562552951144, + "grad_norm": 2.968423486010832, + "learning_rate": 1.4860482375326857e-06, + "loss": 0.1291, + "step": 17624 + }, + { + "epoch": 2.48870375600113, + "grad_norm": 3.172802763592464, + "learning_rate": 1.4852487231011014e-06, + "loss": 0.1542, + "step": 17625 + }, + { + "epoch": 2.4888449590511157, + "grad_norm": 2.816323071394256, + "learning_rate": 1.4844494065486592e-06, + "loss": 0.1445, + "step": 17626 + }, + { + "epoch": 2.4889861621011016, + "grad_norm": 4.087037557995734, + "learning_rate": 1.4836502878939351e-06, + "loss": 0.1511, + "step": 17627 + }, + { + "epoch": 2.4891273651510875, + "grad_norm": 2.8674684954784597, + "learning_rate": 1.4828513671554978e-06, + "loss": 0.1226, + "step": 17628 + }, + { + "epoch": 2.489268568201073, + "grad_norm": 3.9774349405132448, + "learning_rate": 1.48205264435192e-06, + "loss": 0.2094, + "step": 17629 + }, + { + "epoch": 2.489409771251059, + "grad_norm": 3.4092866882016675, + "learning_rate": 1.4812541195017593e-06, + "loss": 0.1469, + "step": 17630 + }, + { + "epoch": 2.4895509743010447, + "grad_norm": 3.6014494489813553, + "learning_rate": 1.480455792623574e-06, + "loss": 0.164, + "step": 17631 + }, + { + "epoch": 2.4896921773510305, + "grad_norm": 3.310735723643198, + "learning_rate": 1.4796576637359194e-06, + "loss": 0.148, + "step": 17632 + }, + { + "epoch": 2.4898333804010164, + "grad_norm": 3.4002429927075943, + "learning_rate": 1.4788597328573362e-06, + "loss": 0.1396, + "step": 17633 + }, + { + "epoch": 2.4899745834510023, + "grad_norm": 2.820426823362409, + "learning_rate": 1.478062000006375e-06, + "loss": 0.1195, + "step": 17634 + }, + { + "epoch": 2.490115786500988, + "grad_norm": 3.3137533043652794, + "learning_rate": 1.4772644652015722e-06, + "loss": 0.1833, + "step": 17635 + }, + { + "epoch": 2.490256989550974, + "grad_norm": 3.8286886264338005, + "learning_rate": 1.4764671284614629e-06, + "loss": 0.1204, + "step": 17636 + }, + { + "epoch": 2.49039819260096, + "grad_norm": 3.748248426270184, + "learning_rate": 1.4756699898045767e-06, + "loss": 0.1634, + "step": 17637 + }, + { + "epoch": 2.490539395650946, + "grad_norm": 2.8661830166724362, + "learning_rate": 1.474873049249439e-06, + "loss": 0.1191, + "step": 17638 + }, + { + "epoch": 2.4906805987009317, + "grad_norm": 3.7964122146022365, + "learning_rate": 1.4740763068145692e-06, + "loss": 0.1872, + "step": 17639 + }, + { + "epoch": 2.4908218017509176, + "grad_norm": 3.4287227724208837, + "learning_rate": 1.4732797625184814e-06, + "loss": 0.1694, + "step": 17640 + }, + { + "epoch": 2.4909630048009035, + "grad_norm": 3.6434173886332735, + "learning_rate": 1.4724834163796942e-06, + "loss": 0.1778, + "step": 17641 + }, + { + "epoch": 2.4911042078508894, + "grad_norm": 3.1408643009788713, + "learning_rate": 1.4716872684167082e-06, + "loss": 0.1508, + "step": 17642 + }, + { + "epoch": 2.4912454109008753, + "grad_norm": 2.878316055019368, + "learning_rate": 1.4708913186480266e-06, + "loss": 0.1562, + "step": 17643 + }, + { + "epoch": 2.491386613950861, + "grad_norm": 2.6831706547096497, + "learning_rate": 1.4700955670921468e-06, + "loss": 0.119, + "step": 17644 + }, + { + "epoch": 2.491527817000847, + "grad_norm": 2.8761823819257857, + "learning_rate": 1.4693000137675605e-06, + "loss": 0.1197, + "step": 17645 + }, + { + "epoch": 2.491669020050833, + "grad_norm": 2.8153197980929408, + "learning_rate": 1.4685046586927598e-06, + "loss": 0.126, + "step": 17646 + }, + { + "epoch": 2.491810223100819, + "grad_norm": 2.5314129963771057, + "learning_rate": 1.4677095018862264e-06, + "loss": 0.1246, + "step": 17647 + }, + { + "epoch": 2.4919514261508047, + "grad_norm": 3.8199524010882855, + "learning_rate": 1.46691454336644e-06, + "loss": 0.1872, + "step": 17648 + }, + { + "epoch": 2.4920926292007906, + "grad_norm": 3.000554238600249, + "learning_rate": 1.4661197831518759e-06, + "loss": 0.1481, + "step": 17649 + }, + { + "epoch": 2.4922338322507764, + "grad_norm": 2.9145598270844144, + "learning_rate": 1.465325221260998e-06, + "loss": 0.1304, + "step": 17650 + }, + { + "epoch": 2.4923750353007623, + "grad_norm": 2.927832959293454, + "learning_rate": 1.4645308577122786e-06, + "loss": 0.1281, + "step": 17651 + }, + { + "epoch": 2.492516238350748, + "grad_norm": 4.019479257118664, + "learning_rate": 1.463736692524176e-06, + "loss": 0.1375, + "step": 17652 + }, + { + "epoch": 2.492657441400734, + "grad_norm": 2.9943075521314064, + "learning_rate": 1.4629427257151462e-06, + "loss": 0.1384, + "step": 17653 + }, + { + "epoch": 2.49279864445072, + "grad_norm": 3.3232067279097666, + "learning_rate": 1.4621489573036407e-06, + "loss": 0.1787, + "step": 17654 + }, + { + "epoch": 2.492939847500706, + "grad_norm": 2.6769758857248713, + "learning_rate": 1.4613553873081054e-06, + "loss": 0.1531, + "step": 17655 + }, + { + "epoch": 2.4930810505506917, + "grad_norm": 2.8277921668040626, + "learning_rate": 1.4605620157469835e-06, + "loss": 0.1308, + "step": 17656 + }, + { + "epoch": 2.4932222536006776, + "grad_norm": 2.7161753447922794, + "learning_rate": 1.4597688426387114e-06, + "loss": 0.1087, + "step": 17657 + }, + { + "epoch": 2.4933634566506635, + "grad_norm": 2.769098729745969, + "learning_rate": 1.4589758680017263e-06, + "loss": 0.128, + "step": 17658 + }, + { + "epoch": 2.4935046597006494, + "grad_norm": 2.810276770286583, + "learning_rate": 1.4581830918544515e-06, + "loss": 0.1276, + "step": 17659 + }, + { + "epoch": 2.4936458627506353, + "grad_norm": 2.961492398297367, + "learning_rate": 1.4573905142153134e-06, + "loss": 0.1558, + "step": 17660 + }, + { + "epoch": 2.493787065800621, + "grad_norm": 2.9936755533757564, + "learning_rate": 1.4565981351027303e-06, + "loss": 0.1131, + "step": 17661 + }, + { + "epoch": 2.493928268850607, + "grad_norm": 4.57162151288452, + "learning_rate": 1.4558059545351144e-06, + "loss": 0.2249, + "step": 17662 + }, + { + "epoch": 2.494069471900593, + "grad_norm": 3.4459026064783216, + "learning_rate": 1.45501397253088e-06, + "loss": 0.1597, + "step": 17663 + }, + { + "epoch": 2.494210674950579, + "grad_norm": 3.4957554275345175, + "learning_rate": 1.4542221891084307e-06, + "loss": 0.1386, + "step": 17664 + }, + { + "epoch": 2.4943518780005647, + "grad_norm": 2.7017313861424643, + "learning_rate": 1.453430604286168e-06, + "loss": 0.13, + "step": 17665 + }, + { + "epoch": 2.4944930810505506, + "grad_norm": 2.550638618392824, + "learning_rate": 1.4526392180824888e-06, + "loss": 0.1266, + "step": 17666 + }, + { + "epoch": 2.4946342841005364, + "grad_norm": 2.846820340041814, + "learning_rate": 1.4518480305157801e-06, + "loss": 0.1378, + "step": 17667 + }, + { + "epoch": 2.4947754871505223, + "grad_norm": 3.144776172013608, + "learning_rate": 1.45105704160443e-06, + "loss": 0.106, + "step": 17668 + }, + { + "epoch": 2.494916690200508, + "grad_norm": 2.9809593820751874, + "learning_rate": 1.4502662513668241e-06, + "loss": 0.1468, + "step": 17669 + }, + { + "epoch": 2.495057893250494, + "grad_norm": 2.666691103771677, + "learning_rate": 1.4494756598213377e-06, + "loss": 0.0924, + "step": 17670 + }, + { + "epoch": 2.49519909630048, + "grad_norm": 3.496307187786648, + "learning_rate": 1.448685266986345e-06, + "loss": 0.1535, + "step": 17671 + }, + { + "epoch": 2.495340299350466, + "grad_norm": 2.872519467735819, + "learning_rate": 1.4478950728802132e-06, + "loss": 0.1408, + "step": 17672 + }, + { + "epoch": 2.4954815024004517, + "grad_norm": 2.855515221737581, + "learning_rate": 1.4471050775213068e-06, + "loss": 0.0981, + "step": 17673 + }, + { + "epoch": 2.4956227054504376, + "grad_norm": 3.4126606031176303, + "learning_rate": 1.4463152809279824e-06, + "loss": 0.1625, + "step": 17674 + }, + { + "epoch": 2.4957639085004235, + "grad_norm": 3.3112861265930493, + "learning_rate": 1.4455256831186016e-06, + "loss": 0.1637, + "step": 17675 + }, + { + "epoch": 2.4959051115504094, + "grad_norm": 3.372083804849956, + "learning_rate": 1.4447362841115075e-06, + "loss": 0.1615, + "step": 17676 + }, + { + "epoch": 2.4960463146003953, + "grad_norm": 2.9093657186264097, + "learning_rate": 1.4439470839250469e-06, + "loss": 0.1199, + "step": 17677 + }, + { + "epoch": 2.496187517650381, + "grad_norm": 3.64037147727848, + "learning_rate": 1.4431580825775604e-06, + "loss": 0.141, + "step": 17678 + }, + { + "epoch": 2.496328720700367, + "grad_norm": 2.909907819158235, + "learning_rate": 1.4423692800873857e-06, + "loss": 0.1306, + "step": 17679 + }, + { + "epoch": 2.496469923750353, + "grad_norm": 3.5049646057336306, + "learning_rate": 1.44158067647285e-06, + "loss": 0.1811, + "step": 17680 + }, + { + "epoch": 2.496611126800339, + "grad_norm": 2.7887536102456725, + "learning_rate": 1.440792271752287e-06, + "loss": 0.1397, + "step": 17681 + }, + { + "epoch": 2.4967523298503247, + "grad_norm": 3.6735652905911405, + "learning_rate": 1.440004065944014e-06, + "loss": 0.1544, + "step": 17682 + }, + { + "epoch": 2.4968935329003106, + "grad_norm": 3.1963855450477716, + "learning_rate": 1.4392160590663517e-06, + "loss": 0.1394, + "step": 17683 + }, + { + "epoch": 2.4970347359502965, + "grad_norm": 3.2308228696393937, + "learning_rate": 1.4384282511376102e-06, + "loss": 0.1485, + "step": 17684 + }, + { + "epoch": 2.4971759390002823, + "grad_norm": 3.6047256226610376, + "learning_rate": 1.4376406421760946e-06, + "loss": 0.1713, + "step": 17685 + }, + { + "epoch": 2.4973171420502682, + "grad_norm": 2.9735617087654393, + "learning_rate": 1.4368532322001161e-06, + "loss": 0.1348, + "step": 17686 + }, + { + "epoch": 2.497458345100254, + "grad_norm": 2.876349457098305, + "learning_rate": 1.4360660212279698e-06, + "loss": 0.0845, + "step": 17687 + }, + { + "epoch": 2.49759954815024, + "grad_norm": 3.7817743996086275, + "learning_rate": 1.4352790092779511e-06, + "loss": 0.2039, + "step": 17688 + }, + { + "epoch": 2.497740751200226, + "grad_norm": 3.9864033731864734, + "learning_rate": 1.4344921963683501e-06, + "loss": 0.2017, + "step": 17689 + }, + { + "epoch": 2.4978819542502118, + "grad_norm": 3.641398258853611, + "learning_rate": 1.4337055825174506e-06, + "loss": 0.1722, + "step": 17690 + }, + { + "epoch": 2.4980231573001976, + "grad_norm": 3.1718990632451174, + "learning_rate": 1.4329191677435338e-06, + "loss": 0.1535, + "step": 17691 + }, + { + "epoch": 2.4981643603501835, + "grad_norm": 2.7504988441699285, + "learning_rate": 1.4321329520648752e-06, + "loss": 0.1107, + "step": 17692 + }, + { + "epoch": 2.4983055634001694, + "grad_norm": 3.2757433255544717, + "learning_rate": 1.4313469354997468e-06, + "loss": 0.1477, + "step": 17693 + }, + { + "epoch": 2.4984467664501553, + "grad_norm": 3.422196087027472, + "learning_rate": 1.4305611180664157e-06, + "loss": 0.107, + "step": 17694 + }, + { + "epoch": 2.498587969500141, + "grad_norm": 3.2388472357516185, + "learning_rate": 1.4297754997831436e-06, + "loss": 0.1602, + "step": 17695 + }, + { + "epoch": 2.498729172550127, + "grad_norm": 2.8631982454898286, + "learning_rate": 1.4289900806681866e-06, + "loss": 0.1255, + "step": 17696 + }, + { + "epoch": 2.498870375600113, + "grad_norm": 3.666937346087086, + "learning_rate": 1.4282048607397969e-06, + "loss": 0.1914, + "step": 17697 + }, + { + "epoch": 2.499011578650099, + "grad_norm": 2.7074175093749577, + "learning_rate": 1.4274198400162265e-06, + "loss": 0.1354, + "step": 17698 + }, + { + "epoch": 2.4991527817000847, + "grad_norm": 2.9387672306406647, + "learning_rate": 1.4266350185157175e-06, + "loss": 0.1558, + "step": 17699 + }, + { + "epoch": 2.4992939847500706, + "grad_norm": 3.5205264886051446, + "learning_rate": 1.4258503962565096e-06, + "loss": 0.1275, + "step": 17700 + }, + { + "epoch": 2.4994351878000565, + "grad_norm": 3.606292690442444, + "learning_rate": 1.425065973256833e-06, + "loss": 0.1139, + "step": 17701 + }, + { + "epoch": 2.4995763908500424, + "grad_norm": 2.8633068203393615, + "learning_rate": 1.424281749534918e-06, + "loss": 0.1518, + "step": 17702 + }, + { + "epoch": 2.4997175939000282, + "grad_norm": 4.6914760522605405, + "learning_rate": 1.4234977251089944e-06, + "loss": 0.2149, + "step": 17703 + }, + { + "epoch": 2.499858796950014, + "grad_norm": 2.8935568446058713, + "learning_rate": 1.4227138999972801e-06, + "loss": 0.1451, + "step": 17704 + }, + { + "epoch": 2.5, + "grad_norm": 3.2382524583135885, + "learning_rate": 1.4219302742179897e-06, + "loss": 0.1344, + "step": 17705 + }, + { + "epoch": 2.500141203049986, + "grad_norm": 3.077644609991341, + "learning_rate": 1.4211468477893352e-06, + "loss": 0.1096, + "step": 17706 + }, + { + "epoch": 2.5002824060999718, + "grad_norm": 3.6937340168387167, + "learning_rate": 1.4203636207295223e-06, + "loss": 0.186, + "step": 17707 + }, + { + "epoch": 2.5004236091499576, + "grad_norm": 4.376475668854339, + "learning_rate": 1.4195805930567552e-06, + "loss": 0.1904, + "step": 17708 + }, + { + "epoch": 2.5005648121999435, + "grad_norm": 3.3251145702051663, + "learning_rate": 1.418797764789228e-06, + "loss": 0.1593, + "step": 17709 + }, + { + "epoch": 2.5007060152499294, + "grad_norm": 2.948120597645961, + "learning_rate": 1.4180151359451367e-06, + "loss": 0.1191, + "step": 17710 + }, + { + "epoch": 2.5008472182999153, + "grad_norm": 2.773734105597387, + "learning_rate": 1.417232706542666e-06, + "loss": 0.1001, + "step": 17711 + }, + { + "epoch": 2.500988421349901, + "grad_norm": 2.8698132276112265, + "learning_rate": 1.416450476600001e-06, + "loss": 0.1002, + "step": 17712 + }, + { + "epoch": 2.501129624399887, + "grad_norm": 2.7899202480713763, + "learning_rate": 1.4156684461353188e-06, + "loss": 0.112, + "step": 17713 + }, + { + "epoch": 2.501270827449873, + "grad_norm": 4.037037634023028, + "learning_rate": 1.414886615166794e-06, + "loss": 0.1598, + "step": 17714 + }, + { + "epoch": 2.501412030499859, + "grad_norm": 3.5715976057079195, + "learning_rate": 1.4141049837125975e-06, + "loss": 0.1834, + "step": 17715 + }, + { + "epoch": 2.5015532335498447, + "grad_norm": 2.9350536063306603, + "learning_rate": 1.4133235517908938e-06, + "loss": 0.1133, + "step": 17716 + }, + { + "epoch": 2.5016944365998306, + "grad_norm": 2.4682416029059264, + "learning_rate": 1.4125423194198451e-06, + "loss": 0.1348, + "step": 17717 + }, + { + "epoch": 2.5018356396498165, + "grad_norm": 2.374448658115203, + "learning_rate": 1.4117612866176022e-06, + "loss": 0.099, + "step": 17718 + }, + { + "epoch": 2.5019768426998024, + "grad_norm": 3.74376659795953, + "learning_rate": 1.4109804534023153e-06, + "loss": 0.1829, + "step": 17719 + }, + { + "epoch": 2.5021180457497882, + "grad_norm": 3.3656452519731994, + "learning_rate": 1.4101998197921352e-06, + "loss": 0.1497, + "step": 17720 + }, + { + "epoch": 2.502259248799774, + "grad_norm": 3.5764279477386074, + "learning_rate": 1.409419385805202e-06, + "loss": 0.1369, + "step": 17721 + }, + { + "epoch": 2.50240045184976, + "grad_norm": 3.671451877334214, + "learning_rate": 1.4086391514596532e-06, + "loss": 0.1631, + "step": 17722 + }, + { + "epoch": 2.502541654899746, + "grad_norm": 2.784895270238234, + "learning_rate": 1.407859116773619e-06, + "loss": 0.1171, + "step": 17723 + }, + { + "epoch": 2.5026828579497318, + "grad_norm": 2.7587274399623705, + "learning_rate": 1.407079281765229e-06, + "loss": 0.1587, + "step": 17724 + }, + { + "epoch": 2.5028240609997177, + "grad_norm": 2.9712312438809274, + "learning_rate": 1.4062996464526046e-06, + "loss": 0.1502, + "step": 17725 + }, + { + "epoch": 2.5029652640497035, + "grad_norm": 2.779483522738431, + "learning_rate": 1.4055202108538657e-06, + "loss": 0.1255, + "step": 17726 + }, + { + "epoch": 2.5031064670996894, + "grad_norm": 2.406566453506488, + "learning_rate": 1.4047409749871255e-06, + "loss": 0.1372, + "step": 17727 + }, + { + "epoch": 2.5032476701496753, + "grad_norm": 3.598050669433387, + "learning_rate": 1.4039619388704928e-06, + "loss": 0.1672, + "step": 17728 + }, + { + "epoch": 2.503388873199661, + "grad_norm": 4.3880446045780275, + "learning_rate": 1.4031831025220722e-06, + "loss": 0.1767, + "step": 17729 + }, + { + "epoch": 2.503530076249647, + "grad_norm": 2.915244914417148, + "learning_rate": 1.4024044659599633e-06, + "loss": 0.1357, + "step": 17730 + }, + { + "epoch": 2.503671279299633, + "grad_norm": 3.6895058595832446, + "learning_rate": 1.40162602920226e-06, + "loss": 0.182, + "step": 17731 + }, + { + "epoch": 2.503812482349619, + "grad_norm": 3.2464712447418442, + "learning_rate": 1.4008477922670571e-06, + "loss": 0.1362, + "step": 17732 + }, + { + "epoch": 2.5039536853996047, + "grad_norm": 2.840192158536428, + "learning_rate": 1.4000697551724362e-06, + "loss": 0.1597, + "step": 17733 + }, + { + "epoch": 2.5040948884495906, + "grad_norm": 4.164212328946199, + "learning_rate": 1.399291917936484e-06, + "loss": 0.1536, + "step": 17734 + }, + { + "epoch": 2.5042360914995765, + "grad_norm": 2.9629184691248773, + "learning_rate": 1.39851428057727e-06, + "loss": 0.1488, + "step": 17735 + }, + { + "epoch": 2.5043772945495624, + "grad_norm": 2.9731145085347115, + "learning_rate": 1.3977368431128679e-06, + "loss": 0.1738, + "step": 17736 + }, + { + "epoch": 2.5045184975995483, + "grad_norm": 3.0920639576385414, + "learning_rate": 1.3969596055613489e-06, + "loss": 0.149, + "step": 17737 + }, + { + "epoch": 2.504659700649534, + "grad_norm": 2.8473342593088997, + "learning_rate": 1.3961825679407726e-06, + "loss": 0.1555, + "step": 17738 + }, + { + "epoch": 2.50480090369952, + "grad_norm": 3.1679016095029953, + "learning_rate": 1.3954057302691981e-06, + "loss": 0.153, + "step": 17739 + }, + { + "epoch": 2.504942106749506, + "grad_norm": 2.8458042187524195, + "learning_rate": 1.3946290925646788e-06, + "loss": 0.1316, + "step": 17740 + }, + { + "epoch": 2.505083309799492, + "grad_norm": 3.5263398375881976, + "learning_rate": 1.3938526548452625e-06, + "loss": 0.1578, + "step": 17741 + }, + { + "epoch": 2.5052245128494777, + "grad_norm": 2.8584185506630155, + "learning_rate": 1.3930764171289935e-06, + "loss": 0.1183, + "step": 17742 + }, + { + "epoch": 2.5053657158994636, + "grad_norm": 3.5149181460571577, + "learning_rate": 1.3923003794339119e-06, + "loss": 0.1993, + "step": 17743 + }, + { + "epoch": 2.5055069189494494, + "grad_norm": 4.118995532529588, + "learning_rate": 1.391524541778052e-06, + "loss": 0.15, + "step": 17744 + }, + { + "epoch": 2.5056481219994353, + "grad_norm": 2.682071286008766, + "learning_rate": 1.3907489041794442e-06, + "loss": 0.1137, + "step": 17745 + }, + { + "epoch": 2.505789325049421, + "grad_norm": 3.0904447931600187, + "learning_rate": 1.3899734666561138e-06, + "loss": 0.1448, + "step": 17746 + }, + { + "epoch": 2.505930528099407, + "grad_norm": 3.253627278654687, + "learning_rate": 1.389198229226081e-06, + "loss": 0.1569, + "step": 17747 + }, + { + "epoch": 2.506071731149393, + "grad_norm": 3.433113002227054, + "learning_rate": 1.388423191907361e-06, + "loss": 0.1563, + "step": 17748 + }, + { + "epoch": 2.506212934199379, + "grad_norm": 3.288083247069401, + "learning_rate": 1.3876483547179688e-06, + "loss": 0.1626, + "step": 17749 + }, + { + "epoch": 2.5063541372493647, + "grad_norm": 3.2312605029722437, + "learning_rate": 1.3868737176759105e-06, + "loss": 0.1067, + "step": 17750 + }, + { + "epoch": 2.5064953402993506, + "grad_norm": 3.7956596571605097, + "learning_rate": 1.386099280799188e-06, + "loss": 0.1655, + "step": 17751 + }, + { + "epoch": 2.5066365433493365, + "grad_norm": 3.890507407601209, + "learning_rate": 1.3853250441057975e-06, + "loss": 0.1646, + "step": 17752 + }, + { + "epoch": 2.5067777463993224, + "grad_norm": 2.828658426710023, + "learning_rate": 1.3845510076137293e-06, + "loss": 0.1397, + "step": 17753 + }, + { + "epoch": 2.5069189494493083, + "grad_norm": 3.2862781996478385, + "learning_rate": 1.3837771713409776e-06, + "loss": 0.1676, + "step": 17754 + }, + { + "epoch": 2.507060152499294, + "grad_norm": 2.457387412741747, + "learning_rate": 1.3830035353055226e-06, + "loss": 0.1151, + "step": 17755 + }, + { + "epoch": 2.50720135554928, + "grad_norm": 3.770826527989192, + "learning_rate": 1.3822300995253445e-06, + "loss": 0.1907, + "step": 17756 + }, + { + "epoch": 2.507342558599266, + "grad_norm": 3.3760808042425965, + "learning_rate": 1.381456864018418e-06, + "loss": 0.1494, + "step": 17757 + }, + { + "epoch": 2.507483761649252, + "grad_norm": 2.5337683551288634, + "learning_rate": 1.3806838288027113e-06, + "loss": 0.1102, + "step": 17758 + }, + { + "epoch": 2.5076249646992377, + "grad_norm": 2.5196957495452796, + "learning_rate": 1.3799109938961897e-06, + "loss": 0.1208, + "step": 17759 + }, + { + "epoch": 2.5077661677492236, + "grad_norm": 3.477083957112909, + "learning_rate": 1.379138359316814e-06, + "loss": 0.1583, + "step": 17760 + }, + { + "epoch": 2.5079073707992094, + "grad_norm": 3.127146039160101, + "learning_rate": 1.37836592508254e-06, + "loss": 0.145, + "step": 17761 + }, + { + "epoch": 2.5080485738491953, + "grad_norm": 2.7706950583164187, + "learning_rate": 1.3775936912113187e-06, + "loss": 0.1339, + "step": 17762 + }, + { + "epoch": 2.508189776899181, + "grad_norm": 3.687061228170924, + "learning_rate": 1.3768216577210959e-06, + "loss": 0.1497, + "step": 17763 + }, + { + "epoch": 2.508330979949167, + "grad_norm": 3.1651781421865497, + "learning_rate": 1.3760498246298138e-06, + "loss": 0.1485, + "step": 17764 + }, + { + "epoch": 2.508472182999153, + "grad_norm": 2.640539815433156, + "learning_rate": 1.375278191955407e-06, + "loss": 0.1216, + "step": 17765 + }, + { + "epoch": 2.508613386049139, + "grad_norm": 3.0437877265039934, + "learning_rate": 1.3745067597158123e-06, + "loss": 0.1648, + "step": 17766 + }, + { + "epoch": 2.5087545890991247, + "grad_norm": 4.359468123888233, + "learning_rate": 1.3737355279289566e-06, + "loss": 0.1967, + "step": 17767 + }, + { + "epoch": 2.5088957921491106, + "grad_norm": 3.0790462012443562, + "learning_rate": 1.372964496612763e-06, + "loss": 0.1557, + "step": 17768 + }, + { + "epoch": 2.5090369951990965, + "grad_norm": 3.102154557241754, + "learning_rate": 1.3721936657851464e-06, + "loss": 0.1421, + "step": 17769 + }, + { + "epoch": 2.5091781982490824, + "grad_norm": 2.972293204873582, + "learning_rate": 1.3714230354640234e-06, + "loss": 0.1358, + "step": 17770 + }, + { + "epoch": 2.5093194012990683, + "grad_norm": 2.7600427766839273, + "learning_rate": 1.3706526056673008e-06, + "loss": 0.1156, + "step": 17771 + }, + { + "epoch": 2.509460604349054, + "grad_norm": 2.960362660522951, + "learning_rate": 1.3698823764128867e-06, + "loss": 0.1755, + "step": 17772 + }, + { + "epoch": 2.50960180739904, + "grad_norm": 3.4562601887374353, + "learning_rate": 1.3691123477186786e-06, + "loss": 0.1385, + "step": 17773 + }, + { + "epoch": 2.509743010449026, + "grad_norm": 3.1566639756322648, + "learning_rate": 1.3683425196025734e-06, + "loss": 0.1566, + "step": 17774 + }, + { + "epoch": 2.509884213499012, + "grad_norm": 3.0130359470943495, + "learning_rate": 1.3675728920824593e-06, + "loss": 0.1276, + "step": 17775 + }, + { + "epoch": 2.5100254165489977, + "grad_norm": 3.369965768926957, + "learning_rate": 1.366803465176223e-06, + "loss": 0.1342, + "step": 17776 + }, + { + "epoch": 2.5101666195989836, + "grad_norm": 3.5547676888308466, + "learning_rate": 1.3660342389017466e-06, + "loss": 0.1567, + "step": 17777 + }, + { + "epoch": 2.5103078226489695, + "grad_norm": 3.4652555477147984, + "learning_rate": 1.365265213276905e-06, + "loss": 0.1896, + "step": 17778 + }, + { + "epoch": 2.5104490256989553, + "grad_norm": 3.8968470072630126, + "learning_rate": 1.3644963883195716e-06, + "loss": 0.1723, + "step": 17779 + }, + { + "epoch": 2.510590228748941, + "grad_norm": 2.6756036320989938, + "learning_rate": 1.363727764047612e-06, + "loss": 0.1274, + "step": 17780 + }, + { + "epoch": 2.510731431798927, + "grad_norm": 3.1059373548846447, + "learning_rate": 1.36295934047889e-06, + "loss": 0.1267, + "step": 17781 + }, + { + "epoch": 2.510872634848913, + "grad_norm": 3.7443498129390207, + "learning_rate": 1.362191117631263e-06, + "loss": 0.1433, + "step": 17782 + }, + { + "epoch": 2.511013837898899, + "grad_norm": 2.5140585181932353, + "learning_rate": 1.3614230955225817e-06, + "loss": 0.1048, + "step": 17783 + }, + { + "epoch": 2.5111550409488848, + "grad_norm": 3.2964762220893173, + "learning_rate": 1.360655274170698e-06, + "loss": 0.1446, + "step": 17784 + }, + { + "epoch": 2.5112962439988706, + "grad_norm": 3.3729307613321304, + "learning_rate": 1.359887653593458e-06, + "loss": 0.1973, + "step": 17785 + }, + { + "epoch": 2.5114374470488565, + "grad_norm": 3.078843701808194, + "learning_rate": 1.359120233808695e-06, + "loss": 0.145, + "step": 17786 + }, + { + "epoch": 2.5115786500988424, + "grad_norm": 3.759164441460674, + "learning_rate": 1.3583530148342461e-06, + "loss": 0.1508, + "step": 17787 + }, + { + "epoch": 2.5117198531488283, + "grad_norm": 2.920605521794752, + "learning_rate": 1.357585996687939e-06, + "loss": 0.1237, + "step": 17788 + }, + { + "epoch": 2.511861056198814, + "grad_norm": 3.675097491352447, + "learning_rate": 1.356819179387604e-06, + "loss": 0.181, + "step": 17789 + }, + { + "epoch": 2.5120022592488, + "grad_norm": 3.014531669693543, + "learning_rate": 1.3560525629510567e-06, + "loss": 0.1384, + "step": 17790 + }, + { + "epoch": 2.5121434622987855, + "grad_norm": 2.7500545920779804, + "learning_rate": 1.3552861473961164e-06, + "loss": 0.1164, + "step": 17791 + }, + { + "epoch": 2.5122846653487714, + "grad_norm": 3.0599655268110935, + "learning_rate": 1.3545199327405922e-06, + "loss": 0.1478, + "step": 17792 + }, + { + "epoch": 2.5124258683987573, + "grad_norm": 2.816875012272122, + "learning_rate": 1.3537539190022909e-06, + "loss": 0.1443, + "step": 17793 + }, + { + "epoch": 2.512567071448743, + "grad_norm": 3.8263027919260573, + "learning_rate": 1.3529881061990147e-06, + "loss": 0.1767, + "step": 17794 + }, + { + "epoch": 2.512708274498729, + "grad_norm": 2.4378935489279105, + "learning_rate": 1.3522224943485606e-06, + "loss": 0.1169, + "step": 17795 + }, + { + "epoch": 2.512849477548715, + "grad_norm": 2.672608408610488, + "learning_rate": 1.3514570834687203e-06, + "loss": 0.1461, + "step": 17796 + }, + { + "epoch": 2.512990680598701, + "grad_norm": 2.981794932241918, + "learning_rate": 1.3506918735772833e-06, + "loss": 0.1332, + "step": 17797 + }, + { + "epoch": 2.5131318836486867, + "grad_norm": 3.6424684184609992, + "learning_rate": 1.3499268646920317e-06, + "loss": 0.1237, + "step": 17798 + }, + { + "epoch": 2.5132730866986726, + "grad_norm": 3.293564571589729, + "learning_rate": 1.349162056830744e-06, + "loss": 0.1212, + "step": 17799 + }, + { + "epoch": 2.5134142897486584, + "grad_norm": 2.7906831206007308, + "learning_rate": 1.3483974500111907e-06, + "loss": 0.1497, + "step": 17800 + }, + { + "epoch": 2.5135554927986443, + "grad_norm": 2.904687114161842, + "learning_rate": 1.3476330442511476e-06, + "loss": 0.1431, + "step": 17801 + }, + { + "epoch": 2.51369669584863, + "grad_norm": 3.3867467954904678, + "learning_rate": 1.3468688395683783e-06, + "loss": 0.1601, + "step": 17802 + }, + { + "epoch": 2.513837898898616, + "grad_norm": 2.7875826630949203, + "learning_rate": 1.3461048359806384e-06, + "loss": 0.1174, + "step": 17803 + }, + { + "epoch": 2.513979101948602, + "grad_norm": 3.306422385859353, + "learning_rate": 1.3453410335056837e-06, + "loss": 0.1584, + "step": 17804 + }, + { + "epoch": 2.514120304998588, + "grad_norm": 2.689681870730258, + "learning_rate": 1.3445774321612637e-06, + "loss": 0.1318, + "step": 17805 + }, + { + "epoch": 2.5142615080485737, + "grad_norm": 2.691615647805514, + "learning_rate": 1.3438140319651283e-06, + "loss": 0.161, + "step": 17806 + }, + { + "epoch": 2.5144027110985596, + "grad_norm": 3.1359956681233316, + "learning_rate": 1.3430508329350166e-06, + "loss": 0.135, + "step": 17807 + }, + { + "epoch": 2.5145439141485455, + "grad_norm": 3.020221122671316, + "learning_rate": 1.3422878350886658e-06, + "loss": 0.1586, + "step": 17808 + }, + { + "epoch": 2.5146851171985314, + "grad_norm": 3.883703848195002, + "learning_rate": 1.341525038443806e-06, + "loss": 0.1527, + "step": 17809 + }, + { + "epoch": 2.5148263202485173, + "grad_norm": 3.5719676440572763, + "learning_rate": 1.3407624430181644e-06, + "loss": 0.1609, + "step": 17810 + }, + { + "epoch": 2.514967523298503, + "grad_norm": 3.5975357001848316, + "learning_rate": 1.3400000488294651e-06, + "loss": 0.2061, + "step": 17811 + }, + { + "epoch": 2.515108726348489, + "grad_norm": 4.417238649831551, + "learning_rate": 1.3392378558954233e-06, + "loss": 0.1952, + "step": 17812 + }, + { + "epoch": 2.515249929398475, + "grad_norm": 3.3287719425128612, + "learning_rate": 1.3384758642337547e-06, + "loss": 0.1723, + "step": 17813 + }, + { + "epoch": 2.515391132448461, + "grad_norm": 2.5588965650273945, + "learning_rate": 1.3377140738621663e-06, + "loss": 0.1251, + "step": 17814 + }, + { + "epoch": 2.5155323354984467, + "grad_norm": 2.3893668428827897, + "learning_rate": 1.3369524847983617e-06, + "loss": 0.1166, + "step": 17815 + }, + { + "epoch": 2.5156735385484326, + "grad_norm": 3.0073237600303133, + "learning_rate": 1.33619109706004e-06, + "loss": 0.1258, + "step": 17816 + }, + { + "epoch": 2.5158147415984184, + "grad_norm": 3.8270897914115145, + "learning_rate": 1.3354299106648927e-06, + "loss": 0.1705, + "step": 17817 + }, + { + "epoch": 2.5159559446484043, + "grad_norm": 2.865850416565983, + "learning_rate": 1.3346689256306155e-06, + "loss": 0.1218, + "step": 17818 + }, + { + "epoch": 2.51609714769839, + "grad_norm": 3.043978326866323, + "learning_rate": 1.3339081419748922e-06, + "loss": 0.1358, + "step": 17819 + }, + { + "epoch": 2.516238350748376, + "grad_norm": 3.1790148414003787, + "learning_rate": 1.3331475597153988e-06, + "loss": 0.1516, + "step": 17820 + }, + { + "epoch": 2.516379553798362, + "grad_norm": 3.159448421592378, + "learning_rate": 1.3323871788698129e-06, + "loss": 0.1603, + "step": 17821 + }, + { + "epoch": 2.516520756848348, + "grad_norm": 3.3134719346330654, + "learning_rate": 1.331626999455804e-06, + "loss": 0.117, + "step": 17822 + }, + { + "epoch": 2.5166619598983337, + "grad_norm": 2.9465247194714013, + "learning_rate": 1.3308670214910413e-06, + "loss": 0.1309, + "step": 17823 + }, + { + "epoch": 2.5168031629483196, + "grad_norm": 3.5150496964863995, + "learning_rate": 1.3301072449931862e-06, + "loss": 0.1269, + "step": 17824 + }, + { + "epoch": 2.5169443659983055, + "grad_norm": 3.090989502713918, + "learning_rate": 1.3293476699798936e-06, + "loss": 0.1529, + "step": 17825 + }, + { + "epoch": 2.5170855690482914, + "grad_norm": 2.8104367727431416, + "learning_rate": 1.3285882964688168e-06, + "loss": 0.135, + "step": 17826 + }, + { + "epoch": 2.5172267720982773, + "grad_norm": 2.3917480967527753, + "learning_rate": 1.3278291244776042e-06, + "loss": 0.109, + "step": 17827 + }, + { + "epoch": 2.517367975148263, + "grad_norm": 3.590218447312786, + "learning_rate": 1.3270701540238962e-06, + "loss": 0.1638, + "step": 17828 + }, + { + "epoch": 2.517509178198249, + "grad_norm": 2.898975789987712, + "learning_rate": 1.326311385125333e-06, + "loss": 0.1576, + "step": 17829 + }, + { + "epoch": 2.517650381248235, + "grad_norm": 3.3151279428839793, + "learning_rate": 1.325552817799547e-06, + "loss": 0.1703, + "step": 17830 + }, + { + "epoch": 2.517791584298221, + "grad_norm": 3.170275309793015, + "learning_rate": 1.3247944520641676e-06, + "loss": 0.1431, + "step": 17831 + }, + { + "epoch": 2.5179327873482067, + "grad_norm": 2.872940723178655, + "learning_rate": 1.3240362879368184e-06, + "loss": 0.139, + "step": 17832 + }, + { + "epoch": 2.5180739903981926, + "grad_norm": 3.361711937741, + "learning_rate": 1.3232783254351189e-06, + "loss": 0.1424, + "step": 17833 + }, + { + "epoch": 2.5182151934481785, + "grad_norm": 2.980034823747822, + "learning_rate": 1.3225205645766815e-06, + "loss": 0.1347, + "step": 17834 + }, + { + "epoch": 2.5183563964981643, + "grad_norm": 3.774093300491614, + "learning_rate": 1.3217630053791209e-06, + "loss": 0.1905, + "step": 17835 + }, + { + "epoch": 2.5184975995481502, + "grad_norm": 3.095581223696831, + "learning_rate": 1.3210056478600431e-06, + "loss": 0.1447, + "step": 17836 + }, + { + "epoch": 2.518638802598136, + "grad_norm": 3.308932938084319, + "learning_rate": 1.3202484920370429e-06, + "loss": 0.1341, + "step": 17837 + }, + { + "epoch": 2.518780005648122, + "grad_norm": 3.2545220217658373, + "learning_rate": 1.3194915379277195e-06, + "loss": 0.1302, + "step": 17838 + }, + { + "epoch": 2.518921208698108, + "grad_norm": 3.164203567260968, + "learning_rate": 1.3187347855496624e-06, + "loss": 0.1399, + "step": 17839 + }, + { + "epoch": 2.5190624117480938, + "grad_norm": 3.0568965598678624, + "learning_rate": 1.3179782349204618e-06, + "loss": 0.1027, + "step": 17840 + }, + { + "epoch": 2.5192036147980796, + "grad_norm": 3.1524694720792077, + "learning_rate": 1.3172218860576968e-06, + "loss": 0.1781, + "step": 17841 + }, + { + "epoch": 2.5193448178480655, + "grad_norm": 2.218604617404965, + "learning_rate": 1.3164657389789459e-06, + "loss": 0.1225, + "step": 17842 + }, + { + "epoch": 2.5194860208980514, + "grad_norm": 3.654881000488937, + "learning_rate": 1.3157097937017804e-06, + "loss": 0.1631, + "step": 17843 + }, + { + "epoch": 2.5196272239480373, + "grad_norm": 2.7272340935657624, + "learning_rate": 1.314954050243772e-06, + "loss": 0.1181, + "step": 17844 + }, + { + "epoch": 2.519768426998023, + "grad_norm": 3.437392178145744, + "learning_rate": 1.3141985086224751e-06, + "loss": 0.1702, + "step": 17845 + }, + { + "epoch": 2.519909630048009, + "grad_norm": 2.4036414645486817, + "learning_rate": 1.3134431688554572e-06, + "loss": 0.1085, + "step": 17846 + }, + { + "epoch": 2.520050833097995, + "grad_norm": 3.776039234562146, + "learning_rate": 1.3126880309602674e-06, + "loss": 0.1906, + "step": 17847 + }, + { + "epoch": 2.520192036147981, + "grad_norm": 3.0893740787679684, + "learning_rate": 1.3119330949544573e-06, + "loss": 0.1337, + "step": 17848 + }, + { + "epoch": 2.5203332391979667, + "grad_norm": 3.579956223361852, + "learning_rate": 1.3111783608555695e-06, + "loss": 0.1684, + "step": 17849 + }, + { + "epoch": 2.5204744422479526, + "grad_norm": 2.8015465770148054, + "learning_rate": 1.3104238286811433e-06, + "loss": 0.1164, + "step": 17850 + }, + { + "epoch": 2.5206156452979385, + "grad_norm": 3.0556484374696096, + "learning_rate": 1.3096694984487134e-06, + "loss": 0.109, + "step": 17851 + }, + { + "epoch": 2.5207568483479244, + "grad_norm": 3.6848313578823038, + "learning_rate": 1.3089153701758128e-06, + "loss": 0.1515, + "step": 17852 + }, + { + "epoch": 2.5208980513979102, + "grad_norm": 3.5538462746564656, + "learning_rate": 1.3081614438799684e-06, + "loss": 0.1413, + "step": 17853 + }, + { + "epoch": 2.521039254447896, + "grad_norm": 2.613997912471486, + "learning_rate": 1.307407719578696e-06, + "loss": 0.1581, + "step": 17854 + }, + { + "epoch": 2.521180457497882, + "grad_norm": 3.1105543243159373, + "learning_rate": 1.306654197289514e-06, + "loss": 0.1515, + "step": 17855 + }, + { + "epoch": 2.521321660547868, + "grad_norm": 3.336654300983237, + "learning_rate": 1.305900877029932e-06, + "loss": 0.1492, + "step": 17856 + }, + { + "epoch": 2.5214628635978538, + "grad_norm": 3.310056258786906, + "learning_rate": 1.305147758817461e-06, + "loss": 0.1251, + "step": 17857 + }, + { + "epoch": 2.5216040666478396, + "grad_norm": 3.39931861375549, + "learning_rate": 1.3043948426696019e-06, + "loss": 0.1536, + "step": 17858 + }, + { + "epoch": 2.5217452696978255, + "grad_norm": 3.420519212846632, + "learning_rate": 1.3036421286038502e-06, + "loss": 0.1469, + "step": 17859 + }, + { + "epoch": 2.5218864727478114, + "grad_norm": 3.0318853896333993, + "learning_rate": 1.3028896166377003e-06, + "loss": 0.1259, + "step": 17860 + }, + { + "epoch": 2.5220276757977973, + "grad_norm": 2.8629541730566093, + "learning_rate": 1.3021373067886423e-06, + "loss": 0.1231, + "step": 17861 + }, + { + "epoch": 2.522168878847783, + "grad_norm": 4.126518572045825, + "learning_rate": 1.301385199074151e-06, + "loss": 0.1698, + "step": 17862 + }, + { + "epoch": 2.522310081897769, + "grad_norm": 2.9011912505895676, + "learning_rate": 1.3006332935117149e-06, + "loss": 0.1382, + "step": 17863 + }, + { + "epoch": 2.522451284947755, + "grad_norm": 2.755854095489973, + "learning_rate": 1.2998815901188033e-06, + "loss": 0.1472, + "step": 17864 + }, + { + "epoch": 2.522592487997741, + "grad_norm": 3.540766189193318, + "learning_rate": 1.2991300889128867e-06, + "loss": 0.1453, + "step": 17865 + }, + { + "epoch": 2.5227336910477267, + "grad_norm": 3.0412141638485504, + "learning_rate": 1.2983787899114286e-06, + "loss": 0.134, + "step": 17866 + }, + { + "epoch": 2.5228748940977126, + "grad_norm": 3.334270044690637, + "learning_rate": 1.2976276931318899e-06, + "loss": 0.1518, + "step": 17867 + }, + { + "epoch": 2.5230160971476985, + "grad_norm": 3.7472718444192927, + "learning_rate": 1.296876798591723e-06, + "loss": 0.148, + "step": 17868 + }, + { + "epoch": 2.5231573001976844, + "grad_norm": 2.504443856369491, + "learning_rate": 1.296126106308383e-06, + "loss": 0.0945, + "step": 17869 + }, + { + "epoch": 2.5232985032476702, + "grad_norm": 4.586699043844658, + "learning_rate": 1.2953756162993158e-06, + "loss": 0.1783, + "step": 17870 + }, + { + "epoch": 2.523439706297656, + "grad_norm": 3.1535144873287484, + "learning_rate": 1.2946253285819576e-06, + "loss": 0.1593, + "step": 17871 + }, + { + "epoch": 2.523580909347642, + "grad_norm": 2.5678730355108965, + "learning_rate": 1.2938752431737467e-06, + "loss": 0.133, + "step": 17872 + }, + { + "epoch": 2.523722112397628, + "grad_norm": 2.501959820838591, + "learning_rate": 1.2931253600921157e-06, + "loss": 0.1191, + "step": 17873 + }, + { + "epoch": 2.5238633154476138, + "grad_norm": 3.5755679798366873, + "learning_rate": 1.2923756793544895e-06, + "loss": 0.1532, + "step": 17874 + }, + { + "epoch": 2.5240045184975997, + "grad_norm": 4.333123912382137, + "learning_rate": 1.2916262009782932e-06, + "loss": 0.207, + "step": 17875 + }, + { + "epoch": 2.5241457215475855, + "grad_norm": 3.130525655914661, + "learning_rate": 1.290876924980944e-06, + "loss": 0.1239, + "step": 17876 + }, + { + "epoch": 2.5242869245975714, + "grad_norm": 2.9370417061050134, + "learning_rate": 1.2901278513798533e-06, + "loss": 0.1425, + "step": 17877 + }, + { + "epoch": 2.5244281276475573, + "grad_norm": 3.0114264811866565, + "learning_rate": 1.2893789801924328e-06, + "loss": 0.157, + "step": 17878 + }, + { + "epoch": 2.524569330697543, + "grad_norm": 3.074452252281101, + "learning_rate": 1.2886303114360777e-06, + "loss": 0.1223, + "step": 17879 + }, + { + "epoch": 2.524710533747529, + "grad_norm": 2.638178710052066, + "learning_rate": 1.2878818451281939e-06, + "loss": 0.1154, + "step": 17880 + }, + { + "epoch": 2.524851736797515, + "grad_norm": 3.324860966289068, + "learning_rate": 1.287133581286174e-06, + "loss": 0.1336, + "step": 17881 + }, + { + "epoch": 2.524992939847501, + "grad_norm": 3.8958962065012592, + "learning_rate": 1.2863855199274079e-06, + "loss": 0.1306, + "step": 17882 + }, + { + "epoch": 2.5251341428974867, + "grad_norm": 3.833048029144912, + "learning_rate": 1.2856376610692777e-06, + "loss": 0.1737, + "step": 17883 + }, + { + "epoch": 2.5252753459474726, + "grad_norm": 3.924621752457308, + "learning_rate": 1.2848900047291657e-06, + "loss": 0.1688, + "step": 17884 + }, + { + "epoch": 2.5254165489974585, + "grad_norm": 2.9610364217986698, + "learning_rate": 1.2841425509244453e-06, + "loss": 0.1307, + "step": 17885 + }, + { + "epoch": 2.5255577520474444, + "grad_norm": 3.1188489589227886, + "learning_rate": 1.2833952996724864e-06, + "loss": 0.1339, + "step": 17886 + }, + { + "epoch": 2.5256989550974303, + "grad_norm": 2.9292128683730243, + "learning_rate": 1.2826482509906613e-06, + "loss": 0.1261, + "step": 17887 + }, + { + "epoch": 2.525840158147416, + "grad_norm": 2.845658638002225, + "learning_rate": 1.281901404896323e-06, + "loss": 0.1251, + "step": 17888 + }, + { + "epoch": 2.525981361197402, + "grad_norm": 3.04497922687233, + "learning_rate": 1.281154761406831e-06, + "loss": 0.143, + "step": 17889 + }, + { + "epoch": 2.526122564247388, + "grad_norm": 3.214037483231397, + "learning_rate": 1.280408320539538e-06, + "loss": 0.1425, + "step": 17890 + }, + { + "epoch": 2.526263767297374, + "grad_norm": 2.908666696091374, + "learning_rate": 1.2796620823117866e-06, + "loss": 0.1489, + "step": 17891 + }, + { + "epoch": 2.5264049703473597, + "grad_norm": 4.105252374008186, + "learning_rate": 1.2789160467409244e-06, + "loss": 0.1695, + "step": 17892 + }, + { + "epoch": 2.526546173397345, + "grad_norm": 3.812597119509838, + "learning_rate": 1.2781702138442874e-06, + "loss": 0.1485, + "step": 17893 + }, + { + "epoch": 2.526687376447331, + "grad_norm": 3.307336179256453, + "learning_rate": 1.2774245836392085e-06, + "loss": 0.1569, + "step": 17894 + }, + { + "epoch": 2.526828579497317, + "grad_norm": 3.1161358727169888, + "learning_rate": 1.276679156143017e-06, + "loss": 0.1286, + "step": 17895 + }, + { + "epoch": 2.5269697825473028, + "grad_norm": 3.2250704341595804, + "learning_rate": 1.2759339313730302e-06, + "loss": 0.1671, + "step": 17896 + }, + { + "epoch": 2.5271109855972886, + "grad_norm": 2.726062067820866, + "learning_rate": 1.275188909346573e-06, + "loss": 0.1082, + "step": 17897 + }, + { + "epoch": 2.5272521886472745, + "grad_norm": 2.90972061794221, + "learning_rate": 1.2744440900809584e-06, + "loss": 0.1507, + "step": 17898 + }, + { + "epoch": 2.5273933916972604, + "grad_norm": 2.4721872594343988, + "learning_rate": 1.2736994735934949e-06, + "loss": 0.0995, + "step": 17899 + }, + { + "epoch": 2.5275345947472463, + "grad_norm": 2.827791874576526, + "learning_rate": 1.2729550599014862e-06, + "loss": 0.1883, + "step": 17900 + }, + { + "epoch": 2.527675797797232, + "grad_norm": 4.156221819471408, + "learning_rate": 1.272210849022234e-06, + "loss": 0.161, + "step": 17901 + }, + { + "epoch": 2.527817000847218, + "grad_norm": 3.38930211263252, + "learning_rate": 1.2714668409730312e-06, + "loss": 0.1453, + "step": 17902 + }, + { + "epoch": 2.527958203897204, + "grad_norm": 3.5936047884381765, + "learning_rate": 1.2707230357711686e-06, + "loss": 0.1566, + "step": 17903 + }, + { + "epoch": 2.52809940694719, + "grad_norm": 3.4384232930137233, + "learning_rate": 1.2699794334339356e-06, + "loss": 0.1282, + "step": 17904 + }, + { + "epoch": 2.5282406099971757, + "grad_norm": 3.0540859944531817, + "learning_rate": 1.2692360339786092e-06, + "loss": 0.1287, + "step": 17905 + }, + { + "epoch": 2.5283818130471616, + "grad_norm": 2.9732653266311564, + "learning_rate": 1.268492837422467e-06, + "loss": 0.1593, + "step": 17906 + }, + { + "epoch": 2.5285230160971475, + "grad_norm": 3.0226697379079894, + "learning_rate": 1.2677498437827796e-06, + "loss": 0.1305, + "step": 17907 + }, + { + "epoch": 2.5286642191471334, + "grad_norm": 3.325004731993773, + "learning_rate": 1.2670070530768131e-06, + "loss": 0.1619, + "step": 17908 + }, + { + "epoch": 2.5288054221971192, + "grad_norm": 3.657026739442091, + "learning_rate": 1.2662644653218336e-06, + "loss": 0.1876, + "step": 17909 + }, + { + "epoch": 2.528946625247105, + "grad_norm": 3.081597299036925, + "learning_rate": 1.2655220805350953e-06, + "loss": 0.1153, + "step": 17910 + }, + { + "epoch": 2.529087828297091, + "grad_norm": 3.171658293877184, + "learning_rate": 1.2647798987338523e-06, + "loss": 0.1323, + "step": 17911 + }, + { + "epoch": 2.529229031347077, + "grad_norm": 3.0492145281410084, + "learning_rate": 1.2640379199353536e-06, + "loss": 0.1315, + "step": 17912 + }, + { + "epoch": 2.5293702343970628, + "grad_norm": 3.6350764348370914, + "learning_rate": 1.263296144156837e-06, + "loss": 0.1426, + "step": 17913 + }, + { + "epoch": 2.5295114374470486, + "grad_norm": 3.7049718902194035, + "learning_rate": 1.2625545714155474e-06, + "loss": 0.1582, + "step": 17914 + }, + { + "epoch": 2.5296526404970345, + "grad_norm": 3.0134031011172833, + "learning_rate": 1.2618132017287154e-06, + "loss": 0.1212, + "step": 17915 + }, + { + "epoch": 2.5297938435470204, + "grad_norm": 3.183295819096301, + "learning_rate": 1.2610720351135718e-06, + "loss": 0.1276, + "step": 17916 + }, + { + "epoch": 2.5299350465970063, + "grad_norm": 2.990624145301635, + "learning_rate": 1.2603310715873396e-06, + "loss": 0.1386, + "step": 17917 + }, + { + "epoch": 2.530076249646992, + "grad_norm": 3.2218574780251115, + "learning_rate": 1.259590311167238e-06, + "loss": 0.1351, + "step": 17918 + }, + { + "epoch": 2.530217452696978, + "grad_norm": 4.022017394200225, + "learning_rate": 1.2588497538704836e-06, + "loss": 0.1584, + "step": 17919 + }, + { + "epoch": 2.530358655746964, + "grad_norm": 3.574689234653763, + "learning_rate": 1.2581093997142846e-06, + "loss": 0.1882, + "step": 17920 + }, + { + "epoch": 2.53049985879695, + "grad_norm": 3.565516823029238, + "learning_rate": 1.2573692487158507e-06, + "loss": 0.1819, + "step": 17921 + }, + { + "epoch": 2.5306410618469357, + "grad_norm": 2.7373813086691934, + "learning_rate": 1.256629300892379e-06, + "loss": 0.1216, + "step": 17922 + }, + { + "epoch": 2.5307822648969216, + "grad_norm": 2.944383859194827, + "learning_rate": 1.2558895562610652e-06, + "loss": 0.1284, + "step": 17923 + }, + { + "epoch": 2.5309234679469075, + "grad_norm": 3.337290051686374, + "learning_rate": 1.2551500148391026e-06, + "loss": 0.1565, + "step": 17924 + }, + { + "epoch": 2.5310646709968934, + "grad_norm": 3.4279352818867084, + "learning_rate": 1.2544106766436747e-06, + "loss": 0.1437, + "step": 17925 + }, + { + "epoch": 2.5312058740468792, + "grad_norm": 3.3104953242678987, + "learning_rate": 1.2536715416919676e-06, + "loss": 0.1623, + "step": 17926 + }, + { + "epoch": 2.531347077096865, + "grad_norm": 3.2516262132200473, + "learning_rate": 1.2529326100011575e-06, + "loss": 0.1588, + "step": 17927 + }, + { + "epoch": 2.531488280146851, + "grad_norm": 3.2735550571650567, + "learning_rate": 1.252193881588415e-06, + "loss": 0.1121, + "step": 17928 + }, + { + "epoch": 2.531629483196837, + "grad_norm": 2.9386585500832765, + "learning_rate": 1.2514553564709108e-06, + "loss": 0.1513, + "step": 17929 + }, + { + "epoch": 2.5317706862468228, + "grad_norm": 3.8575094432437766, + "learning_rate": 1.2507170346658027e-06, + "loss": 0.1628, + "step": 17930 + }, + { + "epoch": 2.5319118892968087, + "grad_norm": 3.68098040739097, + "learning_rate": 1.2499789161902532e-06, + "loss": 0.2038, + "step": 17931 + }, + { + "epoch": 2.5320530923467945, + "grad_norm": 3.916451549711646, + "learning_rate": 1.2492410010614154e-06, + "loss": 0.2006, + "step": 17932 + }, + { + "epoch": 2.5321942953967804, + "grad_norm": 4.213964961682608, + "learning_rate": 1.2485032892964378e-06, + "loss": 0.2163, + "step": 17933 + }, + { + "epoch": 2.5323354984467663, + "grad_norm": 3.3735887402905838, + "learning_rate": 1.2477657809124632e-06, + "loss": 0.145, + "step": 17934 + }, + { + "epoch": 2.532476701496752, + "grad_norm": 3.001783780144544, + "learning_rate": 1.2470284759266339e-06, + "loss": 0.127, + "step": 17935 + }, + { + "epoch": 2.532617904546738, + "grad_norm": 3.329448586744464, + "learning_rate": 1.246291374356081e-06, + "loss": 0.1341, + "step": 17936 + }, + { + "epoch": 2.532759107596724, + "grad_norm": 3.2206152528201994, + "learning_rate": 1.245554476217935e-06, + "loss": 0.1413, + "step": 17937 + }, + { + "epoch": 2.53290031064671, + "grad_norm": 3.7621965677205145, + "learning_rate": 1.244817781529326e-06, + "loss": 0.1651, + "step": 17938 + }, + { + "epoch": 2.5330415136966957, + "grad_norm": 2.8096233127634243, + "learning_rate": 1.2440812903073685e-06, + "loss": 0.1262, + "step": 17939 + }, + { + "epoch": 2.5331827167466816, + "grad_norm": 3.7880928687248105, + "learning_rate": 1.2433450025691807e-06, + "loss": 0.201, + "step": 17940 + }, + { + "epoch": 2.5333239197966675, + "grad_norm": 3.512445285956427, + "learning_rate": 1.2426089183318736e-06, + "loss": 0.1564, + "step": 17941 + }, + { + "epoch": 2.5334651228466534, + "grad_norm": 3.832429262458635, + "learning_rate": 1.24187303761255e-06, + "loss": 0.2013, + "step": 17942 + }, + { + "epoch": 2.5336063258966393, + "grad_norm": 3.1646061846412508, + "learning_rate": 1.2411373604283173e-06, + "loss": 0.1699, + "step": 17943 + }, + { + "epoch": 2.533747528946625, + "grad_norm": 3.5536167828080103, + "learning_rate": 1.2404018867962697e-06, + "loss": 0.1769, + "step": 17944 + }, + { + "epoch": 2.533888731996611, + "grad_norm": 3.6389528998608354, + "learning_rate": 1.2396666167335002e-06, + "loss": 0.2009, + "step": 17945 + }, + { + "epoch": 2.534029935046597, + "grad_norm": 2.943832723543698, + "learning_rate": 1.2389315502570965e-06, + "loss": 0.1168, + "step": 17946 + }, + { + "epoch": 2.534171138096583, + "grad_norm": 3.507909220029245, + "learning_rate": 1.2381966873841377e-06, + "loss": 0.1412, + "step": 17947 + }, + { + "epoch": 2.5343123411465687, + "grad_norm": 2.9891693906352956, + "learning_rate": 1.2374620281317019e-06, + "loss": 0.1749, + "step": 17948 + }, + { + "epoch": 2.5344535441965546, + "grad_norm": 3.30558816419717, + "learning_rate": 1.236727572516867e-06, + "loss": 0.1495, + "step": 17949 + }, + { + "epoch": 2.5345947472465404, + "grad_norm": 2.4361804430501803, + "learning_rate": 1.2359933205566987e-06, + "loss": 0.1002, + "step": 17950 + }, + { + "epoch": 2.5347359502965263, + "grad_norm": 3.14070621259883, + "learning_rate": 1.23525927226826e-06, + "loss": 0.1449, + "step": 17951 + }, + { + "epoch": 2.534877153346512, + "grad_norm": 3.4048775836978646, + "learning_rate": 1.2345254276686114e-06, + "loss": 0.1873, + "step": 17952 + }, + { + "epoch": 2.535018356396498, + "grad_norm": 2.872671392855132, + "learning_rate": 1.233791786774805e-06, + "loss": 0.1309, + "step": 17953 + }, + { + "epoch": 2.535159559446484, + "grad_norm": 2.453820147212221, + "learning_rate": 1.2330583496038929e-06, + "loss": 0.1124, + "step": 17954 + }, + { + "epoch": 2.53530076249647, + "grad_norm": 3.299496187608401, + "learning_rate": 1.232325116172919e-06, + "loss": 0.1328, + "step": 17955 + }, + { + "epoch": 2.5354419655464557, + "grad_norm": 2.8010480907765274, + "learning_rate": 1.2315920864989218e-06, + "loss": 0.1413, + "step": 17956 + }, + { + "epoch": 2.5355831685964416, + "grad_norm": 3.3913207111764856, + "learning_rate": 1.2308592605989378e-06, + "loss": 0.1528, + "step": 17957 + }, + { + "epoch": 2.5357243716464275, + "grad_norm": 4.269614184310873, + "learning_rate": 1.230126638489998e-06, + "loss": 0.164, + "step": 17958 + }, + { + "epoch": 2.5358655746964134, + "grad_norm": 3.1456667558458546, + "learning_rate": 1.2293942201891275e-06, + "loss": 0.1417, + "step": 17959 + }, + { + "epoch": 2.5360067777463993, + "grad_norm": 2.616921534103586, + "learning_rate": 1.2286620057133459e-06, + "loss": 0.1277, + "step": 17960 + }, + { + "epoch": 2.536147980796385, + "grad_norm": 3.5435002934719284, + "learning_rate": 1.227929995079673e-06, + "loss": 0.1559, + "step": 17961 + }, + { + "epoch": 2.536289183846371, + "grad_norm": 2.499789768934979, + "learning_rate": 1.2271981883051187e-06, + "loss": 0.1076, + "step": 17962 + }, + { + "epoch": 2.536430386896357, + "grad_norm": 2.8052997226549268, + "learning_rate": 1.2264665854066915e-06, + "loss": 0.1169, + "step": 17963 + }, + { + "epoch": 2.536571589946343, + "grad_norm": 3.467757325058574, + "learning_rate": 1.22573518640139e-06, + "loss": 0.1349, + "step": 17964 + }, + { + "epoch": 2.5367127929963287, + "grad_norm": 3.149765665791565, + "learning_rate": 1.2250039913062118e-06, + "loss": 0.1581, + "step": 17965 + }, + { + "epoch": 2.5368539960463146, + "grad_norm": 3.2217024430721994, + "learning_rate": 1.2242730001381532e-06, + "loss": 0.1526, + "step": 17966 + }, + { + "epoch": 2.5369951990963004, + "grad_norm": 2.6459250832005488, + "learning_rate": 1.2235422129141993e-06, + "loss": 0.1272, + "step": 17967 + }, + { + "epoch": 2.5371364021462863, + "grad_norm": 3.563561349237218, + "learning_rate": 1.2228116296513348e-06, + "loss": 0.1554, + "step": 17968 + }, + { + "epoch": 2.537277605196272, + "grad_norm": 2.8949328197456135, + "learning_rate": 1.2220812503665369e-06, + "loss": 0.1592, + "step": 17969 + }, + { + "epoch": 2.537418808246258, + "grad_norm": 4.090879771299867, + "learning_rate": 1.221351075076781e-06, + "loss": 0.2142, + "step": 17970 + }, + { + "epoch": 2.537560011296244, + "grad_norm": 3.3000035737741467, + "learning_rate": 1.2206211037990346e-06, + "loss": 0.159, + "step": 17971 + }, + { + "epoch": 2.53770121434623, + "grad_norm": 3.28890129834144, + "learning_rate": 1.2198913365502606e-06, + "loss": 0.1574, + "step": 17972 + }, + { + "epoch": 2.5378424173962157, + "grad_norm": 3.6398646237433656, + "learning_rate": 1.2191617733474214e-06, + "loss": 0.1599, + "step": 17973 + }, + { + "epoch": 2.5379836204462016, + "grad_norm": 3.6789510764510833, + "learning_rate": 1.218432414207471e-06, + "loss": 0.1407, + "step": 17974 + }, + { + "epoch": 2.5381248234961875, + "grad_norm": 2.397577175634883, + "learning_rate": 1.2177032591473582e-06, + "loss": 0.1011, + "step": 17975 + }, + { + "epoch": 2.5382660265461734, + "grad_norm": 3.037995309263503, + "learning_rate": 1.216974308184029e-06, + "loss": 0.1209, + "step": 17976 + }, + { + "epoch": 2.5384072295961593, + "grad_norm": 3.3815067449837555, + "learning_rate": 1.2162455613344214e-06, + "loss": 0.1664, + "step": 17977 + }, + { + "epoch": 2.538548432646145, + "grad_norm": 3.726942759271576, + "learning_rate": 1.2155170186154753e-06, + "loss": 0.1893, + "step": 17978 + }, + { + "epoch": 2.538689635696131, + "grad_norm": 2.941641481621435, + "learning_rate": 1.2147886800441211e-06, + "loss": 0.1424, + "step": 17979 + }, + { + "epoch": 2.538830838746117, + "grad_norm": 3.6561841344733628, + "learning_rate": 1.2140605456372856e-06, + "loss": 0.1865, + "step": 17980 + }, + { + "epoch": 2.538972041796103, + "grad_norm": 3.7497889684476595, + "learning_rate": 1.2133326154118862e-06, + "loss": 0.1693, + "step": 17981 + }, + { + "epoch": 2.5391132448460887, + "grad_norm": 2.559632452741716, + "learning_rate": 1.2126048893848396e-06, + "loss": 0.1226, + "step": 17982 + }, + { + "epoch": 2.5392544478960746, + "grad_norm": 3.674432623228928, + "learning_rate": 1.2118773675730633e-06, + "loss": 0.18, + "step": 17983 + }, + { + "epoch": 2.5393956509460605, + "grad_norm": 2.69022657877782, + "learning_rate": 1.2111500499934613e-06, + "loss": 0.1137, + "step": 17984 + }, + { + "epoch": 2.5395368539960463, + "grad_norm": 2.2270626533320605, + "learning_rate": 1.2104229366629372e-06, + "loss": 0.1012, + "step": 17985 + }, + { + "epoch": 2.5396780570460322, + "grad_norm": 3.1877022658913474, + "learning_rate": 1.2096960275983872e-06, + "loss": 0.1197, + "step": 17986 + }, + { + "epoch": 2.539819260096018, + "grad_norm": 3.2463164643972675, + "learning_rate": 1.2089693228167054e-06, + "loss": 0.1323, + "step": 17987 + }, + { + "epoch": 2.539960463146004, + "grad_norm": 3.298669733793099, + "learning_rate": 1.208242822334781e-06, + "loss": 0.1598, + "step": 17988 + }, + { + "epoch": 2.54010166619599, + "grad_norm": 2.8560693585959607, + "learning_rate": 1.2075165261694954e-06, + "loss": 0.1217, + "step": 17989 + }, + { + "epoch": 2.5402428692459758, + "grad_norm": 3.8388646052403694, + "learning_rate": 1.206790434337729e-06, + "loss": 0.1884, + "step": 17990 + }, + { + "epoch": 2.5403840722959616, + "grad_norm": 3.1679835728061323, + "learning_rate": 1.206064546856356e-06, + "loss": 0.1321, + "step": 17991 + }, + { + "epoch": 2.5405252753459475, + "grad_norm": 2.9902272323883987, + "learning_rate": 1.2053388637422437e-06, + "loss": 0.1377, + "step": 17992 + }, + { + "epoch": 2.5406664783959334, + "grad_norm": 3.245041660848904, + "learning_rate": 1.2046133850122587e-06, + "loss": 0.1376, + "step": 17993 + }, + { + "epoch": 2.5408076814459193, + "grad_norm": 2.630845905352137, + "learning_rate": 1.203888110683259e-06, + "loss": 0.1008, + "step": 17994 + }, + { + "epoch": 2.540948884495905, + "grad_norm": 2.65815234242837, + "learning_rate": 1.2031630407721018e-06, + "loss": 0.1148, + "step": 17995 + }, + { + "epoch": 2.541090087545891, + "grad_norm": 2.710279901800701, + "learning_rate": 1.2024381752956372e-06, + "loss": 0.1031, + "step": 17996 + }, + { + "epoch": 2.541231290595877, + "grad_norm": 2.942529364963399, + "learning_rate": 1.2017135142707115e-06, + "loss": 0.1428, + "step": 17997 + }, + { + "epoch": 2.541372493645863, + "grad_norm": 3.320646235592123, + "learning_rate": 1.2009890577141625e-06, + "loss": 0.1702, + "step": 17998 + }, + { + "epoch": 2.5415136966958487, + "grad_norm": 3.316460452522499, + "learning_rate": 1.2002648056428257e-06, + "loss": 0.1405, + "step": 17999 + }, + { + "epoch": 2.5416548997458346, + "grad_norm": 3.0692207464299903, + "learning_rate": 1.1995407580735364e-06, + "loss": 0.1505, + "step": 18000 + }, + { + "epoch": 2.5417961027958205, + "grad_norm": 2.6537113515097777, + "learning_rate": 1.1988169150231188e-06, + "loss": 0.1237, + "step": 18001 + }, + { + "epoch": 2.5419373058458063, + "grad_norm": 3.197743118005143, + "learning_rate": 1.1980932765083964e-06, + "loss": 0.1418, + "step": 18002 + }, + { + "epoch": 2.5420785088957922, + "grad_norm": 3.8095796529863266, + "learning_rate": 1.1973698425461832e-06, + "loss": 0.1572, + "step": 18003 + }, + { + "epoch": 2.542219711945778, + "grad_norm": 3.271490336134001, + "learning_rate": 1.196646613153295e-06, + "loss": 0.1694, + "step": 18004 + }, + { + "epoch": 2.542360914995764, + "grad_norm": 2.5480965960182638, + "learning_rate": 1.195923588346537e-06, + "loss": 0.1154, + "step": 18005 + }, + { + "epoch": 2.54250211804575, + "grad_norm": 3.7332051020000576, + "learning_rate": 1.1952007681427124e-06, + "loss": 0.1586, + "step": 18006 + }, + { + "epoch": 2.5426433210957358, + "grad_norm": 3.4506593120840336, + "learning_rate": 1.1944781525586192e-06, + "loss": 0.1775, + "step": 18007 + }, + { + "epoch": 2.5427845241457216, + "grad_norm": 3.262620531093886, + "learning_rate": 1.1937557416110512e-06, + "loss": 0.1101, + "step": 18008 + }, + { + "epoch": 2.5429257271957075, + "grad_norm": 4.028409640785629, + "learning_rate": 1.1930335353167965e-06, + "loss": 0.185, + "step": 18009 + }, + { + "epoch": 2.5430669302456934, + "grad_norm": 2.951010363710715, + "learning_rate": 1.1923115336926394e-06, + "loss": 0.1421, + "step": 18010 + }, + { + "epoch": 2.5432081332956793, + "grad_norm": 3.4403129332878293, + "learning_rate": 1.1915897367553564e-06, + "loss": 0.1095, + "step": 18011 + }, + { + "epoch": 2.543349336345665, + "grad_norm": 2.944385899245262, + "learning_rate": 1.1908681445217263e-06, + "loss": 0.1138, + "step": 18012 + }, + { + "epoch": 2.543490539395651, + "grad_norm": 3.98474718398513, + "learning_rate": 1.1901467570085156e-06, + "loss": 0.1889, + "step": 18013 + }, + { + "epoch": 2.543631742445637, + "grad_norm": 3.1336577949796247, + "learning_rate": 1.189425574232491e-06, + "loss": 0.1448, + "step": 18014 + }, + { + "epoch": 2.543772945495623, + "grad_norm": 2.770974564885659, + "learning_rate": 1.1887045962104105e-06, + "loss": 0.1266, + "step": 18015 + }, + { + "epoch": 2.5439141485456087, + "grad_norm": 2.612356569656299, + "learning_rate": 1.1879838229590269e-06, + "loss": 0.1161, + "step": 18016 + }, + { + "epoch": 2.5440553515955946, + "grad_norm": 3.075850860487638, + "learning_rate": 1.1872632544950958e-06, + "loss": 0.1369, + "step": 18017 + }, + { + "epoch": 2.5441965546455805, + "grad_norm": 3.5621999151482253, + "learning_rate": 1.1865428908353606e-06, + "loss": 0.1515, + "step": 18018 + }, + { + "epoch": 2.5443377576955664, + "grad_norm": 3.2412328872226506, + "learning_rate": 1.1858227319965621e-06, + "loss": 0.1478, + "step": 18019 + }, + { + "epoch": 2.5444789607455522, + "grad_norm": 3.4444295464598937, + "learning_rate": 1.1851027779954373e-06, + "loss": 0.1803, + "step": 18020 + }, + { + "epoch": 2.544620163795538, + "grad_norm": 3.1800538468958575, + "learning_rate": 1.1843830288487167e-06, + "loss": 0.1393, + "step": 18021 + }, + { + "epoch": 2.544761366845524, + "grad_norm": 3.0040951503758935, + "learning_rate": 1.1836634845731288e-06, + "loss": 0.1308, + "step": 18022 + }, + { + "epoch": 2.54490256989551, + "grad_norm": 2.816886911754463, + "learning_rate": 1.1829441451853919e-06, + "loss": 0.1236, + "step": 18023 + }, + { + "epoch": 2.5450437729454958, + "grad_norm": 3.302845677279997, + "learning_rate": 1.1822250107022271e-06, + "loss": 0.142, + "step": 18024 + }, + { + "epoch": 2.5451849759954817, + "grad_norm": 2.9720327913561806, + "learning_rate": 1.1815060811403434e-06, + "loss": 0.1201, + "step": 18025 + }, + { + "epoch": 2.5453261790454675, + "grad_norm": 3.8874735885430507, + "learning_rate": 1.1807873565164507e-06, + "loss": 0.1123, + "step": 18026 + }, + { + "epoch": 2.5454673820954534, + "grad_norm": 3.475900603093404, + "learning_rate": 1.1800688368472512e-06, + "loss": 0.1745, + "step": 18027 + }, + { + "epoch": 2.5456085851454393, + "grad_norm": 2.9107744500302757, + "learning_rate": 1.1793505221494405e-06, + "loss": 0.1559, + "step": 18028 + }, + { + "epoch": 2.545749788195425, + "grad_norm": 3.0786138703302313, + "learning_rate": 1.1786324124397165e-06, + "loss": 0.1468, + "step": 18029 + }, + { + "epoch": 2.545890991245411, + "grad_norm": 3.731138841321838, + "learning_rate": 1.1779145077347653e-06, + "loss": 0.1782, + "step": 18030 + }, + { + "epoch": 2.546032194295397, + "grad_norm": 2.830469795015861, + "learning_rate": 1.177196808051274e-06, + "loss": 0.1555, + "step": 18031 + }, + { + "epoch": 2.546173397345383, + "grad_norm": 3.6894928452401388, + "learning_rate": 1.176479313405916e-06, + "loss": 0.1474, + "step": 18032 + }, + { + "epoch": 2.5463146003953687, + "grad_norm": 3.390879953146881, + "learning_rate": 1.1757620238153656e-06, + "loss": 0.17, + "step": 18033 + }, + { + "epoch": 2.5464558034453546, + "grad_norm": 2.43978710311581, + "learning_rate": 1.175044939296297e-06, + "loss": 0.108, + "step": 18034 + }, + { + "epoch": 2.5465970064953405, + "grad_norm": 3.497768875509265, + "learning_rate": 1.174328059865374e-06, + "loss": 0.1674, + "step": 18035 + }, + { + "epoch": 2.5467382095453264, + "grad_norm": 5.042378530346302, + "learning_rate": 1.173611385539254e-06, + "loss": 0.2188, + "step": 18036 + }, + { + "epoch": 2.5468794125953123, + "grad_norm": 2.9308614221893112, + "learning_rate": 1.1728949163345937e-06, + "loss": 0.1526, + "step": 18037 + }, + { + "epoch": 2.547020615645298, + "grad_norm": 4.380124410677033, + "learning_rate": 1.1721786522680445e-06, + "loss": 0.1817, + "step": 18038 + }, + { + "epoch": 2.547161818695284, + "grad_norm": 3.980288169557924, + "learning_rate": 1.1714625933562507e-06, + "loss": 0.1795, + "step": 18039 + }, + { + "epoch": 2.54730302174527, + "grad_norm": 3.8329245587214715, + "learning_rate": 1.1707467396158524e-06, + "loss": 0.1841, + "step": 18040 + }, + { + "epoch": 2.547444224795256, + "grad_norm": 2.9347048692641127, + "learning_rate": 1.170031091063487e-06, + "loss": 0.1176, + "step": 18041 + }, + { + "epoch": 2.5475854278452417, + "grad_norm": 3.4046373850704796, + "learning_rate": 1.1693156477157863e-06, + "loss": 0.137, + "step": 18042 + }, + { + "epoch": 2.5477266308952276, + "grad_norm": 2.483233751326855, + "learning_rate": 1.1686004095893766e-06, + "loss": 0.1025, + "step": 18043 + }, + { + "epoch": 2.5478678339452134, + "grad_norm": 2.9655940304046453, + "learning_rate": 1.167885376700879e-06, + "loss": 0.1331, + "step": 18044 + }, + { + "epoch": 2.5480090369951993, + "grad_norm": 3.7713804727746787, + "learning_rate": 1.1671705490669082e-06, + "loss": 0.1365, + "step": 18045 + }, + { + "epoch": 2.548150240045185, + "grad_norm": 2.7548235605644606, + "learning_rate": 1.1664559267040821e-06, + "loss": 0.1377, + "step": 18046 + }, + { + "epoch": 2.548291443095171, + "grad_norm": 3.8604996499237973, + "learning_rate": 1.1657415096290058e-06, + "loss": 0.1694, + "step": 18047 + }, + { + "epoch": 2.548432646145157, + "grad_norm": 3.027151331129241, + "learning_rate": 1.1650272978582823e-06, + "loss": 0.1127, + "step": 18048 + }, + { + "epoch": 2.548573849195143, + "grad_norm": 3.106206959979416, + "learning_rate": 1.1643132914085075e-06, + "loss": 0.1136, + "step": 18049 + }, + { + "epoch": 2.5487150522451287, + "grad_norm": 4.167226492324366, + "learning_rate": 1.1635994902962767e-06, + "loss": 0.167, + "step": 18050 + }, + { + "epoch": 2.5488562552951146, + "grad_norm": 2.98155320226957, + "learning_rate": 1.1628858945381738e-06, + "loss": 0.1364, + "step": 18051 + }, + { + "epoch": 2.5489974583451005, + "grad_norm": 3.3069314638588017, + "learning_rate": 1.1621725041507904e-06, + "loss": 0.1563, + "step": 18052 + }, + { + "epoch": 2.5491386613950864, + "grad_norm": 3.0351683589254264, + "learning_rate": 1.1614593191506996e-06, + "loss": 0.1395, + "step": 18053 + }, + { + "epoch": 2.5492798644450723, + "grad_norm": 3.8310010464328776, + "learning_rate": 1.1607463395544782e-06, + "loss": 0.1648, + "step": 18054 + }, + { + "epoch": 2.549421067495058, + "grad_norm": 2.7225886091780804, + "learning_rate": 1.1600335653786932e-06, + "loss": 0.1329, + "step": 18055 + }, + { + "epoch": 2.549562270545044, + "grad_norm": 2.461510485585992, + "learning_rate": 1.159320996639911e-06, + "loss": 0.0946, + "step": 18056 + }, + { + "epoch": 2.54970347359503, + "grad_norm": 3.35022717074725, + "learning_rate": 1.158608633354692e-06, + "loss": 0.1471, + "step": 18057 + }, + { + "epoch": 2.549844676645016, + "grad_norm": 3.6888856154658907, + "learning_rate": 1.1578964755395883e-06, + "loss": 0.138, + "step": 18058 + }, + { + "epoch": 2.5499858796950017, + "grad_norm": 2.8291703678835147, + "learning_rate": 1.1571845232111534e-06, + "loss": 0.1137, + "step": 18059 + }, + { + "epoch": 2.5501270827449876, + "grad_norm": 3.0398151577173684, + "learning_rate": 1.1564727763859306e-06, + "loss": 0.1108, + "step": 18060 + }, + { + "epoch": 2.5502682857949734, + "grad_norm": 2.5235305031587303, + "learning_rate": 1.1557612350804615e-06, + "loss": 0.1092, + "step": 18061 + }, + { + "epoch": 2.5504094888449593, + "grad_norm": 2.5665500716176926, + "learning_rate": 1.1550498993112812e-06, + "loss": 0.1363, + "step": 18062 + }, + { + "epoch": 2.5505506918949448, + "grad_norm": 3.1220160967744626, + "learning_rate": 1.1543387690949192e-06, + "loss": 0.1498, + "step": 18063 + }, + { + "epoch": 2.5506918949449306, + "grad_norm": 3.921495450402653, + "learning_rate": 1.1536278444479066e-06, + "loss": 0.1411, + "step": 18064 + }, + { + "epoch": 2.5508330979949165, + "grad_norm": 3.1476092801392084, + "learning_rate": 1.1529171253867643e-06, + "loss": 0.1689, + "step": 18065 + }, + { + "epoch": 2.5509743010449024, + "grad_norm": 3.6333385522977135, + "learning_rate": 1.1522066119280062e-06, + "loss": 0.1526, + "step": 18066 + }, + { + "epoch": 2.5511155040948883, + "grad_norm": 2.5591838453408333, + "learning_rate": 1.1514963040881444e-06, + "loss": 0.1155, + "step": 18067 + }, + { + "epoch": 2.551256707144874, + "grad_norm": 2.4611500114420806, + "learning_rate": 1.1507862018836846e-06, + "loss": 0.1122, + "step": 18068 + }, + { + "epoch": 2.55139791019486, + "grad_norm": 3.2327999350536327, + "learning_rate": 1.1500763053311347e-06, + "loss": 0.1427, + "step": 18069 + }, + { + "epoch": 2.551539113244846, + "grad_norm": 3.4988095596876376, + "learning_rate": 1.1493666144469894e-06, + "loss": 0.1486, + "step": 18070 + }, + { + "epoch": 2.551680316294832, + "grad_norm": 3.029366707035611, + "learning_rate": 1.1486571292477412e-06, + "loss": 0.1131, + "step": 18071 + }, + { + "epoch": 2.5518215193448177, + "grad_norm": 3.9587800218189613, + "learning_rate": 1.1479478497498796e-06, + "loss": 0.1634, + "step": 18072 + }, + { + "epoch": 2.5519627223948036, + "grad_norm": 2.838950276117492, + "learning_rate": 1.1472387759698855e-06, + "loss": 0.1408, + "step": 18073 + }, + { + "epoch": 2.5521039254447895, + "grad_norm": 2.832469843744986, + "learning_rate": 1.146529907924241e-06, + "loss": 0.1159, + "step": 18074 + }, + { + "epoch": 2.5522451284947754, + "grad_norm": 3.105750667829268, + "learning_rate": 1.145821245629416e-06, + "loss": 0.1525, + "step": 18075 + }, + { + "epoch": 2.5523863315447612, + "grad_norm": 3.782061526304485, + "learning_rate": 1.1451127891018832e-06, + "loss": 0.173, + "step": 18076 + }, + { + "epoch": 2.552527534594747, + "grad_norm": 3.2271584840012326, + "learning_rate": 1.1444045383581037e-06, + "loss": 0.1433, + "step": 18077 + }, + { + "epoch": 2.552668737644733, + "grad_norm": 2.835457888887627, + "learning_rate": 1.143696493414539e-06, + "loss": 0.1372, + "step": 18078 + }, + { + "epoch": 2.552809940694719, + "grad_norm": 2.6953410528419317, + "learning_rate": 1.1429886542876423e-06, + "loss": 0.1274, + "step": 18079 + }, + { + "epoch": 2.5529511437447048, + "grad_norm": 3.702785131878803, + "learning_rate": 1.1422810209938627e-06, + "loss": 0.1914, + "step": 18080 + }, + { + "epoch": 2.5530923467946907, + "grad_norm": 3.6474775356559195, + "learning_rate": 1.1415735935496497e-06, + "loss": 0.179, + "step": 18081 + }, + { + "epoch": 2.5532335498446765, + "grad_norm": 2.8929043996782804, + "learning_rate": 1.1408663719714418e-06, + "loss": 0.1279, + "step": 18082 + }, + { + "epoch": 2.5533747528946624, + "grad_norm": 3.3660289851576866, + "learning_rate": 1.1401593562756718e-06, + "loss": 0.1271, + "step": 18083 + }, + { + "epoch": 2.5535159559446483, + "grad_norm": 4.157580854387625, + "learning_rate": 1.1394525464787708e-06, + "loss": 0.2027, + "step": 18084 + }, + { + "epoch": 2.553657158994634, + "grad_norm": 3.327971017984178, + "learning_rate": 1.1387459425971659e-06, + "loss": 0.1506, + "step": 18085 + }, + { + "epoch": 2.55379836204462, + "grad_norm": 3.012474743731629, + "learning_rate": 1.138039544647279e-06, + "loss": 0.1481, + "step": 18086 + }, + { + "epoch": 2.553939565094606, + "grad_norm": 3.419300261358047, + "learning_rate": 1.1373333526455265e-06, + "loss": 0.19, + "step": 18087 + }, + { + "epoch": 2.554080768144592, + "grad_norm": 3.144514108389352, + "learning_rate": 1.1366273666083194e-06, + "loss": 0.1317, + "step": 18088 + }, + { + "epoch": 2.5542219711945777, + "grad_norm": 2.859776146905121, + "learning_rate": 1.1359215865520645e-06, + "loss": 0.1022, + "step": 18089 + }, + { + "epoch": 2.5543631742445636, + "grad_norm": 2.6622662921025553, + "learning_rate": 1.1352160124931644e-06, + "loss": 0.1109, + "step": 18090 + }, + { + "epoch": 2.5545043772945495, + "grad_norm": 3.0801916851106528, + "learning_rate": 1.1345106444480148e-06, + "loss": 0.1166, + "step": 18091 + }, + { + "epoch": 2.5546455803445354, + "grad_norm": 3.082024860517313, + "learning_rate": 1.1338054824330092e-06, + "loss": 0.1076, + "step": 18092 + }, + { + "epoch": 2.5547867833945213, + "grad_norm": 2.465112602366522, + "learning_rate": 1.1331005264645355e-06, + "loss": 0.1151, + "step": 18093 + }, + { + "epoch": 2.554927986444507, + "grad_norm": 3.08804543734582, + "learning_rate": 1.1323957765589766e-06, + "loss": 0.1236, + "step": 18094 + }, + { + "epoch": 2.555069189494493, + "grad_norm": 3.515058287089412, + "learning_rate": 1.13169123273271e-06, + "loss": 0.1632, + "step": 18095 + }, + { + "epoch": 2.555210392544479, + "grad_norm": 3.10288954747099, + "learning_rate": 1.1309868950021085e-06, + "loss": 0.1463, + "step": 18096 + }, + { + "epoch": 2.555351595594465, + "grad_norm": 3.040650413140555, + "learning_rate": 1.13028276338354e-06, + "loss": 0.1179, + "step": 18097 + }, + { + "epoch": 2.5554927986444507, + "grad_norm": 3.1141678092501315, + "learning_rate": 1.1295788378933713e-06, + "loss": 0.156, + "step": 18098 + }, + { + "epoch": 2.5556340016944366, + "grad_norm": 2.487107612767889, + "learning_rate": 1.1288751185479618e-06, + "loss": 0.1169, + "step": 18099 + }, + { + "epoch": 2.5557752047444224, + "grad_norm": 4.290609793135492, + "learning_rate": 1.1281716053636616e-06, + "loss": 0.1995, + "step": 18100 + }, + { + "epoch": 2.5559164077944083, + "grad_norm": 3.8539310608864423, + "learning_rate": 1.127468298356822e-06, + "loss": 0.1461, + "step": 18101 + }, + { + "epoch": 2.556057610844394, + "grad_norm": 3.48803233667669, + "learning_rate": 1.1267651975437844e-06, + "loss": 0.1318, + "step": 18102 + }, + { + "epoch": 2.55619881389438, + "grad_norm": 3.2696010754786697, + "learning_rate": 1.1260623029408945e-06, + "loss": 0.1471, + "step": 18103 + }, + { + "epoch": 2.556340016944366, + "grad_norm": 3.660739877020991, + "learning_rate": 1.125359614564483e-06, + "loss": 0.139, + "step": 18104 + }, + { + "epoch": 2.556481219994352, + "grad_norm": 3.8033224258300145, + "learning_rate": 1.124657132430883e-06, + "loss": 0.1855, + "step": 18105 + }, + { + "epoch": 2.5566224230443377, + "grad_norm": 3.5532140615639856, + "learning_rate": 1.1239548565564173e-06, + "loss": 0.1538, + "step": 18106 + }, + { + "epoch": 2.5567636260943236, + "grad_norm": 3.712564770397251, + "learning_rate": 1.1232527869574083e-06, + "loss": 0.1714, + "step": 18107 + }, + { + "epoch": 2.5569048291443095, + "grad_norm": 3.290367390283262, + "learning_rate": 1.1225509236501698e-06, + "loss": 0.1763, + "step": 18108 + }, + { + "epoch": 2.5570460321942954, + "grad_norm": 3.460779686341147, + "learning_rate": 1.1218492666510151e-06, + "loss": 0.1718, + "step": 18109 + }, + { + "epoch": 2.5571872352442813, + "grad_norm": 2.689111240164661, + "learning_rate": 1.121147815976248e-06, + "loss": 0.1237, + "step": 18110 + }, + { + "epoch": 2.557328438294267, + "grad_norm": 2.9138543998963446, + "learning_rate": 1.120446571642172e-06, + "loss": 0.1232, + "step": 18111 + }, + { + "epoch": 2.557469641344253, + "grad_norm": 3.26815718229328, + "learning_rate": 1.119745533665083e-06, + "loss": 0.1777, + "step": 18112 + }, + { + "epoch": 2.557610844394239, + "grad_norm": 3.2549554086435664, + "learning_rate": 1.1190447020612726e-06, + "loss": 0.1499, + "step": 18113 + }, + { + "epoch": 2.557752047444225, + "grad_norm": 3.3978601491214597, + "learning_rate": 1.1183440768470255e-06, + "loss": 0.1326, + "step": 18114 + }, + { + "epoch": 2.5578932504942107, + "grad_norm": 3.9562328731414356, + "learning_rate": 1.1176436580386307e-06, + "loss": 0.1592, + "step": 18115 + }, + { + "epoch": 2.5580344535441966, + "grad_norm": 3.2973267990483777, + "learning_rate": 1.1169434456523598e-06, + "loss": 0.1126, + "step": 18116 + }, + { + "epoch": 2.5581756565941824, + "grad_norm": 3.302925173769135, + "learning_rate": 1.1162434397044863e-06, + "loss": 0.1116, + "step": 18117 + }, + { + "epoch": 2.5583168596441683, + "grad_norm": 3.389054406538706, + "learning_rate": 1.1155436402112785e-06, + "loss": 0.1554, + "step": 18118 + }, + { + "epoch": 2.558458062694154, + "grad_norm": 2.73220642882601, + "learning_rate": 1.1148440471889977e-06, + "loss": 0.1529, + "step": 18119 + }, + { + "epoch": 2.55859926574414, + "grad_norm": 4.060580075112043, + "learning_rate": 1.1141446606539063e-06, + "loss": 0.1983, + "step": 18120 + }, + { + "epoch": 2.558740468794126, + "grad_norm": 3.0731511384582855, + "learning_rate": 1.113445480622255e-06, + "loss": 0.0988, + "step": 18121 + }, + { + "epoch": 2.558881671844112, + "grad_norm": 3.7965665954326107, + "learning_rate": 1.1127465071102938e-06, + "loss": 0.2159, + "step": 18122 + }, + { + "epoch": 2.5590228748940977, + "grad_norm": 3.315577842645405, + "learning_rate": 1.1120477401342656e-06, + "loss": 0.1648, + "step": 18123 + }, + { + "epoch": 2.5591640779440836, + "grad_norm": 3.023036913243173, + "learning_rate": 1.1113491797104093e-06, + "loss": 0.1275, + "step": 18124 + }, + { + "epoch": 2.5593052809940695, + "grad_norm": 3.3573236739514294, + "learning_rate": 1.1106508258549587e-06, + "loss": 0.1812, + "step": 18125 + }, + { + "epoch": 2.5594464840440554, + "grad_norm": 3.2516434271076387, + "learning_rate": 1.109952678584144e-06, + "loss": 0.172, + "step": 18126 + }, + { + "epoch": 2.5595876870940413, + "grad_norm": 3.4198705127120337, + "learning_rate": 1.1092547379141905e-06, + "loss": 0.1384, + "step": 18127 + }, + { + "epoch": 2.559728890144027, + "grad_norm": 2.902061681787575, + "learning_rate": 1.108557003861317e-06, + "loss": 0.153, + "step": 18128 + }, + { + "epoch": 2.559870093194013, + "grad_norm": 2.930432337116502, + "learning_rate": 1.1078594764417382e-06, + "loss": 0.1614, + "step": 18129 + }, + { + "epoch": 2.560011296243999, + "grad_norm": 3.552939862115775, + "learning_rate": 1.107162155671665e-06, + "loss": 0.171, + "step": 18130 + }, + { + "epoch": 2.560152499293985, + "grad_norm": 3.586560767200432, + "learning_rate": 1.1064650415673016e-06, + "loss": 0.1433, + "step": 18131 + }, + { + "epoch": 2.5602937023439707, + "grad_norm": 3.4286223640051556, + "learning_rate": 1.1057681341448533e-06, + "loss": 0.1497, + "step": 18132 + }, + { + "epoch": 2.5604349053939566, + "grad_norm": 2.5372202011605998, + "learning_rate": 1.1050714334205104e-06, + "loss": 0.0962, + "step": 18133 + }, + { + "epoch": 2.5605761084439425, + "grad_norm": 2.8853521794412216, + "learning_rate": 1.1043749394104665e-06, + "loss": 0.1395, + "step": 18134 + }, + { + "epoch": 2.5607173114939283, + "grad_norm": 3.9096006147146736, + "learning_rate": 1.1036786521309062e-06, + "loss": 0.1633, + "step": 18135 + }, + { + "epoch": 2.560858514543914, + "grad_norm": 3.054653504504503, + "learning_rate": 1.1029825715980115e-06, + "loss": 0.1364, + "step": 18136 + }, + { + "epoch": 2.5609997175939, + "grad_norm": 3.838996444428988, + "learning_rate": 1.10228669782796e-06, + "loss": 0.1586, + "step": 18137 + }, + { + "epoch": 2.561140920643886, + "grad_norm": 3.135102087103943, + "learning_rate": 1.1015910308369239e-06, + "loss": 0.1104, + "step": 18138 + }, + { + "epoch": 2.561282123693872, + "grad_norm": 3.2179668168799265, + "learning_rate": 1.1008955706410696e-06, + "loss": 0.1598, + "step": 18139 + }, + { + "epoch": 2.5614233267438578, + "grad_norm": 3.271310524133325, + "learning_rate": 1.1002003172565579e-06, + "loss": 0.1493, + "step": 18140 + }, + { + "epoch": 2.5615645297938436, + "grad_norm": 3.5599349137492755, + "learning_rate": 1.0995052706995502e-06, + "loss": 0.2049, + "step": 18141 + }, + { + "epoch": 2.5617057328438295, + "grad_norm": 3.2979125750237177, + "learning_rate": 1.0988104309861913e-06, + "loss": 0.1632, + "step": 18142 + }, + { + "epoch": 2.5618469358938154, + "grad_norm": 3.633654913261387, + "learning_rate": 1.0981157981326374e-06, + "loss": 0.1673, + "step": 18143 + }, + { + "epoch": 2.5619881389438013, + "grad_norm": 3.5391782869550203, + "learning_rate": 1.0974213721550264e-06, + "loss": 0.2031, + "step": 18144 + }, + { + "epoch": 2.562129341993787, + "grad_norm": 3.0082461760370736, + "learning_rate": 1.096727153069499e-06, + "loss": 0.1312, + "step": 18145 + }, + { + "epoch": 2.562270545043773, + "grad_norm": 3.612543799885159, + "learning_rate": 1.0960331408921865e-06, + "loss": 0.1493, + "step": 18146 + }, + { + "epoch": 2.562411748093759, + "grad_norm": 3.8173948131604303, + "learning_rate": 1.0953393356392195e-06, + "loss": 0.173, + "step": 18147 + }, + { + "epoch": 2.562552951143745, + "grad_norm": 2.885934360680415, + "learning_rate": 1.0946457373267183e-06, + "loss": 0.1306, + "step": 18148 + }, + { + "epoch": 2.5626941541937307, + "grad_norm": 3.099886123455802, + "learning_rate": 1.0939523459708078e-06, + "loss": 0.1536, + "step": 18149 + }, + { + "epoch": 2.5628353572437166, + "grad_norm": 3.067752137872937, + "learning_rate": 1.0932591615875975e-06, + "loss": 0.1259, + "step": 18150 + }, + { + "epoch": 2.5629765602937025, + "grad_norm": 3.7695212656849257, + "learning_rate": 1.0925661841931966e-06, + "loss": 0.1488, + "step": 18151 + }, + { + "epoch": 2.5631177633436883, + "grad_norm": 3.573925175723294, + "learning_rate": 1.0918734138037113e-06, + "loss": 0.1671, + "step": 18152 + }, + { + "epoch": 2.5632589663936742, + "grad_norm": 3.4212301352630394, + "learning_rate": 1.0911808504352405e-06, + "loss": 0.1682, + "step": 18153 + }, + { + "epoch": 2.56340016944366, + "grad_norm": 3.6352337091832427, + "learning_rate": 1.0904884941038784e-06, + "loss": 0.1788, + "step": 18154 + }, + { + "epoch": 2.563541372493646, + "grad_norm": 3.778296393861006, + "learning_rate": 1.0897963448257165e-06, + "loss": 0.1815, + "step": 18155 + }, + { + "epoch": 2.563682575543632, + "grad_norm": 2.50133110295319, + "learning_rate": 1.0891044026168407e-06, + "loss": 0.1167, + "step": 18156 + }, + { + "epoch": 2.5638237785936178, + "grad_norm": 3.741980515475324, + "learning_rate": 1.0884126674933293e-06, + "loss": 0.1531, + "step": 18157 + }, + { + "epoch": 2.5639649816436036, + "grad_norm": 3.5704914876166125, + "learning_rate": 1.0877211394712617e-06, + "loss": 0.1635, + "step": 18158 + }, + { + "epoch": 2.5641061846935895, + "grad_norm": 3.240132676162482, + "learning_rate": 1.0870298185667016e-06, + "loss": 0.1503, + "step": 18159 + }, + { + "epoch": 2.5642473877435754, + "grad_norm": 3.7617660447284846, + "learning_rate": 1.086338704795722e-06, + "loss": 0.1453, + "step": 18160 + }, + { + "epoch": 2.5643885907935613, + "grad_norm": 3.7918414763313066, + "learning_rate": 1.0856477981743808e-06, + "loss": 0.2035, + "step": 18161 + }, + { + "epoch": 2.564529793843547, + "grad_norm": 3.3431136720726222, + "learning_rate": 1.0849570987187341e-06, + "loss": 0.1392, + "step": 18162 + }, + { + "epoch": 2.564670996893533, + "grad_norm": 3.220944775321384, + "learning_rate": 1.0842666064448347e-06, + "loss": 0.1839, + "step": 18163 + }, + { + "epoch": 2.564812199943519, + "grad_norm": 2.891574019879635, + "learning_rate": 1.08357632136873e-06, + "loss": 0.1248, + "step": 18164 + }, + { + "epoch": 2.5649534029935044, + "grad_norm": 2.4113915616055595, + "learning_rate": 1.0828862435064603e-06, + "loss": 0.1094, + "step": 18165 + }, + { + "epoch": 2.5650946060434903, + "grad_norm": 3.3791706142500963, + "learning_rate": 1.0821963728740626e-06, + "loss": 0.1252, + "step": 18166 + }, + { + "epoch": 2.565235809093476, + "grad_norm": 2.9113410783353912, + "learning_rate": 1.0815067094875708e-06, + "loss": 0.1431, + "step": 18167 + }, + { + "epoch": 2.565377012143462, + "grad_norm": 2.897003436337826, + "learning_rate": 1.0808172533630113e-06, + "loss": 0.1152, + "step": 18168 + }, + { + "epoch": 2.565518215193448, + "grad_norm": 3.042873770378299, + "learning_rate": 1.0801280045164063e-06, + "loss": 0.1659, + "step": 18169 + }, + { + "epoch": 2.565659418243434, + "grad_norm": 4.321938523503987, + "learning_rate": 1.0794389629637747e-06, + "loss": 0.1832, + "step": 18170 + }, + { + "epoch": 2.5658006212934197, + "grad_norm": 4.114005242173793, + "learning_rate": 1.0787501287211277e-06, + "loss": 0.2262, + "step": 18171 + }, + { + "epoch": 2.5659418243434056, + "grad_norm": 3.213989771688702, + "learning_rate": 1.078061501804476e-06, + "loss": 0.1383, + "step": 18172 + }, + { + "epoch": 2.5660830273933914, + "grad_norm": 3.0151628400762496, + "learning_rate": 1.0773730822298223e-06, + "loss": 0.1117, + "step": 18173 + }, + { + "epoch": 2.5662242304433773, + "grad_norm": 3.103031178639752, + "learning_rate": 1.076684870013165e-06, + "loss": 0.1308, + "step": 18174 + }, + { + "epoch": 2.566365433493363, + "grad_norm": 2.891695603615598, + "learning_rate": 1.0759968651704987e-06, + "loss": 0.1347, + "step": 18175 + }, + { + "epoch": 2.566506636543349, + "grad_norm": 2.465769411976776, + "learning_rate": 1.075309067717808e-06, + "loss": 0.1207, + "step": 18176 + }, + { + "epoch": 2.566647839593335, + "grad_norm": 2.7200952550984683, + "learning_rate": 1.0746214776710827e-06, + "loss": 0.1277, + "step": 18177 + }, + { + "epoch": 2.566789042643321, + "grad_norm": 3.5850253879045555, + "learning_rate": 1.0739340950462996e-06, + "loss": 0.1427, + "step": 18178 + }, + { + "epoch": 2.5669302456933067, + "grad_norm": 2.8567922025712362, + "learning_rate": 1.073246919859432e-06, + "loss": 0.1527, + "step": 18179 + }, + { + "epoch": 2.5670714487432926, + "grad_norm": 3.78069613439659, + "learning_rate": 1.0725599521264518e-06, + "loss": 0.1617, + "step": 18180 + }, + { + "epoch": 2.5672126517932785, + "grad_norm": 2.7094698462715585, + "learning_rate": 1.071873191863323e-06, + "loss": 0.1213, + "step": 18181 + }, + { + "epoch": 2.5673538548432644, + "grad_norm": 2.301144886142465, + "learning_rate": 1.071186639086005e-06, + "loss": 0.1044, + "step": 18182 + }, + { + "epoch": 2.5674950578932503, + "grad_norm": 2.87505874717895, + "learning_rate": 1.0705002938104537e-06, + "loss": 0.1384, + "step": 18183 + }, + { + "epoch": 2.567636260943236, + "grad_norm": 3.17409717148107, + "learning_rate": 1.0698141560526198e-06, + "loss": 0.143, + "step": 18184 + }, + { + "epoch": 2.567777463993222, + "grad_norm": 3.3648154801796095, + "learning_rate": 1.0691282258284474e-06, + "loss": 0.1661, + "step": 18185 + }, + { + "epoch": 2.567918667043208, + "grad_norm": 3.1753453082822976, + "learning_rate": 1.068442503153878e-06, + "loss": 0.1486, + "step": 18186 + }, + { + "epoch": 2.568059870093194, + "grad_norm": 2.932756331752433, + "learning_rate": 1.0677569880448479e-06, + "loss": 0.1173, + "step": 18187 + }, + { + "epoch": 2.5682010731431797, + "grad_norm": 2.972902090876545, + "learning_rate": 1.0670716805172865e-06, + "loss": 0.1361, + "step": 18188 + }, + { + "epoch": 2.5683422761931656, + "grad_norm": 3.6542442153204764, + "learning_rate": 1.066386580587122e-06, + "loss": 0.1798, + "step": 18189 + }, + { + "epoch": 2.5684834792431515, + "grad_norm": 3.3376484908692223, + "learning_rate": 1.0657016882702764e-06, + "loss": 0.1385, + "step": 18190 + }, + { + "epoch": 2.5686246822931373, + "grad_norm": 3.356504209906913, + "learning_rate": 1.0650170035826646e-06, + "loss": 0.1579, + "step": 18191 + }, + { + "epoch": 2.5687658853431232, + "grad_norm": 3.5502112551226648, + "learning_rate": 1.0643325265402016e-06, + "loss": 0.1495, + "step": 18192 + }, + { + "epoch": 2.568907088393109, + "grad_norm": 2.32659482971839, + "learning_rate": 1.063648257158787e-06, + "loss": 0.1217, + "step": 18193 + }, + { + "epoch": 2.569048291443095, + "grad_norm": 2.561751731971891, + "learning_rate": 1.062964195454329e-06, + "loss": 0.136, + "step": 18194 + }, + { + "epoch": 2.569189494493081, + "grad_norm": 3.660093123088902, + "learning_rate": 1.0622803414427252e-06, + "loss": 0.1608, + "step": 18195 + }, + { + "epoch": 2.5693306975430668, + "grad_norm": 3.398086444076299, + "learning_rate": 1.061596695139865e-06, + "loss": 0.163, + "step": 18196 + }, + { + "epoch": 2.5694719005930526, + "grad_norm": 2.635536701318488, + "learning_rate": 1.0609132565616376e-06, + "loss": 0.1373, + "step": 18197 + }, + { + "epoch": 2.5696131036430385, + "grad_norm": 3.305819570136027, + "learning_rate": 1.0602300257239262e-06, + "loss": 0.1376, + "step": 18198 + }, + { + "epoch": 2.5697543066930244, + "grad_norm": 3.808664689361435, + "learning_rate": 1.059547002642608e-06, + "loss": 0.157, + "step": 18199 + }, + { + "epoch": 2.5698955097430103, + "grad_norm": 2.789163044721818, + "learning_rate": 1.0588641873335558e-06, + "loss": 0.1113, + "step": 18200 + }, + { + "epoch": 2.570036712792996, + "grad_norm": 2.8943611642770968, + "learning_rate": 1.0581815798126393e-06, + "loss": 0.1257, + "step": 18201 + }, + { + "epoch": 2.570177915842982, + "grad_norm": 4.243559460351196, + "learning_rate": 1.0574991800957203e-06, + "loss": 0.1814, + "step": 18202 + }, + { + "epoch": 2.570319118892968, + "grad_norm": 3.3450133729402722, + "learning_rate": 1.0568169881986589e-06, + "loss": 0.1234, + "step": 18203 + }, + { + "epoch": 2.570460321942954, + "grad_norm": 3.318325888590931, + "learning_rate": 1.0561350041373086e-06, + "loss": 0.1206, + "step": 18204 + }, + { + "epoch": 2.5706015249929397, + "grad_norm": 3.009003797308653, + "learning_rate": 1.0554532279275154e-06, + "loss": 0.1551, + "step": 18205 + }, + { + "epoch": 2.5707427280429256, + "grad_norm": 3.6622518859616418, + "learning_rate": 1.0547716595851298e-06, + "loss": 0.1947, + "step": 18206 + }, + { + "epoch": 2.5708839310929115, + "grad_norm": 3.24804185062602, + "learning_rate": 1.054090299125986e-06, + "loss": 0.1591, + "step": 18207 + }, + { + "epoch": 2.5710251341428974, + "grad_norm": 2.922792275753425, + "learning_rate": 1.0534091465659212e-06, + "loss": 0.1415, + "step": 18208 + }, + { + "epoch": 2.5711663371928832, + "grad_norm": 3.6057591749240854, + "learning_rate": 1.0527282019207663e-06, + "loss": 0.1782, + "step": 18209 + }, + { + "epoch": 2.571307540242869, + "grad_norm": 3.177133369797866, + "learning_rate": 1.0520474652063395e-06, + "loss": 0.114, + "step": 18210 + }, + { + "epoch": 2.571448743292855, + "grad_norm": 3.1041901881829195, + "learning_rate": 1.0513669364384682e-06, + "loss": 0.1275, + "step": 18211 + }, + { + "epoch": 2.571589946342841, + "grad_norm": 3.86945972660611, + "learning_rate": 1.0506866156329632e-06, + "loss": 0.172, + "step": 18212 + }, + { + "epoch": 2.5717311493928268, + "grad_norm": 4.074950466853784, + "learning_rate": 1.0500065028056372e-06, + "loss": 0.1664, + "step": 18213 + }, + { + "epoch": 2.5718723524428126, + "grad_norm": 3.5395755949801235, + "learning_rate": 1.0493265979722944e-06, + "loss": 0.1751, + "step": 18214 + }, + { + "epoch": 2.5720135554927985, + "grad_norm": 2.6903462539930287, + "learning_rate": 1.0486469011487366e-06, + "loss": 0.1337, + "step": 18215 + }, + { + "epoch": 2.5721547585427844, + "grad_norm": 3.284920940289745, + "learning_rate": 1.0479674123507588e-06, + "loss": 0.1849, + "step": 18216 + }, + { + "epoch": 2.5722959615927703, + "grad_norm": 3.886629190673133, + "learning_rate": 1.0472881315941518e-06, + "loss": 0.1814, + "step": 18217 + }, + { + "epoch": 2.572437164642756, + "grad_norm": 2.5827770557910807, + "learning_rate": 1.046609058894703e-06, + "loss": 0.125, + "step": 18218 + }, + { + "epoch": 2.572578367692742, + "grad_norm": 3.6173817555605985, + "learning_rate": 1.045930194268192e-06, + "loss": 0.1546, + "step": 18219 + }, + { + "epoch": 2.572719570742728, + "grad_norm": 3.0574163647817723, + "learning_rate": 1.0452515377303974e-06, + "loss": 0.1631, + "step": 18220 + }, + { + "epoch": 2.572860773792714, + "grad_norm": 3.0764926182927557, + "learning_rate": 1.0445730892970896e-06, + "loss": 0.1415, + "step": 18221 + }, + { + "epoch": 2.5730019768426997, + "grad_norm": 3.288885087861002, + "learning_rate": 1.0438948489840327e-06, + "loss": 0.1498, + "step": 18222 + }, + { + "epoch": 2.5731431798926856, + "grad_norm": 4.700359879095835, + "learning_rate": 1.0432168168069946e-06, + "loss": 0.204, + "step": 18223 + }, + { + "epoch": 2.5732843829426715, + "grad_norm": 3.322002456412449, + "learning_rate": 1.0425389927817298e-06, + "loss": 0.1193, + "step": 18224 + }, + { + "epoch": 2.5734255859926574, + "grad_norm": 2.3577581401731997, + "learning_rate": 1.0418613769239893e-06, + "loss": 0.0907, + "step": 18225 + }, + { + "epoch": 2.5735667890426432, + "grad_norm": 3.1867619218355734, + "learning_rate": 1.0411839692495241e-06, + "loss": 0.1581, + "step": 18226 + }, + { + "epoch": 2.573707992092629, + "grad_norm": 3.6110460741866346, + "learning_rate": 1.0405067697740711e-06, + "loss": 0.1703, + "step": 18227 + }, + { + "epoch": 2.573849195142615, + "grad_norm": 3.579059029724683, + "learning_rate": 1.0398297785133727e-06, + "loss": 0.1853, + "step": 18228 + }, + { + "epoch": 2.573990398192601, + "grad_norm": 2.164622420654352, + "learning_rate": 1.0391529954831603e-06, + "loss": 0.098, + "step": 18229 + }, + { + "epoch": 2.5741316012425868, + "grad_norm": 2.343851012616829, + "learning_rate": 1.0384764206991638e-06, + "loss": 0.1146, + "step": 18230 + }, + { + "epoch": 2.5742728042925727, + "grad_norm": 3.488372132388196, + "learning_rate": 1.0378000541771038e-06, + "loss": 0.1765, + "step": 18231 + }, + { + "epoch": 2.5744140073425585, + "grad_norm": 3.224470441164652, + "learning_rate": 1.0371238959327001e-06, + "loss": 0.1626, + "step": 18232 + }, + { + "epoch": 2.5745552103925444, + "grad_norm": 3.5011964245965554, + "learning_rate": 1.0364479459816668e-06, + "loss": 0.1504, + "step": 18233 + }, + { + "epoch": 2.5746964134425303, + "grad_norm": 3.025529715933069, + "learning_rate": 1.0357722043397122e-06, + "loss": 0.1489, + "step": 18234 + }, + { + "epoch": 2.574837616492516, + "grad_norm": 4.153344796615868, + "learning_rate": 1.0350966710225408e-06, + "loss": 0.1798, + "step": 18235 + }, + { + "epoch": 2.574978819542502, + "grad_norm": 3.2941692259375777, + "learning_rate": 1.0344213460458496e-06, + "loss": 0.1527, + "step": 18236 + }, + { + "epoch": 2.575120022592488, + "grad_norm": 3.8903219656010717, + "learning_rate": 1.0337462294253353e-06, + "loss": 0.2063, + "step": 18237 + }, + { + "epoch": 2.575261225642474, + "grad_norm": 3.5045042108634528, + "learning_rate": 1.0330713211766864e-06, + "loss": 0.1631, + "step": 18238 + }, + { + "epoch": 2.5754024286924597, + "grad_norm": 2.8978463366662717, + "learning_rate": 1.0323966213155856e-06, + "loss": 0.1161, + "step": 18239 + }, + { + "epoch": 2.5755436317424456, + "grad_norm": 2.598559774526446, + "learning_rate": 1.0317221298577163e-06, + "loss": 0.1372, + "step": 18240 + }, + { + "epoch": 2.5756848347924315, + "grad_norm": 2.720845988033095, + "learning_rate": 1.0310478468187512e-06, + "loss": 0.1219, + "step": 18241 + }, + { + "epoch": 2.5758260378424174, + "grad_norm": 2.7778949173073957, + "learning_rate": 1.0303737722143614e-06, + "loss": 0.1134, + "step": 18242 + }, + { + "epoch": 2.5759672408924033, + "grad_norm": 3.478645413779483, + "learning_rate": 1.0296999060602132e-06, + "loss": 0.1278, + "step": 18243 + }, + { + "epoch": 2.576108443942389, + "grad_norm": 3.1789726678680976, + "learning_rate": 1.0290262483719637e-06, + "loss": 0.1227, + "step": 18244 + }, + { + "epoch": 2.576249646992375, + "grad_norm": 2.9640174092526355, + "learning_rate": 1.0283527991652675e-06, + "loss": 0.1083, + "step": 18245 + }, + { + "epoch": 2.576390850042361, + "grad_norm": 3.3832616879377473, + "learning_rate": 1.0276795584557796e-06, + "loss": 0.1607, + "step": 18246 + }, + { + "epoch": 2.576532053092347, + "grad_norm": 3.0153525818427673, + "learning_rate": 1.0270065262591434e-06, + "loss": 0.1556, + "step": 18247 + }, + { + "epoch": 2.5766732561423327, + "grad_norm": 2.7718519159096564, + "learning_rate": 1.0263337025910015e-06, + "loss": 0.1059, + "step": 18248 + }, + { + "epoch": 2.5768144591923186, + "grad_norm": 2.9121698167443584, + "learning_rate": 1.025661087466988e-06, + "loss": 0.1407, + "step": 18249 + }, + { + "epoch": 2.5769556622423044, + "grad_norm": 2.832800270914015, + "learning_rate": 1.0249886809027355e-06, + "loss": 0.1208, + "step": 18250 + }, + { + "epoch": 2.5770968652922903, + "grad_norm": 3.5259936700038046, + "learning_rate": 1.0243164829138697e-06, + "loss": 0.1525, + "step": 18251 + }, + { + "epoch": 2.577238068342276, + "grad_norm": 3.841510813912918, + "learning_rate": 1.0236444935160129e-06, + "loss": 0.1357, + "step": 18252 + }, + { + "epoch": 2.577379271392262, + "grad_norm": 3.1505332762522236, + "learning_rate": 1.0229727127247812e-06, + "loss": 0.1229, + "step": 18253 + }, + { + "epoch": 2.577520474442248, + "grad_norm": 2.9391792209367043, + "learning_rate": 1.022301140555787e-06, + "loss": 0.1301, + "step": 18254 + }, + { + "epoch": 2.577661677492234, + "grad_norm": 2.9446023011871647, + "learning_rate": 1.0216297770246374e-06, + "loss": 0.1401, + "step": 18255 + }, + { + "epoch": 2.5778028805422197, + "grad_norm": 3.0687947747388544, + "learning_rate": 1.0209586221469336e-06, + "loss": 0.1451, + "step": 18256 + }, + { + "epoch": 2.5779440835922056, + "grad_norm": 3.761039335236753, + "learning_rate": 1.0202876759382729e-06, + "loss": 0.187, + "step": 18257 + }, + { + "epoch": 2.5780852866421915, + "grad_norm": 2.995899272726151, + "learning_rate": 1.0196169384142495e-06, + "loss": 0.1492, + "step": 18258 + }, + { + "epoch": 2.5782264896921774, + "grad_norm": 3.565628582696319, + "learning_rate": 1.01894640959045e-06, + "loss": 0.1615, + "step": 18259 + }, + { + "epoch": 2.5783676927421633, + "grad_norm": 3.128708148480872, + "learning_rate": 1.0182760894824607e-06, + "loss": 0.1655, + "step": 18260 + }, + { + "epoch": 2.578508895792149, + "grad_norm": 3.1857521746964945, + "learning_rate": 1.0176059781058523e-06, + "loss": 0.1242, + "step": 18261 + }, + { + "epoch": 2.578650098842135, + "grad_norm": 2.1670526820191407, + "learning_rate": 1.0169360754762013e-06, + "loss": 0.0957, + "step": 18262 + }, + { + "epoch": 2.578791301892121, + "grad_norm": 3.825303779557316, + "learning_rate": 1.016266381609078e-06, + "loss": 0.1942, + "step": 18263 + }, + { + "epoch": 2.578932504942107, + "grad_norm": 2.9613911884992525, + "learning_rate": 1.0155968965200435e-06, + "loss": 0.115, + "step": 18264 + }, + { + "epoch": 2.5790737079920927, + "grad_norm": 4.1997669042623835, + "learning_rate": 1.0149276202246571e-06, + "loss": 0.1718, + "step": 18265 + }, + { + "epoch": 2.5792149110420786, + "grad_norm": 4.549322040203865, + "learning_rate": 1.014258552738473e-06, + "loss": 0.215, + "step": 18266 + }, + { + "epoch": 2.5793561140920644, + "grad_norm": 3.772133904265247, + "learning_rate": 1.0135896940770396e-06, + "loss": 0.2094, + "step": 18267 + }, + { + "epoch": 2.5794973171420503, + "grad_norm": 2.8829291606143514, + "learning_rate": 1.0129210442559011e-06, + "loss": 0.1578, + "step": 18268 + }, + { + "epoch": 2.579638520192036, + "grad_norm": 3.2274315491051784, + "learning_rate": 1.0122526032905956e-06, + "loss": 0.1465, + "step": 18269 + }, + { + "epoch": 2.579779723242022, + "grad_norm": 2.8738337211240514, + "learning_rate": 1.0115843711966577e-06, + "loss": 0.1386, + "step": 18270 + }, + { + "epoch": 2.579920926292008, + "grad_norm": 3.8014679463134144, + "learning_rate": 1.0109163479896179e-06, + "loss": 0.1743, + "step": 18271 + }, + { + "epoch": 2.580062129341994, + "grad_norm": 3.2462179452088438, + "learning_rate": 1.0102485336849998e-06, + "loss": 0.1353, + "step": 18272 + }, + { + "epoch": 2.5802033323919797, + "grad_norm": 3.1370534420246488, + "learning_rate": 1.0095809282983238e-06, + "loss": 0.1186, + "step": 18273 + }, + { + "epoch": 2.5803445354419656, + "grad_norm": 2.915764062365713, + "learning_rate": 1.0089135318451026e-06, + "loss": 0.1314, + "step": 18274 + }, + { + "epoch": 2.5804857384919515, + "grad_norm": 3.568857921978798, + "learning_rate": 1.0082463443408496e-06, + "loss": 0.1646, + "step": 18275 + }, + { + "epoch": 2.5806269415419374, + "grad_norm": 3.49192486269061, + "learning_rate": 1.00757936580107e-06, + "loss": 0.1475, + "step": 18276 + }, + { + "epoch": 2.5807681445919233, + "grad_norm": 3.464518473456693, + "learning_rate": 1.0069125962412606e-06, + "loss": 0.1562, + "step": 18277 + }, + { + "epoch": 2.580909347641909, + "grad_norm": 3.6667727195765853, + "learning_rate": 1.0062460356769189e-06, + "loss": 0.1423, + "step": 18278 + }, + { + "epoch": 2.581050550691895, + "grad_norm": 3.020328314051827, + "learning_rate": 1.0055796841235331e-06, + "loss": 0.1053, + "step": 18279 + }, + { + "epoch": 2.581191753741881, + "grad_norm": 3.2576259707113926, + "learning_rate": 1.0049135415965926e-06, + "loss": 0.1407, + "step": 18280 + }, + { + "epoch": 2.581332956791867, + "grad_norm": 4.378202415880571, + "learning_rate": 1.004247608111577e-06, + "loss": 0.1748, + "step": 18281 + }, + { + "epoch": 2.5814741598418527, + "grad_norm": 3.405192986948481, + "learning_rate": 1.0035818836839618e-06, + "loss": 0.1405, + "step": 18282 + }, + { + "epoch": 2.5816153628918386, + "grad_norm": 3.4529055194134926, + "learning_rate": 1.0029163683292187e-06, + "loss": 0.1437, + "step": 18283 + }, + { + "epoch": 2.5817565659418245, + "grad_norm": 3.3258724199179834, + "learning_rate": 1.0022510620628145e-06, + "loss": 0.161, + "step": 18284 + }, + { + "epoch": 2.5818977689918103, + "grad_norm": 3.020470991083388, + "learning_rate": 1.001585964900208e-06, + "loss": 0.1512, + "step": 18285 + }, + { + "epoch": 2.582038972041796, + "grad_norm": 3.426111447057368, + "learning_rate": 1.000921076856859e-06, + "loss": 0.1457, + "step": 18286 + }, + { + "epoch": 2.582180175091782, + "grad_norm": 3.769053291077551, + "learning_rate": 1.000256397948217e-06, + "loss": 0.195, + "step": 18287 + }, + { + "epoch": 2.582321378141768, + "grad_norm": 3.095072890120823, + "learning_rate": 9.995919281897304e-07, + "loss": 0.1309, + "step": 18288 + }, + { + "epoch": 2.582462581191754, + "grad_norm": 3.4529234124657546, + "learning_rate": 9.989276675968395e-07, + "loss": 0.1605, + "step": 18289 + }, + { + "epoch": 2.5826037842417398, + "grad_norm": 2.77200983885765, + "learning_rate": 9.982636161849824e-07, + "loss": 0.1402, + "step": 18290 + }, + { + "epoch": 2.5827449872917256, + "grad_norm": 2.834870186750404, + "learning_rate": 9.975997739695898e-07, + "loss": 0.1347, + "step": 18291 + }, + { + "epoch": 2.5828861903417115, + "grad_norm": 2.965429847664991, + "learning_rate": 9.969361409660927e-07, + "loss": 0.1306, + "step": 18292 + }, + { + "epoch": 2.5830273933916974, + "grad_norm": 3.906828374747792, + "learning_rate": 9.962727171899134e-07, + "loss": 0.1879, + "step": 18293 + }, + { + "epoch": 2.5831685964416833, + "grad_norm": 2.8111316104734656, + "learning_rate": 9.95609502656465e-07, + "loss": 0.1111, + "step": 18294 + }, + { + "epoch": 2.583309799491669, + "grad_norm": 3.5890050696312095, + "learning_rate": 9.94946497381164e-07, + "loss": 0.1424, + "step": 18295 + }, + { + "epoch": 2.583451002541655, + "grad_norm": 3.1060321689955024, + "learning_rate": 9.942837013794149e-07, + "loss": 0.152, + "step": 18296 + }, + { + "epoch": 2.583592205591641, + "grad_norm": 3.983963877187781, + "learning_rate": 9.936211146666253e-07, + "loss": 0.2171, + "step": 18297 + }, + { + "epoch": 2.583733408641627, + "grad_norm": 3.7080011791762058, + "learning_rate": 9.929587372581917e-07, + "loss": 0.1538, + "step": 18298 + }, + { + "epoch": 2.5838746116916127, + "grad_norm": 4.166461891936424, + "learning_rate": 9.922965691695076e-07, + "loss": 0.1513, + "step": 18299 + }, + { + "epoch": 2.5840158147415986, + "grad_norm": 3.194478156320871, + "learning_rate": 9.916346104159602e-07, + "loss": 0.1541, + "step": 18300 + }, + { + "epoch": 2.5841570177915845, + "grad_norm": 4.072256611435509, + "learning_rate": 9.909728610129355e-07, + "loss": 0.1462, + "step": 18301 + }, + { + "epoch": 2.5842982208415703, + "grad_norm": 2.648147033425766, + "learning_rate": 9.903113209758098e-07, + "loss": 0.1164, + "step": 18302 + }, + { + "epoch": 2.5844394238915562, + "grad_norm": 3.464268031029866, + "learning_rate": 9.896499903199575e-07, + "loss": 0.1441, + "step": 18303 + }, + { + "epoch": 2.584580626941542, + "grad_norm": 3.7384543989484027, + "learning_rate": 9.889888690607485e-07, + "loss": 0.1064, + "step": 18304 + }, + { + "epoch": 2.584721829991528, + "grad_norm": 2.1705098476602553, + "learning_rate": 9.883279572135474e-07, + "loss": 0.0871, + "step": 18305 + }, + { + "epoch": 2.584863033041514, + "grad_norm": 3.079720609827303, + "learning_rate": 9.876672547937117e-07, + "loss": 0.1572, + "step": 18306 + }, + { + "epoch": 2.5850042360914998, + "grad_norm": 4.368219523345479, + "learning_rate": 9.870067618165968e-07, + "loss": 0.1603, + "step": 18307 + }, + { + "epoch": 2.5851454391414856, + "grad_norm": 3.663606302726044, + "learning_rate": 9.86346478297552e-07, + "loss": 0.1811, + "step": 18308 + }, + { + "epoch": 2.5852866421914715, + "grad_norm": 3.6037107515814126, + "learning_rate": 9.856864042519232e-07, + "loss": 0.1336, + "step": 18309 + }, + { + "epoch": 2.5854278452414574, + "grad_norm": 3.9741541888766507, + "learning_rate": 9.85026539695051e-07, + "loss": 0.2004, + "step": 18310 + }, + { + "epoch": 2.5855690482914433, + "grad_norm": 3.4864291764706783, + "learning_rate": 9.843668846422672e-07, + "loss": 0.1628, + "step": 18311 + }, + { + "epoch": 2.585710251341429, + "grad_norm": 2.6339455431015413, + "learning_rate": 9.83707439108903e-07, + "loss": 0.1222, + "step": 18312 + }, + { + "epoch": 2.585851454391415, + "grad_norm": 2.5777712369622927, + "learning_rate": 9.830482031102828e-07, + "loss": 0.1164, + "step": 18313 + }, + { + "epoch": 2.585992657441401, + "grad_norm": 3.7476792968413597, + "learning_rate": 9.8238917666173e-07, + "loss": 0.1586, + "step": 18314 + }, + { + "epoch": 2.586133860491387, + "grad_norm": 3.0225690930985976, + "learning_rate": 9.817303597785577e-07, + "loss": 0.152, + "step": 18315 + }, + { + "epoch": 2.5862750635413727, + "grad_norm": 3.6375325584193425, + "learning_rate": 9.810717524760783e-07, + "loss": 0.152, + "step": 18316 + }, + { + "epoch": 2.5864162665913586, + "grad_norm": 3.335363696005989, + "learning_rate": 9.804133547695948e-07, + "loss": 0.1509, + "step": 18317 + }, + { + "epoch": 2.5865574696413445, + "grad_norm": 2.5882458028251984, + "learning_rate": 9.79755166674411e-07, + "loss": 0.135, + "step": 18318 + }, + { + "epoch": 2.5866986726913304, + "grad_norm": 3.3573792771875066, + "learning_rate": 9.790971882058208e-07, + "loss": 0.1412, + "step": 18319 + }, + { + "epoch": 2.5868398757413162, + "grad_norm": 3.700453024047953, + "learning_rate": 9.784394193791169e-07, + "loss": 0.1911, + "step": 18320 + }, + { + "epoch": 2.586981078791302, + "grad_norm": 3.143032632048506, + "learning_rate": 9.777818602095846e-07, + "loss": 0.1478, + "step": 18321 + }, + { + "epoch": 2.587122281841288, + "grad_norm": 3.922324275590088, + "learning_rate": 9.77124510712505e-07, + "loss": 0.2086, + "step": 18322 + }, + { + "epoch": 2.587263484891274, + "grad_norm": 3.5645506121854953, + "learning_rate": 9.764673709031558e-07, + "loss": 0.1516, + "step": 18323 + }, + { + "epoch": 2.5874046879412598, + "grad_norm": 3.160020826657492, + "learning_rate": 9.758104407968073e-07, + "loss": 0.1682, + "step": 18324 + }, + { + "epoch": 2.5875458909912457, + "grad_norm": 3.8383875208206293, + "learning_rate": 9.751537204087258e-07, + "loss": 0.1691, + "step": 18325 + }, + { + "epoch": 2.5876870940412315, + "grad_norm": 3.667706949317372, + "learning_rate": 9.74497209754175e-07, + "loss": 0.199, + "step": 18326 + }, + { + "epoch": 2.5878282970912174, + "grad_norm": 3.6154091843481218, + "learning_rate": 9.738409088484135e-07, + "loss": 0.1693, + "step": 18327 + }, + { + "epoch": 2.5879695001412033, + "grad_norm": 3.1755985701535385, + "learning_rate": 9.731848177066905e-07, + "loss": 0.1455, + "step": 18328 + }, + { + "epoch": 2.588110703191189, + "grad_norm": 3.65646232189554, + "learning_rate": 9.725289363442526e-07, + "loss": 0.2244, + "step": 18329 + }, + { + "epoch": 2.588251906241175, + "grad_norm": 3.3581870565933016, + "learning_rate": 9.718732647763419e-07, + "loss": 0.1646, + "step": 18330 + }, + { + "epoch": 2.588393109291161, + "grad_norm": 3.4476195504016522, + "learning_rate": 9.712178030181996e-07, + "loss": 0.1736, + "step": 18331 + }, + { + "epoch": 2.588534312341147, + "grad_norm": 2.6530411755060617, + "learning_rate": 9.705625510850557e-07, + "loss": 0.1257, + "step": 18332 + }, + { + "epoch": 2.5886755153911327, + "grad_norm": 4.5146833004295495, + "learning_rate": 9.699075089921396e-07, + "loss": 0.2019, + "step": 18333 + }, + { + "epoch": 2.5888167184411186, + "grad_norm": 3.413491115029616, + "learning_rate": 9.692526767546727e-07, + "loss": 0.1512, + "step": 18334 + }, + { + "epoch": 2.588957921491104, + "grad_norm": 2.8209104705375903, + "learning_rate": 9.685980543878736e-07, + "loss": 0.1549, + "step": 18335 + }, + { + "epoch": 2.58909912454109, + "grad_norm": 2.9867407600733933, + "learning_rate": 9.679436419069555e-07, + "loss": 0.1688, + "step": 18336 + }, + { + "epoch": 2.589240327591076, + "grad_norm": 4.3618084763326275, + "learning_rate": 9.67289439327127e-07, + "loss": 0.185, + "step": 18337 + }, + { + "epoch": 2.5893815306410617, + "grad_norm": 2.871417702706307, + "learning_rate": 9.666354466635908e-07, + "loss": 0.1273, + "step": 18338 + }, + { + "epoch": 2.5895227336910476, + "grad_norm": 3.7238023210463953, + "learning_rate": 9.659816639315444e-07, + "loss": 0.166, + "step": 18339 + }, + { + "epoch": 2.5896639367410335, + "grad_norm": 3.6545060035030374, + "learning_rate": 9.653280911461837e-07, + "loss": 0.1468, + "step": 18340 + }, + { + "epoch": 2.5898051397910193, + "grad_norm": 3.1487144212494758, + "learning_rate": 9.646747283226965e-07, + "loss": 0.152, + "step": 18341 + }, + { + "epoch": 2.5899463428410052, + "grad_norm": 3.6240648546337226, + "learning_rate": 9.640215754762638e-07, + "loss": 0.1745, + "step": 18342 + }, + { + "epoch": 2.590087545890991, + "grad_norm": 3.006838392716478, + "learning_rate": 9.633686326220704e-07, + "loss": 0.1342, + "step": 18343 + }, + { + "epoch": 2.590228748940977, + "grad_norm": 3.483413851927523, + "learning_rate": 9.627158997752883e-07, + "loss": 0.1998, + "step": 18344 + }, + { + "epoch": 2.590369951990963, + "grad_norm": 3.580504155648478, + "learning_rate": 9.620633769510846e-07, + "loss": 0.1955, + "step": 18345 + }, + { + "epoch": 2.5905111550409488, + "grad_norm": 3.3970746722716214, + "learning_rate": 9.614110641646235e-07, + "loss": 0.1489, + "step": 18346 + }, + { + "epoch": 2.5906523580909346, + "grad_norm": 3.4032976106450548, + "learning_rate": 9.607589614310674e-07, + "loss": 0.1888, + "step": 18347 + }, + { + "epoch": 2.5907935611409205, + "grad_norm": 5.477658141773086, + "learning_rate": 9.601070687655667e-07, + "loss": 0.1731, + "step": 18348 + }, + { + "epoch": 2.5909347641909064, + "grad_norm": 2.9018112518835864, + "learning_rate": 9.594553861832755e-07, + "loss": 0.1231, + "step": 18349 + }, + { + "epoch": 2.5910759672408923, + "grad_norm": 3.1139767000686347, + "learning_rate": 9.588039136993366e-07, + "loss": 0.1157, + "step": 18350 + }, + { + "epoch": 2.591217170290878, + "grad_norm": 2.675594700603319, + "learning_rate": 9.58152651328891e-07, + "loss": 0.1271, + "step": 18351 + }, + { + "epoch": 2.591358373340864, + "grad_norm": 3.097668781536899, + "learning_rate": 9.575015990870717e-07, + "loss": 0.1367, + "step": 18352 + }, + { + "epoch": 2.59149957639085, + "grad_norm": 2.7188537147662237, + "learning_rate": 9.568507569890117e-07, + "loss": 0.1094, + "step": 18353 + }, + { + "epoch": 2.591640779440836, + "grad_norm": 3.390244849828202, + "learning_rate": 9.562001250498333e-07, + "loss": 0.1685, + "step": 18354 + }, + { + "epoch": 2.5917819824908217, + "grad_norm": 3.1706209564964642, + "learning_rate": 9.555497032846583e-07, + "loss": 0.1357, + "step": 18355 + }, + { + "epoch": 2.5919231855408076, + "grad_norm": 3.5925862259729833, + "learning_rate": 9.54899491708603e-07, + "loss": 0.1744, + "step": 18356 + }, + { + "epoch": 2.5920643885907935, + "grad_norm": 3.7043083874845144, + "learning_rate": 9.542494903367772e-07, + "loss": 0.1687, + "step": 18357 + }, + { + "epoch": 2.5922055916407793, + "grad_norm": 2.55443452553865, + "learning_rate": 9.535996991842855e-07, + "loss": 0.1286, + "step": 18358 + }, + { + "epoch": 2.5923467946907652, + "grad_norm": 3.0555556555411436, + "learning_rate": 9.529501182662315e-07, + "loss": 0.1329, + "step": 18359 + }, + { + "epoch": 2.592487997740751, + "grad_norm": 3.1935043627304243, + "learning_rate": 9.523007475977064e-07, + "loss": 0.1522, + "step": 18360 + }, + { + "epoch": 2.592629200790737, + "grad_norm": 5.374659568591887, + "learning_rate": 9.516515871938093e-07, + "loss": 0.184, + "step": 18361 + }, + { + "epoch": 2.592770403840723, + "grad_norm": 3.3664454117300293, + "learning_rate": 9.51002637069619e-07, + "loss": 0.1271, + "step": 18362 + }, + { + "epoch": 2.5929116068907088, + "grad_norm": 3.63843438693323, + "learning_rate": 9.503538972402204e-07, + "loss": 0.1574, + "step": 18363 + }, + { + "epoch": 2.5930528099406946, + "grad_norm": 2.9219045216203536, + "learning_rate": 9.49705367720688e-07, + "loss": 0.1528, + "step": 18364 + }, + { + "epoch": 2.5931940129906805, + "grad_norm": 3.931551353838078, + "learning_rate": 9.49057048526093e-07, + "loss": 0.1708, + "step": 18365 + }, + { + "epoch": 2.5933352160406664, + "grad_norm": 2.7029178333704063, + "learning_rate": 9.484089396715057e-07, + "loss": 0.1632, + "step": 18366 + }, + { + "epoch": 2.5934764190906523, + "grad_norm": 2.8129230331645103, + "learning_rate": 9.47761041171985e-07, + "loss": 0.117, + "step": 18367 + }, + { + "epoch": 2.593617622140638, + "grad_norm": 3.497498093699572, + "learning_rate": 9.47113353042588e-07, + "loss": 0.1396, + "step": 18368 + }, + { + "epoch": 2.593758825190624, + "grad_norm": 3.324400076739421, + "learning_rate": 9.464658752983669e-07, + "loss": 0.1744, + "step": 18369 + }, + { + "epoch": 2.59390002824061, + "grad_norm": 2.598166800816424, + "learning_rate": 9.458186079543697e-07, + "loss": 0.1119, + "step": 18370 + }, + { + "epoch": 2.594041231290596, + "grad_norm": 3.710417554423941, + "learning_rate": 9.451715510256377e-07, + "loss": 0.1859, + "step": 18371 + }, + { + "epoch": 2.5941824343405817, + "grad_norm": 2.9052018960840913, + "learning_rate": 9.445247045272077e-07, + "loss": 0.1293, + "step": 18372 + }, + { + "epoch": 2.5943236373905676, + "grad_norm": 3.9169243696528397, + "learning_rate": 9.438780684741134e-07, + "loss": 0.1597, + "step": 18373 + }, + { + "epoch": 2.5944648404405535, + "grad_norm": 3.8937127838526466, + "learning_rate": 9.432316428813826e-07, + "loss": 0.1495, + "step": 18374 + }, + { + "epoch": 2.5946060434905394, + "grad_norm": 3.4781496635815263, + "learning_rate": 9.425854277640356e-07, + "loss": 0.1498, + "step": 18375 + }, + { + "epoch": 2.5947472465405252, + "grad_norm": 3.200457339988378, + "learning_rate": 9.419394231370926e-07, + "loss": 0.1204, + "step": 18376 + }, + { + "epoch": 2.594888449590511, + "grad_norm": 2.7953736557944997, + "learning_rate": 9.412936290155627e-07, + "loss": 0.1346, + "step": 18377 + }, + { + "epoch": 2.595029652640497, + "grad_norm": 3.069516892070966, + "learning_rate": 9.406480454144617e-07, + "loss": 0.1333, + "step": 18378 + }, + { + "epoch": 2.595170855690483, + "grad_norm": 3.3482820924392653, + "learning_rate": 9.400026723487854e-07, + "loss": 0.1442, + "step": 18379 + }, + { + "epoch": 2.5953120587404688, + "grad_norm": 2.1447076899335977, + "learning_rate": 9.393575098335339e-07, + "loss": 0.0999, + "step": 18380 + }, + { + "epoch": 2.5954532617904547, + "grad_norm": 3.580533487446578, + "learning_rate": 9.387125578837008e-07, + "loss": 0.1547, + "step": 18381 + }, + { + "epoch": 2.5955944648404405, + "grad_norm": 2.8221158808776825, + "learning_rate": 9.380678165142732e-07, + "loss": 0.1272, + "step": 18382 + }, + { + "epoch": 2.5957356678904264, + "grad_norm": 2.9199868201191124, + "learning_rate": 9.374232857402376e-07, + "loss": 0.1464, + "step": 18383 + }, + { + "epoch": 2.5958768709404123, + "grad_norm": 2.957001221242242, + "learning_rate": 9.367789655765703e-07, + "loss": 0.1123, + "step": 18384 + }, + { + "epoch": 2.596018073990398, + "grad_norm": 2.7994137762280547, + "learning_rate": 9.361348560382467e-07, + "loss": 0.1318, + "step": 18385 + }, + { + "epoch": 2.596159277040384, + "grad_norm": 3.192407554789306, + "learning_rate": 9.354909571402349e-07, + "loss": 0.1746, + "step": 18386 + }, + { + "epoch": 2.59630048009037, + "grad_norm": 4.189350189763951, + "learning_rate": 9.348472688974974e-07, + "loss": 0.1525, + "step": 18387 + }, + { + "epoch": 2.596441683140356, + "grad_norm": 4.3105662811083345, + "learning_rate": 9.342037913249957e-07, + "loss": 0.2338, + "step": 18388 + }, + { + "epoch": 2.5965828861903417, + "grad_norm": 2.9950639123945675, + "learning_rate": 9.335605244376821e-07, + "loss": 0.1409, + "step": 18389 + }, + { + "epoch": 2.5967240892403276, + "grad_norm": 3.8586787307094856, + "learning_rate": 9.32917468250506e-07, + "loss": 0.1611, + "step": 18390 + }, + { + "epoch": 2.5968652922903135, + "grad_norm": 3.387232757427889, + "learning_rate": 9.32274622778413e-07, + "loss": 0.1699, + "step": 18391 + }, + { + "epoch": 2.5970064953402994, + "grad_norm": 3.5722260950533875, + "learning_rate": 9.316319880363411e-07, + "loss": 0.1591, + "step": 18392 + }, + { + "epoch": 2.5971476983902853, + "grad_norm": 2.215633431206377, + "learning_rate": 9.309895640392263e-07, + "loss": 0.1071, + "step": 18393 + }, + { + "epoch": 2.597288901440271, + "grad_norm": 2.850018744369343, + "learning_rate": 9.303473508019944e-07, + "loss": 0.1197, + "step": 18394 + }, + { + "epoch": 2.597430104490257, + "grad_norm": 3.355483088207705, + "learning_rate": 9.297053483395779e-07, + "loss": 0.1467, + "step": 18395 + }, + { + "epoch": 2.597571307540243, + "grad_norm": 3.0265153871154036, + "learning_rate": 9.290635566668893e-07, + "loss": 0.1511, + "step": 18396 + }, + { + "epoch": 2.597712510590229, + "grad_norm": 4.126638371637896, + "learning_rate": 9.284219757988466e-07, + "loss": 0.1838, + "step": 18397 + }, + { + "epoch": 2.5978537136402147, + "grad_norm": 2.482472404601543, + "learning_rate": 9.277806057503592e-07, + "loss": 0.088, + "step": 18398 + }, + { + "epoch": 2.5979949166902006, + "grad_norm": 2.6968350259810805, + "learning_rate": 9.271394465363314e-07, + "loss": 0.1232, + "step": 18399 + }, + { + "epoch": 2.5981361197401864, + "grad_norm": 3.217370274763721, + "learning_rate": 9.264984981716663e-07, + "loss": 0.1341, + "step": 18400 + }, + { + "epoch": 2.5982773227901723, + "grad_norm": 2.8785520261275797, + "learning_rate": 9.258577606712571e-07, + "loss": 0.1583, + "step": 18401 + }, + { + "epoch": 2.598418525840158, + "grad_norm": 2.5570120755396193, + "learning_rate": 9.252172340499943e-07, + "loss": 0.1019, + "step": 18402 + }, + { + "epoch": 2.598559728890144, + "grad_norm": 3.654547606714179, + "learning_rate": 9.245769183227649e-07, + "loss": 0.1429, + "step": 18403 + }, + { + "epoch": 2.59870093194013, + "grad_norm": 3.5924303894474408, + "learning_rate": 9.23936813504448e-07, + "loss": 0.159, + "step": 18404 + }, + { + "epoch": 2.598842134990116, + "grad_norm": 4.086384323285141, + "learning_rate": 9.232969196099195e-07, + "loss": 0.1815, + "step": 18405 + }, + { + "epoch": 2.5989833380401017, + "grad_norm": 2.91285595742083, + "learning_rate": 9.22657236654051e-07, + "loss": 0.1381, + "step": 18406 + }, + { + "epoch": 2.5991245410900876, + "grad_norm": 2.697494060377228, + "learning_rate": 9.220177646517081e-07, + "loss": 0.1015, + "step": 18407 + }, + { + "epoch": 2.5992657441400735, + "grad_norm": 3.7240491147614123, + "learning_rate": 9.213785036177525e-07, + "loss": 0.14, + "step": 18408 + }, + { + "epoch": 2.5994069471900594, + "grad_norm": 3.49013499423758, + "learning_rate": 9.207394535670389e-07, + "loss": 0.1996, + "step": 18409 + }, + { + "epoch": 2.5995481502400453, + "grad_norm": 3.337850397139976, + "learning_rate": 9.201006145144198e-07, + "loss": 0.1547, + "step": 18410 + }, + { + "epoch": 2.599689353290031, + "grad_norm": 2.5572678529748174, + "learning_rate": 9.194619864747389e-07, + "loss": 0.115, + "step": 18411 + }, + { + "epoch": 2.599830556340017, + "grad_norm": 3.161816526250413, + "learning_rate": 9.188235694628445e-07, + "loss": 0.1399, + "step": 18412 + }, + { + "epoch": 2.599971759390003, + "grad_norm": 2.6140498499566376, + "learning_rate": 9.181853634935656e-07, + "loss": 0.1222, + "step": 18413 + }, + { + "epoch": 2.600112962439989, + "grad_norm": 2.9292006238781387, + "learning_rate": 9.175473685817371e-07, + "loss": 0.105, + "step": 18414 + }, + { + "epoch": 2.6002541654899747, + "grad_norm": 3.650667952536601, + "learning_rate": 9.16909584742186e-07, + "loss": 0.1952, + "step": 18415 + }, + { + "epoch": 2.6003953685399606, + "grad_norm": 3.2151218005012043, + "learning_rate": 9.162720119897306e-07, + "loss": 0.1365, + "step": 18416 + }, + { + "epoch": 2.6005365715899464, + "grad_norm": 3.2100474186268646, + "learning_rate": 9.156346503391922e-07, + "loss": 0.1717, + "step": 18417 + }, + { + "epoch": 2.6006777746399323, + "grad_norm": 3.503381222526579, + "learning_rate": 9.149974998053823e-07, + "loss": 0.1816, + "step": 18418 + }, + { + "epoch": 2.600818977689918, + "grad_norm": 3.8816447198989485, + "learning_rate": 9.14360560403107e-07, + "loss": 0.1829, + "step": 18419 + }, + { + "epoch": 2.600960180739904, + "grad_norm": 2.639529852748756, + "learning_rate": 9.137238321471675e-07, + "loss": 0.1137, + "step": 18420 + }, + { + "epoch": 2.60110138378989, + "grad_norm": 3.3457615390741275, + "learning_rate": 9.130873150523656e-07, + "loss": 0.1463, + "step": 18421 + }, + { + "epoch": 2.601242586839876, + "grad_norm": 3.6427778278888985, + "learning_rate": 9.124510091334849e-07, + "loss": 0.171, + "step": 18422 + }, + { + "epoch": 2.6013837898898617, + "grad_norm": 3.02820606358471, + "learning_rate": 9.118149144053201e-07, + "loss": 0.159, + "step": 18423 + }, + { + "epoch": 2.6015249929398476, + "grad_norm": 2.5180200573879836, + "learning_rate": 9.111790308826529e-07, + "loss": 0.1064, + "step": 18424 + }, + { + "epoch": 2.6016661959898335, + "grad_norm": 2.553887105706108, + "learning_rate": 9.105433585802592e-07, + "loss": 0.1378, + "step": 18425 + }, + { + "epoch": 2.6018073990398194, + "grad_norm": 3.041914433670801, + "learning_rate": 9.099078975129116e-07, + "loss": 0.1161, + "step": 18426 + }, + { + "epoch": 2.6019486020898053, + "grad_norm": 3.3652955356559935, + "learning_rate": 9.092726476953794e-07, + "loss": 0.1544, + "step": 18427 + }, + { + "epoch": 2.602089805139791, + "grad_norm": 2.9595507964058823, + "learning_rate": 9.086376091424243e-07, + "loss": 0.1357, + "step": 18428 + }, + { + "epoch": 2.602231008189777, + "grad_norm": 2.872505083197353, + "learning_rate": 9.080027818688064e-07, + "loss": 0.1355, + "step": 18429 + }, + { + "epoch": 2.602372211239763, + "grad_norm": 2.757134933735135, + "learning_rate": 9.073681658892775e-07, + "loss": 0.1169, + "step": 18430 + }, + { + "epoch": 2.602513414289749, + "grad_norm": 3.4392805224163094, + "learning_rate": 9.067337612185845e-07, + "loss": 0.1678, + "step": 18431 + }, + { + "epoch": 2.6026546173397347, + "grad_norm": 3.3693641582759466, + "learning_rate": 9.060995678714712e-07, + "loss": 0.1605, + "step": 18432 + }, + { + "epoch": 2.6027958203897206, + "grad_norm": 3.0164463998860733, + "learning_rate": 9.054655858626782e-07, + "loss": 0.1134, + "step": 18433 + }, + { + "epoch": 2.6029370234397065, + "grad_norm": 2.687804952594379, + "learning_rate": 9.048318152069346e-07, + "loss": 0.0943, + "step": 18434 + }, + { + "epoch": 2.6030782264896923, + "grad_norm": 3.270929603810049, + "learning_rate": 9.041982559189732e-07, + "loss": 0.1646, + "step": 18435 + }, + { + "epoch": 2.603219429539678, + "grad_norm": 2.5018703304229777, + "learning_rate": 9.035649080135167e-07, + "loss": 0.1245, + "step": 18436 + }, + { + "epoch": 2.603360632589664, + "grad_norm": 3.714554571802927, + "learning_rate": 9.029317715052855e-07, + "loss": 0.1377, + "step": 18437 + }, + { + "epoch": 2.6035018356396495, + "grad_norm": 3.3281005944695075, + "learning_rate": 9.022988464089888e-07, + "loss": 0.1513, + "step": 18438 + }, + { + "epoch": 2.6036430386896354, + "grad_norm": 2.8103669334882353, + "learning_rate": 9.016661327393361e-07, + "loss": 0.1499, + "step": 18439 + }, + { + "epoch": 2.6037842417396213, + "grad_norm": 3.294043931982125, + "learning_rate": 9.010336305110345e-07, + "loss": 0.1475, + "step": 18440 + }, + { + "epoch": 2.603925444789607, + "grad_norm": 2.4517513756523375, + "learning_rate": 9.004013397387823e-07, + "loss": 0.1038, + "step": 18441 + }, + { + "epoch": 2.604066647839593, + "grad_norm": 3.281614221632581, + "learning_rate": 8.997692604372743e-07, + "loss": 0.1211, + "step": 18442 + }, + { + "epoch": 2.604207850889579, + "grad_norm": 3.6716178466434872, + "learning_rate": 8.991373926211966e-07, + "loss": 0.1819, + "step": 18443 + }, + { + "epoch": 2.604349053939565, + "grad_norm": 3.942594090236599, + "learning_rate": 8.985057363052374e-07, + "loss": 0.1926, + "step": 18444 + }, + { + "epoch": 2.6044902569895507, + "grad_norm": 2.7435912187068427, + "learning_rate": 8.978742915040706e-07, + "loss": 0.1422, + "step": 18445 + }, + { + "epoch": 2.6046314600395366, + "grad_norm": 2.991532503664667, + "learning_rate": 8.972430582323788e-07, + "loss": 0.1417, + "step": 18446 + }, + { + "epoch": 2.6047726630895225, + "grad_norm": 3.6636987975316226, + "learning_rate": 8.966120365048259e-07, + "loss": 0.1654, + "step": 18447 + }, + { + "epoch": 2.6049138661395084, + "grad_norm": 3.4496642637245287, + "learning_rate": 8.959812263360779e-07, + "loss": 0.1562, + "step": 18448 + }, + { + "epoch": 2.6050550691894943, + "grad_norm": 2.2929680611100234, + "learning_rate": 8.953506277407931e-07, + "loss": 0.0942, + "step": 18449 + }, + { + "epoch": 2.60519627223948, + "grad_norm": 4.398204266104556, + "learning_rate": 8.947202407336286e-07, + "loss": 0.1674, + "step": 18450 + }, + { + "epoch": 2.605337475289466, + "grad_norm": 2.6966464633907243, + "learning_rate": 8.940900653292317e-07, + "loss": 0.1181, + "step": 18451 + }, + { + "epoch": 2.605478678339452, + "grad_norm": 2.9546396210945374, + "learning_rate": 8.934601015422506e-07, + "loss": 0.1383, + "step": 18452 + }, + { + "epoch": 2.605619881389438, + "grad_norm": 2.2994411965072628, + "learning_rate": 8.928303493873247e-07, + "loss": 0.1224, + "step": 18453 + }, + { + "epoch": 2.6057610844394237, + "grad_norm": 2.850176117313274, + "learning_rate": 8.92200808879089e-07, + "loss": 0.1138, + "step": 18454 + }, + { + "epoch": 2.6059022874894096, + "grad_norm": 3.231349598333421, + "learning_rate": 8.915714800321729e-07, + "loss": 0.1356, + "step": 18455 + }, + { + "epoch": 2.6060434905393954, + "grad_norm": 3.372588333660419, + "learning_rate": 8.909423628611991e-07, + "loss": 0.1246, + "step": 18456 + }, + { + "epoch": 2.6061846935893813, + "grad_norm": 2.874614374435151, + "learning_rate": 8.903134573807925e-07, + "loss": 0.1316, + "step": 18457 + }, + { + "epoch": 2.606325896639367, + "grad_norm": 2.452533830928694, + "learning_rate": 8.896847636055672e-07, + "loss": 0.0896, + "step": 18458 + }, + { + "epoch": 2.606467099689353, + "grad_norm": 3.403642681478167, + "learning_rate": 8.890562815501336e-07, + "loss": 0.1343, + "step": 18459 + }, + { + "epoch": 2.606608302739339, + "grad_norm": 3.6140994230477914, + "learning_rate": 8.884280112290977e-07, + "loss": 0.1631, + "step": 18460 + }, + { + "epoch": 2.606749505789325, + "grad_norm": 2.9175638237559274, + "learning_rate": 8.877999526570591e-07, + "loss": 0.1423, + "step": 18461 + }, + { + "epoch": 2.6068907088393107, + "grad_norm": 2.731426571107013, + "learning_rate": 8.871721058486149e-07, + "loss": 0.1337, + "step": 18462 + }, + { + "epoch": 2.6070319118892966, + "grad_norm": 4.163641610833574, + "learning_rate": 8.865444708183558e-07, + "loss": 0.1581, + "step": 18463 + }, + { + "epoch": 2.6071731149392825, + "grad_norm": 3.0885503425784906, + "learning_rate": 8.859170475808665e-07, + "loss": 0.1526, + "step": 18464 + }, + { + "epoch": 2.6073143179892684, + "grad_norm": 3.266011038043512, + "learning_rate": 8.8528983615073e-07, + "loss": 0.1672, + "step": 18465 + }, + { + "epoch": 2.6074555210392543, + "grad_norm": 3.6001289235591405, + "learning_rate": 8.846628365425203e-07, + "loss": 0.1612, + "step": 18466 + }, + { + "epoch": 2.60759672408924, + "grad_norm": 3.1661660858851404, + "learning_rate": 8.8403604877081e-07, + "loss": 0.1094, + "step": 18467 + }, + { + "epoch": 2.607737927139226, + "grad_norm": 3.4193864168594734, + "learning_rate": 8.83409472850163e-07, + "loss": 0.1622, + "step": 18468 + }, + { + "epoch": 2.607879130189212, + "grad_norm": 2.8247905413736336, + "learning_rate": 8.827831087951455e-07, + "loss": 0.1445, + "step": 18469 + }, + { + "epoch": 2.608020333239198, + "grad_norm": 3.61594523666992, + "learning_rate": 8.821569566203103e-07, + "loss": 0.1504, + "step": 18470 + }, + { + "epoch": 2.6081615362891837, + "grad_norm": 3.0434120301705687, + "learning_rate": 8.815310163402113e-07, + "loss": 0.1629, + "step": 18471 + }, + { + "epoch": 2.6083027393391696, + "grad_norm": 4.468330861745514, + "learning_rate": 8.809052879693925e-07, + "loss": 0.1944, + "step": 18472 + }, + { + "epoch": 2.6084439423891554, + "grad_norm": 3.0048683731910777, + "learning_rate": 8.802797715223943e-07, + "loss": 0.1247, + "step": 18473 + }, + { + "epoch": 2.6085851454391413, + "grad_norm": 2.4312899958701895, + "learning_rate": 8.796544670137574e-07, + "loss": 0.1042, + "step": 18474 + }, + { + "epoch": 2.608726348489127, + "grad_norm": 2.742654715858938, + "learning_rate": 8.790293744580125e-07, + "loss": 0.0976, + "step": 18475 + }, + { + "epoch": 2.608867551539113, + "grad_norm": 2.7750074174560093, + "learning_rate": 8.784044938696856e-07, + "loss": 0.1319, + "step": 18476 + }, + { + "epoch": 2.609008754589099, + "grad_norm": 4.157019796271099, + "learning_rate": 8.777798252632986e-07, + "loss": 0.2118, + "step": 18477 + }, + { + "epoch": 2.609149957639085, + "grad_norm": 4.0704833123593245, + "learning_rate": 8.771553686533684e-07, + "loss": 0.1373, + "step": 18478 + }, + { + "epoch": 2.6092911606890707, + "grad_norm": 3.7187210076875363, + "learning_rate": 8.765311240544083e-07, + "loss": 0.1779, + "step": 18479 + }, + { + "epoch": 2.6094323637390566, + "grad_norm": 3.5393868696138036, + "learning_rate": 8.759070914809253e-07, + "loss": 0.155, + "step": 18480 + }, + { + "epoch": 2.6095735667890425, + "grad_norm": 3.0891236363084604, + "learning_rate": 8.752832709474202e-07, + "loss": 0.1268, + "step": 18481 + }, + { + "epoch": 2.6097147698390284, + "grad_norm": 2.985675989102364, + "learning_rate": 8.746596624683922e-07, + "loss": 0.1237, + "step": 18482 + }, + { + "epoch": 2.6098559728890143, + "grad_norm": 2.6651186194635814, + "learning_rate": 8.740362660583312e-07, + "loss": 0.1076, + "step": 18483 + }, + { + "epoch": 2.609997175939, + "grad_norm": 3.3805677008532147, + "learning_rate": 8.734130817317277e-07, + "loss": 0.1626, + "step": 18484 + }, + { + "epoch": 2.610138378988986, + "grad_norm": 2.768556217940723, + "learning_rate": 8.7279010950306e-07, + "loss": 0.1144, + "step": 18485 + }, + { + "epoch": 2.610279582038972, + "grad_norm": 2.6117266811163438, + "learning_rate": 8.721673493868111e-07, + "loss": 0.1351, + "step": 18486 + }, + { + "epoch": 2.610420785088958, + "grad_norm": 2.9223396570606, + "learning_rate": 8.715448013974493e-07, + "loss": 0.146, + "step": 18487 + }, + { + "epoch": 2.6105619881389437, + "grad_norm": 3.0047885739597238, + "learning_rate": 8.709224655494475e-07, + "loss": 0.1668, + "step": 18488 + }, + { + "epoch": 2.6107031911889296, + "grad_norm": 2.7512304488414476, + "learning_rate": 8.703003418572631e-07, + "loss": 0.1156, + "step": 18489 + }, + { + "epoch": 2.6108443942389155, + "grad_norm": 2.7040568245715004, + "learning_rate": 8.696784303353534e-07, + "loss": 0.1309, + "step": 18490 + }, + { + "epoch": 2.6109855972889013, + "grad_norm": 2.7510720061071527, + "learning_rate": 8.690567309981756e-07, + "loss": 0.1526, + "step": 18491 + }, + { + "epoch": 2.611126800338887, + "grad_norm": 2.8856062380922824, + "learning_rate": 8.684352438601762e-07, + "loss": 0.1285, + "step": 18492 + }, + { + "epoch": 2.611268003388873, + "grad_norm": 3.8526380084047545, + "learning_rate": 8.67813968935799e-07, + "loss": 0.1907, + "step": 18493 + }, + { + "epoch": 2.611409206438859, + "grad_norm": 3.1514153275588024, + "learning_rate": 8.671929062394802e-07, + "loss": 0.1194, + "step": 18494 + }, + { + "epoch": 2.611550409488845, + "grad_norm": 3.084821146359261, + "learning_rate": 8.66572055785655e-07, + "loss": 0.1835, + "step": 18495 + }, + { + "epoch": 2.6116916125388308, + "grad_norm": 3.035041502218832, + "learning_rate": 8.659514175887495e-07, + "loss": 0.1156, + "step": 18496 + }, + { + "epoch": 2.6118328155888166, + "grad_norm": 3.064193633824566, + "learning_rate": 8.653309916631891e-07, + "loss": 0.1378, + "step": 18497 + }, + { + "epoch": 2.6119740186388025, + "grad_norm": 3.510994964693351, + "learning_rate": 8.647107780233921e-07, + "loss": 0.1566, + "step": 18498 + }, + { + "epoch": 2.6121152216887884, + "grad_norm": 2.6680705367330537, + "learning_rate": 8.640907766837703e-07, + "loss": 0.1223, + "step": 18499 + }, + { + "epoch": 2.6122564247387743, + "grad_norm": 3.9284119326951177, + "learning_rate": 8.634709876587344e-07, + "loss": 0.1389, + "step": 18500 + }, + { + "epoch": 2.61239762778876, + "grad_norm": 2.6334504647931065, + "learning_rate": 8.628514109626863e-07, + "loss": 0.1197, + "step": 18501 + }, + { + "epoch": 2.612538830838746, + "grad_norm": 3.418581090427093, + "learning_rate": 8.622320466100242e-07, + "loss": 0.1404, + "step": 18502 + }, + { + "epoch": 2.612680033888732, + "grad_norm": 2.889263219676667, + "learning_rate": 8.616128946151436e-07, + "loss": 0.09, + "step": 18503 + }, + { + "epoch": 2.612821236938718, + "grad_norm": 3.1098580543392, + "learning_rate": 8.60993954992434e-07, + "loss": 0.1296, + "step": 18504 + }, + { + "epoch": 2.6129624399887037, + "grad_norm": 3.0683724597727147, + "learning_rate": 8.603752277562794e-07, + "loss": 0.1337, + "step": 18505 + }, + { + "epoch": 2.6131036430386896, + "grad_norm": 3.308085635623813, + "learning_rate": 8.59756712921056e-07, + "loss": 0.1456, + "step": 18506 + }, + { + "epoch": 2.6132448460886755, + "grad_norm": 3.846191803838156, + "learning_rate": 8.591384105011369e-07, + "loss": 0.1898, + "step": 18507 + }, + { + "epoch": 2.6133860491386613, + "grad_norm": 3.101796510268107, + "learning_rate": 8.585203205108949e-07, + "loss": 0.1804, + "step": 18508 + }, + { + "epoch": 2.6135272521886472, + "grad_norm": 3.4606210252000413, + "learning_rate": 8.579024429646932e-07, + "loss": 0.1833, + "step": 18509 + }, + { + "epoch": 2.613668455238633, + "grad_norm": 2.443871383666741, + "learning_rate": 8.572847778768912e-07, + "loss": 0.1079, + "step": 18510 + }, + { + "epoch": 2.613809658288619, + "grad_norm": 2.8282772863272085, + "learning_rate": 8.566673252618419e-07, + "loss": 0.1471, + "step": 18511 + }, + { + "epoch": 2.613950861338605, + "grad_norm": 3.306219646785648, + "learning_rate": 8.560500851338949e-07, + "loss": 0.1857, + "step": 18512 + }, + { + "epoch": 2.6140920643885908, + "grad_norm": 2.627155728185175, + "learning_rate": 8.554330575073954e-07, + "loss": 0.1153, + "step": 18513 + }, + { + "epoch": 2.6142332674385766, + "grad_norm": 3.4758714752145585, + "learning_rate": 8.548162423966832e-07, + "loss": 0.1555, + "step": 18514 + }, + { + "epoch": 2.6143744704885625, + "grad_norm": 2.878832692572717, + "learning_rate": 8.541996398160912e-07, + "loss": 0.105, + "step": 18515 + }, + { + "epoch": 2.6145156735385484, + "grad_norm": 3.5163895708867723, + "learning_rate": 8.53583249779949e-07, + "loss": 0.1413, + "step": 18516 + }, + { + "epoch": 2.6146568765885343, + "grad_norm": 3.0580466331721032, + "learning_rate": 8.529670723025829e-07, + "loss": 0.1443, + "step": 18517 + }, + { + "epoch": 2.61479807963852, + "grad_norm": 3.9006667615474058, + "learning_rate": 8.523511073983127e-07, + "loss": 0.2118, + "step": 18518 + }, + { + "epoch": 2.614939282688506, + "grad_norm": 3.686763496210329, + "learning_rate": 8.517353550814488e-07, + "loss": 0.1582, + "step": 18519 + }, + { + "epoch": 2.615080485738492, + "grad_norm": 3.4311146846229956, + "learning_rate": 8.511198153663069e-07, + "loss": 0.1838, + "step": 18520 + }, + { + "epoch": 2.615221688788478, + "grad_norm": 2.8511727309338393, + "learning_rate": 8.505044882671898e-07, + "loss": 0.107, + "step": 18521 + }, + { + "epoch": 2.6153628918384637, + "grad_norm": 2.989363149261534, + "learning_rate": 8.498893737983982e-07, + "loss": 0.1259, + "step": 18522 + }, + { + "epoch": 2.6155040948884496, + "grad_norm": 4.0455964987841, + "learning_rate": 8.49274471974224e-07, + "loss": 0.1997, + "step": 18523 + }, + { + "epoch": 2.6156452979384355, + "grad_norm": 3.0527422692495625, + "learning_rate": 8.486597828089594e-07, + "loss": 0.1854, + "step": 18524 + }, + { + "epoch": 2.6157865009884214, + "grad_norm": 2.8269618024783503, + "learning_rate": 8.48045306316887e-07, + "loss": 0.1292, + "step": 18525 + }, + { + "epoch": 2.6159277040384072, + "grad_norm": 2.8455273781896295, + "learning_rate": 8.474310425122923e-07, + "loss": 0.1308, + "step": 18526 + }, + { + "epoch": 2.616068907088393, + "grad_norm": 3.1139280376138254, + "learning_rate": 8.46816991409446e-07, + "loss": 0.1568, + "step": 18527 + }, + { + "epoch": 2.616210110138379, + "grad_norm": 3.9554847710306453, + "learning_rate": 8.462031530226211e-07, + "loss": 0.174, + "step": 18528 + }, + { + "epoch": 2.616351313188365, + "grad_norm": 2.9066118526979503, + "learning_rate": 8.455895273660808e-07, + "loss": 0.135, + "step": 18529 + }, + { + "epoch": 2.6164925162383508, + "grad_norm": 2.7462574457926148, + "learning_rate": 8.449761144540869e-07, + "loss": 0.0945, + "step": 18530 + }, + { + "epoch": 2.6166337192883367, + "grad_norm": 2.5041610332760493, + "learning_rate": 8.443629143008946e-07, + "loss": 0.0991, + "step": 18531 + }, + { + "epoch": 2.6167749223383225, + "grad_norm": 3.6957642545362277, + "learning_rate": 8.437499269207538e-07, + "loss": 0.1534, + "step": 18532 + }, + { + "epoch": 2.6169161253883084, + "grad_norm": 3.591065504391603, + "learning_rate": 8.431371523279108e-07, + "loss": 0.1651, + "step": 18533 + }, + { + "epoch": 2.6170573284382943, + "grad_norm": 2.6240083356241497, + "learning_rate": 8.425245905366052e-07, + "loss": 0.0831, + "step": 18534 + }, + { + "epoch": 2.61719853148828, + "grad_norm": 3.1574352043571543, + "learning_rate": 8.419122415610736e-07, + "loss": 0.1569, + "step": 18535 + }, + { + "epoch": 2.617339734538266, + "grad_norm": 3.472848527420189, + "learning_rate": 8.413001054155467e-07, + "loss": 0.1488, + "step": 18536 + }, + { + "epoch": 2.617480937588252, + "grad_norm": 5.120196302408893, + "learning_rate": 8.406881821142477e-07, + "loss": 0.2207, + "step": 18537 + }, + { + "epoch": 2.617622140638238, + "grad_norm": 3.358273129573705, + "learning_rate": 8.400764716714016e-07, + "loss": 0.1524, + "step": 18538 + }, + { + "epoch": 2.6177633436882237, + "grad_norm": 2.6259012009595146, + "learning_rate": 8.394649741012251e-07, + "loss": 0.1352, + "step": 18539 + }, + { + "epoch": 2.6179045467382096, + "grad_norm": 3.91703804465611, + "learning_rate": 8.388536894179234e-07, + "loss": 0.213, + "step": 18540 + }, + { + "epoch": 2.6180457497881955, + "grad_norm": 3.206796063010781, + "learning_rate": 8.382426176357062e-07, + "loss": 0.1743, + "step": 18541 + }, + { + "epoch": 2.6181869528381814, + "grad_norm": 2.8297321351329954, + "learning_rate": 8.376317587687721e-07, + "loss": 0.1285, + "step": 18542 + }, + { + "epoch": 2.6183281558881673, + "grad_norm": 2.9457023988103366, + "learning_rate": 8.37021112831321e-07, + "loss": 0.1175, + "step": 18543 + }, + { + "epoch": 2.618469358938153, + "grad_norm": 2.818773920446437, + "learning_rate": 8.364106798375416e-07, + "loss": 0.1333, + "step": 18544 + }, + { + "epoch": 2.618610561988139, + "grad_norm": 3.163273668877417, + "learning_rate": 8.358004598016212e-07, + "loss": 0.1386, + "step": 18545 + }, + { + "epoch": 2.618751765038125, + "grad_norm": 3.4624315569820365, + "learning_rate": 8.351904527377397e-07, + "loss": 0.1379, + "step": 18546 + }, + { + "epoch": 2.618892968088111, + "grad_norm": 3.070519560619336, + "learning_rate": 8.345806586600736e-07, + "loss": 0.1228, + "step": 18547 + }, + { + "epoch": 2.6190341711380967, + "grad_norm": 3.4559405679045265, + "learning_rate": 8.339710775827958e-07, + "loss": 0.1507, + "step": 18548 + }, + { + "epoch": 2.6191753741880826, + "grad_norm": 3.7790037710013156, + "learning_rate": 8.333617095200719e-07, + "loss": 0.1707, + "step": 18549 + }, + { + "epoch": 2.6193165772380684, + "grad_norm": 2.978509378372071, + "learning_rate": 8.327525544860626e-07, + "loss": 0.1397, + "step": 18550 + }, + { + "epoch": 2.6194577802880543, + "grad_norm": 3.226123724845677, + "learning_rate": 8.321436124949245e-07, + "loss": 0.1403, + "step": 18551 + }, + { + "epoch": 2.61959898333804, + "grad_norm": 3.0090719787076026, + "learning_rate": 8.315348835608095e-07, + "loss": 0.1068, + "step": 18552 + }, + { + "epoch": 2.619740186388026, + "grad_norm": 3.482269879969656, + "learning_rate": 8.309263676978651e-07, + "loss": 0.1497, + "step": 18553 + }, + { + "epoch": 2.619881389438012, + "grad_norm": 3.979639722052874, + "learning_rate": 8.303180649202303e-07, + "loss": 0.1735, + "step": 18554 + }, + { + "epoch": 2.620022592487998, + "grad_norm": 4.156237673347555, + "learning_rate": 8.297099752420446e-07, + "loss": 0.21, + "step": 18555 + }, + { + "epoch": 2.6201637955379837, + "grad_norm": 3.239971543062821, + "learning_rate": 8.291020986774412e-07, + "loss": 0.1419, + "step": 18556 + }, + { + "epoch": 2.6203049985879696, + "grad_norm": 2.977123413015261, + "learning_rate": 8.284944352405421e-07, + "loss": 0.1382, + "step": 18557 + }, + { + "epoch": 2.6204462016379555, + "grad_norm": 3.219361151237995, + "learning_rate": 8.278869849454718e-07, + "loss": 0.1359, + "step": 18558 + }, + { + "epoch": 2.6205874046879414, + "grad_norm": 2.9722790252010176, + "learning_rate": 8.272797478063444e-07, + "loss": 0.0924, + "step": 18559 + }, + { + "epoch": 2.6207286077379273, + "grad_norm": 2.6638093870443122, + "learning_rate": 8.266727238372763e-07, + "loss": 0.1027, + "step": 18560 + }, + { + "epoch": 2.620869810787913, + "grad_norm": 3.578980251017331, + "learning_rate": 8.260659130523729e-07, + "loss": 0.2191, + "step": 18561 + }, + { + "epoch": 2.621011013837899, + "grad_norm": 3.8795159145237905, + "learning_rate": 8.254593154657353e-07, + "loss": 0.1779, + "step": 18562 + }, + { + "epoch": 2.621152216887885, + "grad_norm": 2.9033614989322056, + "learning_rate": 8.248529310914622e-07, + "loss": 0.1386, + "step": 18563 + }, + { + "epoch": 2.621293419937871, + "grad_norm": 3.233262525916338, + "learning_rate": 8.242467599436432e-07, + "loss": 0.1535, + "step": 18564 + }, + { + "epoch": 2.6214346229878567, + "grad_norm": 2.536726968121742, + "learning_rate": 8.236408020363673e-07, + "loss": 0.123, + "step": 18565 + }, + { + "epoch": 2.6215758260378426, + "grad_norm": 2.758714654726664, + "learning_rate": 8.230350573837165e-07, + "loss": 0.1249, + "step": 18566 + }, + { + "epoch": 2.6217170290878284, + "grad_norm": 2.7968602363188553, + "learning_rate": 8.224295259997672e-07, + "loss": 0.1389, + "step": 18567 + }, + { + "epoch": 2.6218582321378143, + "grad_norm": 3.66987139141581, + "learning_rate": 8.218242078985917e-07, + "loss": 0.16, + "step": 18568 + }, + { + "epoch": 2.6219994351878, + "grad_norm": 3.479434860609257, + "learning_rate": 8.212191030942585e-07, + "loss": 0.1843, + "step": 18569 + }, + { + "epoch": 2.622140638237786, + "grad_norm": 3.276790297920599, + "learning_rate": 8.206142116008298e-07, + "loss": 0.141, + "step": 18570 + }, + { + "epoch": 2.622281841287772, + "grad_norm": 2.9559581350602713, + "learning_rate": 8.20009533432361e-07, + "loss": 0.1186, + "step": 18571 + }, + { + "epoch": 2.622423044337758, + "grad_norm": 3.431912455833615, + "learning_rate": 8.194050686029065e-07, + "loss": 0.1457, + "step": 18572 + }, + { + "epoch": 2.6225642473877437, + "grad_norm": 3.381263864116239, + "learning_rate": 8.18800817126516e-07, + "loss": 0.1614, + "step": 18573 + }, + { + "epoch": 2.6227054504377296, + "grad_norm": 3.615609036331113, + "learning_rate": 8.181967790172274e-07, + "loss": 0.1747, + "step": 18574 + }, + { + "epoch": 2.6228466534877155, + "grad_norm": 3.0238749172850574, + "learning_rate": 8.175929542890804e-07, + "loss": 0.1325, + "step": 18575 + }, + { + "epoch": 2.6229878565377014, + "grad_norm": 3.254156174912789, + "learning_rate": 8.16989342956106e-07, + "loss": 0.1847, + "step": 18576 + }, + { + "epoch": 2.6231290595876873, + "grad_norm": 2.8594496496045627, + "learning_rate": 8.163859450323352e-07, + "loss": 0.0957, + "step": 18577 + }, + { + "epoch": 2.623270262637673, + "grad_norm": 3.2082068641768267, + "learning_rate": 8.157827605317892e-07, + "loss": 0.1535, + "step": 18578 + }, + { + "epoch": 2.623411465687659, + "grad_norm": 3.275585560640901, + "learning_rate": 8.151797894684855e-07, + "loss": 0.1821, + "step": 18579 + }, + { + "epoch": 2.623552668737645, + "grad_norm": 2.4903124203877898, + "learning_rate": 8.145770318564361e-07, + "loss": 0.0868, + "step": 18580 + }, + { + "epoch": 2.623693871787631, + "grad_norm": 3.212897743169331, + "learning_rate": 8.139744877096501e-07, + "loss": 0.153, + "step": 18581 + }, + { + "epoch": 2.6238350748376167, + "grad_norm": 3.737551157070766, + "learning_rate": 8.133721570421305e-07, + "loss": 0.1569, + "step": 18582 + }, + { + "epoch": 2.6239762778876026, + "grad_norm": 2.7148767719416367, + "learning_rate": 8.127700398678728e-07, + "loss": 0.1077, + "step": 18583 + }, + { + "epoch": 2.6241174809375885, + "grad_norm": 4.078864081976734, + "learning_rate": 8.121681362008737e-07, + "loss": 0.2019, + "step": 18584 + }, + { + "epoch": 2.6242586839875743, + "grad_norm": 3.601278645883798, + "learning_rate": 8.115664460551176e-07, + "loss": 0.1376, + "step": 18585 + }, + { + "epoch": 2.62439988703756, + "grad_norm": 3.1338338331331284, + "learning_rate": 8.109649694445898e-07, + "loss": 0.148, + "step": 18586 + }, + { + "epoch": 2.624541090087546, + "grad_norm": 3.570285763155862, + "learning_rate": 8.103637063832681e-07, + "loss": 0.1473, + "step": 18587 + }, + { + "epoch": 2.624682293137532, + "grad_norm": 2.9199389773191715, + "learning_rate": 8.097626568851224e-07, + "loss": 0.1553, + "step": 18588 + }, + { + "epoch": 2.624823496187518, + "grad_norm": 3.406069107733788, + "learning_rate": 8.09161820964126e-07, + "loss": 0.1713, + "step": 18589 + }, + { + "epoch": 2.6249646992375038, + "grad_norm": 3.302911973130976, + "learning_rate": 8.085611986342423e-07, + "loss": 0.1586, + "step": 18590 + }, + { + "epoch": 2.6251059022874896, + "grad_norm": 3.9790853663164643, + "learning_rate": 8.079607899094233e-07, + "loss": 0.2155, + "step": 18591 + }, + { + "epoch": 2.6252471053374755, + "grad_norm": 3.411352472129503, + "learning_rate": 8.073605948036267e-07, + "loss": 0.1436, + "step": 18592 + }, + { + "epoch": 2.6253883083874614, + "grad_norm": 3.2311082206538217, + "learning_rate": 8.067606133307981e-07, + "loss": 0.1286, + "step": 18593 + }, + { + "epoch": 2.6255295114374473, + "grad_norm": 3.2993853586225845, + "learning_rate": 8.061608455048841e-07, + "loss": 0.1504, + "step": 18594 + }, + { + "epoch": 2.625670714487433, + "grad_norm": 4.471999599331549, + "learning_rate": 8.055612913398226e-07, + "loss": 0.2412, + "step": 18595 + }, + { + "epoch": 2.625811917537419, + "grad_norm": 2.870247639407375, + "learning_rate": 8.049619508495454e-07, + "loss": 0.1366, + "step": 18596 + }, + { + "epoch": 2.625953120587405, + "grad_norm": 2.9492708095911855, + "learning_rate": 8.043628240479806e-07, + "loss": 0.1151, + "step": 18597 + }, + { + "epoch": 2.626094323637391, + "grad_norm": 3.4433865576111966, + "learning_rate": 8.037639109490524e-07, + "loss": 0.1712, + "step": 18598 + }, + { + "epoch": 2.6262355266873767, + "grad_norm": 2.582860090478288, + "learning_rate": 8.03165211566681e-07, + "loss": 0.1455, + "step": 18599 + }, + { + "epoch": 2.6263767297373626, + "grad_norm": 3.309675267075451, + "learning_rate": 8.025667259147773e-07, + "loss": 0.1233, + "step": 18600 + }, + { + "epoch": 2.6265179327873485, + "grad_norm": 2.8408444592545146, + "learning_rate": 8.019684540072503e-07, + "loss": 0.1283, + "step": 18601 + }, + { + "epoch": 2.6266591358373343, + "grad_norm": 2.8907048939162685, + "learning_rate": 8.013703958580044e-07, + "loss": 0.1537, + "step": 18602 + }, + { + "epoch": 2.6268003388873202, + "grad_norm": 2.6250776455997777, + "learning_rate": 8.007725514809384e-07, + "loss": 0.1294, + "step": 18603 + }, + { + "epoch": 2.626941541937306, + "grad_norm": 3.4953752357274404, + "learning_rate": 8.001749208899445e-07, + "loss": 0.1194, + "step": 18604 + }, + { + "epoch": 2.627082744987292, + "grad_norm": 2.7664118571509775, + "learning_rate": 7.995775040989118e-07, + "loss": 0.1312, + "step": 18605 + }, + { + "epoch": 2.627223948037278, + "grad_norm": 2.6355052276358757, + "learning_rate": 7.989803011217256e-07, + "loss": 0.1415, + "step": 18606 + }, + { + "epoch": 2.6273651510872638, + "grad_norm": 3.2279794991620046, + "learning_rate": 7.98383311972265e-07, + "loss": 0.1391, + "step": 18607 + }, + { + "epoch": 2.627506354137249, + "grad_norm": 2.8667038568317884, + "learning_rate": 7.977865366644011e-07, + "loss": 0.133, + "step": 18608 + }, + { + "epoch": 2.627647557187235, + "grad_norm": 3.377232113929168, + "learning_rate": 7.97189975212005e-07, + "loss": 0.145, + "step": 18609 + }, + { + "epoch": 2.627788760237221, + "grad_norm": 3.5200425137646936, + "learning_rate": 7.965936276289366e-07, + "loss": 0.1945, + "step": 18610 + }, + { + "epoch": 2.627929963287207, + "grad_norm": 4.189134215480756, + "learning_rate": 7.959974939290593e-07, + "loss": 0.1854, + "step": 18611 + }, + { + "epoch": 2.6280711663371927, + "grad_norm": 2.4454489038190594, + "learning_rate": 7.954015741262255e-07, + "loss": 0.1119, + "step": 18612 + }, + { + "epoch": 2.6282123693871786, + "grad_norm": 2.5690779391069065, + "learning_rate": 7.94805868234284e-07, + "loss": 0.1183, + "step": 18613 + }, + { + "epoch": 2.6283535724371645, + "grad_norm": 2.886270456390757, + "learning_rate": 7.942103762670783e-07, + "loss": 0.1099, + "step": 18614 + }, + { + "epoch": 2.6284947754871504, + "grad_norm": 3.2508145471254704, + "learning_rate": 7.936150982384495e-07, + "loss": 0.1522, + "step": 18615 + }, + { + "epoch": 2.6286359785371363, + "grad_norm": 2.9614887135061867, + "learning_rate": 7.930200341622274e-07, + "loss": 0.1432, + "step": 18616 + }, + { + "epoch": 2.628777181587122, + "grad_norm": 3.055715332925409, + "learning_rate": 7.924251840522446e-07, + "loss": 0.1055, + "step": 18617 + }, + { + "epoch": 2.628918384637108, + "grad_norm": 4.035306041012885, + "learning_rate": 7.918305479223243e-07, + "loss": 0.1893, + "step": 18618 + }, + { + "epoch": 2.629059587687094, + "grad_norm": 3.3567633726863755, + "learning_rate": 7.912361257862844e-07, + "loss": 0.1274, + "step": 18619 + }, + { + "epoch": 2.62920079073708, + "grad_norm": 3.089284125387942, + "learning_rate": 7.906419176579416e-07, + "loss": 0.1354, + "step": 18620 + }, + { + "epoch": 2.6293419937870657, + "grad_norm": 3.0205798873461003, + "learning_rate": 7.900479235511016e-07, + "loss": 0.1094, + "step": 18621 + }, + { + "epoch": 2.6294831968370516, + "grad_norm": 2.6684725865561227, + "learning_rate": 7.8945414347957e-07, + "loss": 0.1281, + "step": 18622 + }, + { + "epoch": 2.6296243998870374, + "grad_norm": 2.8130759633673987, + "learning_rate": 7.888605774571478e-07, + "loss": 0.115, + "step": 18623 + }, + { + "epoch": 2.6297656029370233, + "grad_norm": 3.0461499952144053, + "learning_rate": 7.882672254976298e-07, + "loss": 0.1403, + "step": 18624 + }, + { + "epoch": 2.629906805987009, + "grad_norm": 2.8182833831602196, + "learning_rate": 7.876740876148015e-07, + "loss": 0.1032, + "step": 18625 + }, + { + "epoch": 2.630048009036995, + "grad_norm": 3.2974311084349046, + "learning_rate": 7.870811638224485e-07, + "loss": 0.1677, + "step": 18626 + }, + { + "epoch": 2.630189212086981, + "grad_norm": 3.1527869981286143, + "learning_rate": 7.864884541343499e-07, + "loss": 0.1391, + "step": 18627 + }, + { + "epoch": 2.630330415136967, + "grad_norm": 2.777433668904338, + "learning_rate": 7.8589595856428e-07, + "loss": 0.1555, + "step": 18628 + }, + { + "epoch": 2.6304716181869527, + "grad_norm": 2.503295163868392, + "learning_rate": 7.853036771260103e-07, + "loss": 0.1255, + "step": 18629 + }, + { + "epoch": 2.6306128212369386, + "grad_norm": 3.0160741661427473, + "learning_rate": 7.847116098333029e-07, + "loss": 0.1691, + "step": 18630 + }, + { + "epoch": 2.6307540242869245, + "grad_norm": 3.04272543787079, + "learning_rate": 7.841197566999182e-07, + "loss": 0.1055, + "step": 18631 + }, + { + "epoch": 2.6308952273369104, + "grad_norm": 3.184722222467459, + "learning_rate": 7.835281177396126e-07, + "loss": 0.1397, + "step": 18632 + }, + { + "epoch": 2.6310364303868963, + "grad_norm": 3.6975111967257304, + "learning_rate": 7.829366929661298e-07, + "loss": 0.1703, + "step": 18633 + }, + { + "epoch": 2.631177633436882, + "grad_norm": 3.526690466516146, + "learning_rate": 7.823454823932186e-07, + "loss": 0.1609, + "step": 18634 + }, + { + "epoch": 2.631318836486868, + "grad_norm": 3.348851558042337, + "learning_rate": 7.817544860346183e-07, + "loss": 0.147, + "step": 18635 + }, + { + "epoch": 2.631460039536854, + "grad_norm": 2.7411656110871894, + "learning_rate": 7.811637039040621e-07, + "loss": 0.1238, + "step": 18636 + }, + { + "epoch": 2.63160124258684, + "grad_norm": 3.097704537002722, + "learning_rate": 7.805731360152802e-07, + "loss": 0.1548, + "step": 18637 + }, + { + "epoch": 2.6317424456368257, + "grad_norm": 3.49136516806522, + "learning_rate": 7.799827823819972e-07, + "loss": 0.1655, + "step": 18638 + }, + { + "epoch": 2.6318836486868116, + "grad_norm": 3.3325888316391237, + "learning_rate": 7.793926430179333e-07, + "loss": 0.1557, + "step": 18639 + }, + { + "epoch": 2.6320248517367975, + "grad_norm": 2.638931240401629, + "learning_rate": 7.788027179367997e-07, + "loss": 0.1397, + "step": 18640 + }, + { + "epoch": 2.6321660547867833, + "grad_norm": 3.276240878519179, + "learning_rate": 7.78213007152312e-07, + "loss": 0.152, + "step": 18641 + }, + { + "epoch": 2.632307257836769, + "grad_norm": 3.5781330152463537, + "learning_rate": 7.776235106781704e-07, + "loss": 0.191, + "step": 18642 + }, + { + "epoch": 2.632448460886755, + "grad_norm": 3.0147325349507192, + "learning_rate": 7.770342285280752e-07, + "loss": 0.1683, + "step": 18643 + }, + { + "epoch": 2.632589663936741, + "grad_norm": 3.1681859674887307, + "learning_rate": 7.764451607157208e-07, + "loss": 0.1123, + "step": 18644 + }, + { + "epoch": 2.632730866986727, + "grad_norm": 2.6491575201766366, + "learning_rate": 7.758563072547965e-07, + "loss": 0.1351, + "step": 18645 + }, + { + "epoch": 2.6328720700367128, + "grad_norm": 3.0698317813218203, + "learning_rate": 7.752676681589899e-07, + "loss": 0.147, + "step": 18646 + }, + { + "epoch": 2.6330132730866986, + "grad_norm": 3.8233838379955993, + "learning_rate": 7.74679243441978e-07, + "loss": 0.1562, + "step": 18647 + }, + { + "epoch": 2.6331544761366845, + "grad_norm": 2.71688171973595, + "learning_rate": 7.740910331174378e-07, + "loss": 0.1275, + "step": 18648 + }, + { + "epoch": 2.6332956791866704, + "grad_norm": 3.0486793894334316, + "learning_rate": 7.735030371990382e-07, + "loss": 0.1149, + "step": 18649 + }, + { + "epoch": 2.6334368822366563, + "grad_norm": 3.2676697419097214, + "learning_rate": 7.729152557004405e-07, + "loss": 0.1237, + "step": 18650 + }, + { + "epoch": 2.633578085286642, + "grad_norm": 2.875871611916708, + "learning_rate": 7.723276886353081e-07, + "loss": 0.1239, + "step": 18651 + }, + { + "epoch": 2.633719288336628, + "grad_norm": 2.9149887072580416, + "learning_rate": 7.717403360172959e-07, + "loss": 0.0993, + "step": 18652 + }, + { + "epoch": 2.633860491386614, + "grad_norm": 3.541140948234796, + "learning_rate": 7.711531978600529e-07, + "loss": 0.1412, + "step": 18653 + }, + { + "epoch": 2.6340016944366, + "grad_norm": 3.1880139286813476, + "learning_rate": 7.705662741772235e-07, + "loss": 0.1397, + "step": 18654 + }, + { + "epoch": 2.6341428974865857, + "grad_norm": 4.291653403972578, + "learning_rate": 7.699795649824493e-07, + "loss": 0.226, + "step": 18655 + }, + { + "epoch": 2.6342841005365716, + "grad_norm": 3.621784990766145, + "learning_rate": 7.693930702893626e-07, + "loss": 0.1578, + "step": 18656 + }, + { + "epoch": 2.6344253035865575, + "grad_norm": 3.703705689813216, + "learning_rate": 7.688067901115926e-07, + "loss": 0.1608, + "step": 18657 + }, + { + "epoch": 2.6345665066365433, + "grad_norm": 2.7629488982814165, + "learning_rate": 7.682207244627704e-07, + "loss": 0.0965, + "step": 18658 + }, + { + "epoch": 2.6347077096865292, + "grad_norm": 3.6702579555758303, + "learning_rate": 7.676348733565098e-07, + "loss": 0.1669, + "step": 18659 + }, + { + "epoch": 2.634848912736515, + "grad_norm": 3.3758262466904285, + "learning_rate": 7.670492368064275e-07, + "loss": 0.1523, + "step": 18660 + }, + { + "epoch": 2.634990115786501, + "grad_norm": 4.524352119979482, + "learning_rate": 7.664638148261339e-07, + "loss": 0.1763, + "step": 18661 + }, + { + "epoch": 2.635131318836487, + "grad_norm": 3.0605161871581563, + "learning_rate": 7.658786074292312e-07, + "loss": 0.1452, + "step": 18662 + }, + { + "epoch": 2.6352725218864728, + "grad_norm": 3.4603724898501613, + "learning_rate": 7.652936146293244e-07, + "loss": 0.1361, + "step": 18663 + }, + { + "epoch": 2.6354137249364586, + "grad_norm": 3.244552748541896, + "learning_rate": 7.647088364400046e-07, + "loss": 0.1642, + "step": 18664 + }, + { + "epoch": 2.6355549279864445, + "grad_norm": 3.6544719469243376, + "learning_rate": 7.641242728748632e-07, + "loss": 0.1655, + "step": 18665 + }, + { + "epoch": 2.6356961310364304, + "grad_norm": 3.271614582268736, + "learning_rate": 7.635399239474872e-07, + "loss": 0.1325, + "step": 18666 + }, + { + "epoch": 2.6358373340864163, + "grad_norm": 4.073323714110204, + "learning_rate": 7.629557896714512e-07, + "loss": 0.1211, + "step": 18667 + }, + { + "epoch": 2.635978537136402, + "grad_norm": 2.294860807091073, + "learning_rate": 7.623718700603356e-07, + "loss": 0.0948, + "step": 18668 + }, + { + "epoch": 2.636119740186388, + "grad_norm": 3.235620071850725, + "learning_rate": 7.617881651277071e-07, + "loss": 0.1345, + "step": 18669 + }, + { + "epoch": 2.636260943236374, + "grad_norm": 4.153366230534762, + "learning_rate": 7.612046748871327e-07, + "loss": 0.2007, + "step": 18670 + }, + { + "epoch": 2.63640214628636, + "grad_norm": 3.035151981858048, + "learning_rate": 7.606213993521716e-07, + "loss": 0.1447, + "step": 18671 + }, + { + "epoch": 2.6365433493363457, + "grad_norm": 2.8541704585044787, + "learning_rate": 7.600383385363797e-07, + "loss": 0.1238, + "step": 18672 + }, + { + "epoch": 2.6366845523863316, + "grad_norm": 3.8217958475483944, + "learning_rate": 7.594554924533048e-07, + "loss": 0.139, + "step": 18673 + }, + { + "epoch": 2.6368257554363175, + "grad_norm": 3.8757053772792784, + "learning_rate": 7.58872861116493e-07, + "loss": 0.1824, + "step": 18674 + }, + { + "epoch": 2.6369669584863034, + "grad_norm": 3.01391636814118, + "learning_rate": 7.582904445394878e-07, + "loss": 0.1413, + "step": 18675 + }, + { + "epoch": 2.6371081615362892, + "grad_norm": 3.234067759763652, + "learning_rate": 7.577082427358207e-07, + "loss": 0.1529, + "step": 18676 + }, + { + "epoch": 2.637249364586275, + "grad_norm": 3.0061096325972225, + "learning_rate": 7.571262557190218e-07, + "loss": 0.1329, + "step": 18677 + }, + { + "epoch": 2.637390567636261, + "grad_norm": 3.3705180547855362, + "learning_rate": 7.56544483502617e-07, + "loss": 0.1245, + "step": 18678 + }, + { + "epoch": 2.637531770686247, + "grad_norm": 4.049016333376115, + "learning_rate": 7.559629261001256e-07, + "loss": 0.1922, + "step": 18679 + }, + { + "epoch": 2.6376729737362328, + "grad_norm": 2.758296803388759, + "learning_rate": 7.553815835250644e-07, + "loss": 0.1235, + "step": 18680 + }, + { + "epoch": 2.6378141767862187, + "grad_norm": 3.139196930863653, + "learning_rate": 7.548004557909428e-07, + "loss": 0.1195, + "step": 18681 + }, + { + "epoch": 2.6379553798362045, + "grad_norm": 2.8262638131869284, + "learning_rate": 7.542195429112664e-07, + "loss": 0.1313, + "step": 18682 + }, + { + "epoch": 2.6380965828861904, + "grad_norm": 3.122881371417505, + "learning_rate": 7.536388448995357e-07, + "loss": 0.1284, + "step": 18683 + }, + { + "epoch": 2.6382377859361763, + "grad_norm": 3.001000157990892, + "learning_rate": 7.530583617692433e-07, + "loss": 0.1245, + "step": 18684 + }, + { + "epoch": 2.638378988986162, + "grad_norm": 3.2129902242135207, + "learning_rate": 7.524780935338815e-07, + "loss": 0.1311, + "step": 18685 + }, + { + "epoch": 2.638520192036148, + "grad_norm": 2.948115075048922, + "learning_rate": 7.518980402069354e-07, + "loss": 0.1333, + "step": 18686 + }, + { + "epoch": 2.638661395086134, + "grad_norm": 3.4492105783846267, + "learning_rate": 7.51318201801885e-07, + "loss": 0.1401, + "step": 18687 + }, + { + "epoch": 2.63880259813612, + "grad_norm": 2.8797020921755694, + "learning_rate": 7.507385783322052e-07, + "loss": 0.1236, + "step": 18688 + }, + { + "epoch": 2.6389438011861057, + "grad_norm": 2.927318628139526, + "learning_rate": 7.501591698113663e-07, + "loss": 0.143, + "step": 18689 + }, + { + "epoch": 2.6390850042360916, + "grad_norm": 2.5952464990903423, + "learning_rate": 7.495799762528333e-07, + "loss": 0.1355, + "step": 18690 + }, + { + "epoch": 2.6392262072860775, + "grad_norm": 3.3220776667776795, + "learning_rate": 7.490009976700663e-07, + "loss": 0.1554, + "step": 18691 + }, + { + "epoch": 2.6393674103360634, + "grad_norm": 2.9322310803915803, + "learning_rate": 7.484222340765235e-07, + "loss": 0.1454, + "step": 18692 + }, + { + "epoch": 2.6395086133860493, + "grad_norm": 3.018574905771618, + "learning_rate": 7.478436854856508e-07, + "loss": 0.1306, + "step": 18693 + }, + { + "epoch": 2.639649816436035, + "grad_norm": 3.3052282531676345, + "learning_rate": 7.472653519108952e-07, + "loss": 0.1636, + "step": 18694 + }, + { + "epoch": 2.639791019486021, + "grad_norm": 2.9666400963726676, + "learning_rate": 7.46687233365696e-07, + "loss": 0.1386, + "step": 18695 + }, + { + "epoch": 2.639932222536007, + "grad_norm": 3.801878151311475, + "learning_rate": 7.461093298634892e-07, + "loss": 0.2027, + "step": 18696 + }, + { + "epoch": 2.640073425585993, + "grad_norm": 3.1906617764152747, + "learning_rate": 7.45531641417706e-07, + "loss": 0.1541, + "step": 18697 + }, + { + "epoch": 2.6402146286359787, + "grad_norm": 3.7357605767319253, + "learning_rate": 7.449541680417704e-07, + "loss": 0.1429, + "step": 18698 + }, + { + "epoch": 2.6403558316859645, + "grad_norm": 2.9604219289589855, + "learning_rate": 7.443769097491038e-07, + "loss": 0.1461, + "step": 18699 + }, + { + "epoch": 2.6404970347359504, + "grad_norm": 3.0468322855327648, + "learning_rate": 7.437998665531221e-07, + "loss": 0.1449, + "step": 18700 + }, + { + "epoch": 2.6406382377859363, + "grad_norm": 2.7944765880101317, + "learning_rate": 7.432230384672301e-07, + "loss": 0.1277, + "step": 18701 + }, + { + "epoch": 2.640779440835922, + "grad_norm": 3.71133128915966, + "learning_rate": 7.426464255048393e-07, + "loss": 0.1556, + "step": 18702 + }, + { + "epoch": 2.640920643885908, + "grad_norm": 3.652316824497758, + "learning_rate": 7.420700276793469e-07, + "loss": 0.1858, + "step": 18703 + }, + { + "epoch": 2.641061846935894, + "grad_norm": 4.023334924397379, + "learning_rate": 7.414938450041497e-07, + "loss": 0.2379, + "step": 18704 + }, + { + "epoch": 2.64120304998588, + "grad_norm": 4.11634175597947, + "learning_rate": 7.409178774926373e-07, + "loss": 0.2092, + "step": 18705 + }, + { + "epoch": 2.6413442530358657, + "grad_norm": 3.2953660839349816, + "learning_rate": 7.403421251581933e-07, + "loss": 0.1356, + "step": 18706 + }, + { + "epoch": 2.6414854560858516, + "grad_norm": 2.7624465375288394, + "learning_rate": 7.397665880142013e-07, + "loss": 0.1239, + "step": 18707 + }, + { + "epoch": 2.6416266591358375, + "grad_norm": 3.1133789090746924, + "learning_rate": 7.391912660740319e-07, + "loss": 0.1493, + "step": 18708 + }, + { + "epoch": 2.6417678621858234, + "grad_norm": 3.3146218826250577, + "learning_rate": 7.38616159351061e-07, + "loss": 0.1651, + "step": 18709 + }, + { + "epoch": 2.641909065235809, + "grad_norm": 3.1388506257116666, + "learning_rate": 7.38041267858649e-07, + "loss": 0.1612, + "step": 18710 + }, + { + "epoch": 2.6420502682857947, + "grad_norm": 3.679264910469496, + "learning_rate": 7.374665916101587e-07, + "loss": 0.2001, + "step": 18711 + }, + { + "epoch": 2.6421914713357806, + "grad_norm": 3.2704729571303597, + "learning_rate": 7.368921306189447e-07, + "loss": 0.1565, + "step": 18712 + }, + { + "epoch": 2.6423326743857665, + "grad_norm": 3.0030532113750055, + "learning_rate": 7.363178848983554e-07, + "loss": 0.1054, + "step": 18713 + }, + { + "epoch": 2.6424738774357523, + "grad_norm": 2.964196222520876, + "learning_rate": 7.35743854461739e-07, + "loss": 0.1489, + "step": 18714 + }, + { + "epoch": 2.6426150804857382, + "grad_norm": 3.352544267954871, + "learning_rate": 7.351700393224359e-07, + "loss": 0.1427, + "step": 18715 + }, + { + "epoch": 2.642756283535724, + "grad_norm": 3.1995846251288547, + "learning_rate": 7.345964394937788e-07, + "loss": 0.1245, + "step": 18716 + }, + { + "epoch": 2.64289748658571, + "grad_norm": 3.449750253477038, + "learning_rate": 7.340230549891003e-07, + "loss": 0.1546, + "step": 18717 + }, + { + "epoch": 2.643038689635696, + "grad_norm": 3.896587030848445, + "learning_rate": 7.334498858217231e-07, + "loss": 0.1817, + "step": 18718 + }, + { + "epoch": 2.6431798926856818, + "grad_norm": 3.492929173224557, + "learning_rate": 7.328769320049667e-07, + "loss": 0.1221, + "step": 18719 + }, + { + "epoch": 2.6433210957356676, + "grad_norm": 2.617408529529238, + "learning_rate": 7.323041935521502e-07, + "loss": 0.1285, + "step": 18720 + }, + { + "epoch": 2.6434622987856535, + "grad_norm": 3.3750892500377994, + "learning_rate": 7.317316704765821e-07, + "loss": 0.1364, + "step": 18721 + }, + { + "epoch": 2.6436035018356394, + "grad_norm": 3.9786225955747603, + "learning_rate": 7.31159362791567e-07, + "loss": 0.1829, + "step": 18722 + }, + { + "epoch": 2.6437447048856253, + "grad_norm": 2.8664840229309227, + "learning_rate": 7.305872705104056e-07, + "loss": 0.137, + "step": 18723 + }, + { + "epoch": 2.643885907935611, + "grad_norm": 3.3865222213052064, + "learning_rate": 7.300153936463927e-07, + "loss": 0.1375, + "step": 18724 + }, + { + "epoch": 2.644027110985597, + "grad_norm": 3.1636126974209287, + "learning_rate": 7.294437322128167e-07, + "loss": 0.1504, + "step": 18725 + }, + { + "epoch": 2.644168314035583, + "grad_norm": 2.782094870821125, + "learning_rate": 7.288722862229691e-07, + "loss": 0.146, + "step": 18726 + }, + { + "epoch": 2.644309517085569, + "grad_norm": 3.237902146498894, + "learning_rate": 7.283010556901226e-07, + "loss": 0.1536, + "step": 18727 + }, + { + "epoch": 2.6444507201355547, + "grad_norm": 3.049622831942678, + "learning_rate": 7.277300406275567e-07, + "loss": 0.1319, + "step": 18728 + }, + { + "epoch": 2.6445919231855406, + "grad_norm": 3.127561957577059, + "learning_rate": 7.271592410485395e-07, + "loss": 0.1376, + "step": 18729 + }, + { + "epoch": 2.6447331262355265, + "grad_norm": 2.6758912592120256, + "learning_rate": 7.265886569663372e-07, + "loss": 0.1046, + "step": 18730 + }, + { + "epoch": 2.6448743292855124, + "grad_norm": 3.372172642875817, + "learning_rate": 7.260182883942079e-07, + "loss": 0.1598, + "step": 18731 + }, + { + "epoch": 2.6450155323354982, + "grad_norm": 4.600696955515216, + "learning_rate": 7.254481353454102e-07, + "loss": 0.205, + "step": 18732 + }, + { + "epoch": 2.645156735385484, + "grad_norm": 3.5461829944571974, + "learning_rate": 7.24878197833192e-07, + "loss": 0.1845, + "step": 18733 + }, + { + "epoch": 2.64529793843547, + "grad_norm": 3.5443039655612285, + "learning_rate": 7.243084758708007e-07, + "loss": 0.1481, + "step": 18734 + }, + { + "epoch": 2.645439141485456, + "grad_norm": 3.6523319508643586, + "learning_rate": 7.237389694714736e-07, + "loss": 0.147, + "step": 18735 + }, + { + "epoch": 2.6455803445354418, + "grad_norm": 3.1278368349043135, + "learning_rate": 7.231696786484443e-07, + "loss": 0.1598, + "step": 18736 + }, + { + "epoch": 2.6457215475854277, + "grad_norm": 4.867884154661913, + "learning_rate": 7.226006034149469e-07, + "loss": 0.1822, + "step": 18737 + }, + { + "epoch": 2.6458627506354135, + "grad_norm": 3.1656492235080256, + "learning_rate": 7.22031743784205e-07, + "loss": 0.152, + "step": 18738 + }, + { + "epoch": 2.6460039536853994, + "grad_norm": 3.1778224839121854, + "learning_rate": 7.214630997694394e-07, + "loss": 0.1393, + "step": 18739 + }, + { + "epoch": 2.6461451567353853, + "grad_norm": 3.1432656046170924, + "learning_rate": 7.208946713838638e-07, + "loss": 0.13, + "step": 18740 + }, + { + "epoch": 2.646286359785371, + "grad_norm": 3.5746465833097014, + "learning_rate": 7.203264586406877e-07, + "loss": 0.1371, + "step": 18741 + }, + { + "epoch": 2.646427562835357, + "grad_norm": 2.6182014647673797, + "learning_rate": 7.197584615531184e-07, + "loss": 0.1468, + "step": 18742 + }, + { + "epoch": 2.646568765885343, + "grad_norm": 3.0441019361160593, + "learning_rate": 7.19190680134354e-07, + "loss": 0.1305, + "step": 18743 + }, + { + "epoch": 2.646709968935329, + "grad_norm": 3.1538799999945715, + "learning_rate": 7.186231143975908e-07, + "loss": 0.1301, + "step": 18744 + }, + { + "epoch": 2.6468511719853147, + "grad_norm": 2.5697661611265348, + "learning_rate": 7.18055764356017e-07, + "loss": 0.1308, + "step": 18745 + }, + { + "epoch": 2.6469923750353006, + "grad_norm": 3.0522884885442108, + "learning_rate": 7.17488630022819e-07, + "loss": 0.1705, + "step": 18746 + }, + { + "epoch": 2.6471335780852865, + "grad_norm": 2.808022142023771, + "learning_rate": 7.169217114111771e-07, + "loss": 0.1182, + "step": 18747 + }, + { + "epoch": 2.6472747811352724, + "grad_norm": 3.3520456539394847, + "learning_rate": 7.16355008534263e-07, + "loss": 0.1629, + "step": 18748 + }, + { + "epoch": 2.6474159841852583, + "grad_norm": 4.808814403556536, + "learning_rate": 7.157885214052518e-07, + "loss": 0.2085, + "step": 18749 + }, + { + "epoch": 2.647557187235244, + "grad_norm": 2.852149647973399, + "learning_rate": 7.152222500373052e-07, + "loss": 0.1122, + "step": 18750 + }, + { + "epoch": 2.64769839028523, + "grad_norm": 2.779992315687866, + "learning_rate": 7.146561944435859e-07, + "loss": 0.1383, + "step": 18751 + }, + { + "epoch": 2.647839593335216, + "grad_norm": 3.096373057879562, + "learning_rate": 7.140903546372446e-07, + "loss": 0.1372, + "step": 18752 + }, + { + "epoch": 2.647980796385202, + "grad_norm": 2.516311792955464, + "learning_rate": 7.135247306314308e-07, + "loss": 0.1206, + "step": 18753 + }, + { + "epoch": 2.6481219994351877, + "grad_norm": 3.4152968675941864, + "learning_rate": 7.12959322439295e-07, + "loss": 0.1427, + "step": 18754 + }, + { + "epoch": 2.6482632024851736, + "grad_norm": 3.3661778212113496, + "learning_rate": 7.123941300739723e-07, + "loss": 0.135, + "step": 18755 + }, + { + "epoch": 2.6484044055351594, + "grad_norm": 3.2544149735634136, + "learning_rate": 7.118291535485999e-07, + "loss": 0.1435, + "step": 18756 + }, + { + "epoch": 2.6485456085851453, + "grad_norm": 2.680796999762798, + "learning_rate": 7.112643928763064e-07, + "loss": 0.108, + "step": 18757 + }, + { + "epoch": 2.648686811635131, + "grad_norm": 3.400120069267058, + "learning_rate": 7.106998480702165e-07, + "loss": 0.1529, + "step": 18758 + }, + { + "epoch": 2.648828014685117, + "grad_norm": 3.636970173195075, + "learning_rate": 7.101355191434511e-07, + "loss": 0.1498, + "step": 18759 + }, + { + "epoch": 2.648969217735103, + "grad_norm": 3.510922416715557, + "learning_rate": 7.095714061091241e-07, + "loss": 0.1823, + "step": 18760 + }, + { + "epoch": 2.649110420785089, + "grad_norm": 3.465764044833183, + "learning_rate": 7.09007508980345e-07, + "loss": 0.1534, + "step": 18761 + }, + { + "epoch": 2.6492516238350747, + "grad_norm": 3.0388705527304585, + "learning_rate": 7.084438277702188e-07, + "loss": 0.0976, + "step": 18762 + }, + { + "epoch": 2.6493928268850606, + "grad_norm": 3.8498535455563214, + "learning_rate": 7.078803624918463e-07, + "loss": 0.1707, + "step": 18763 + }, + { + "epoch": 2.6495340299350465, + "grad_norm": 3.110519901240474, + "learning_rate": 7.073171131583201e-07, + "loss": 0.1596, + "step": 18764 + }, + { + "epoch": 2.6496752329850324, + "grad_norm": 3.0194878120958037, + "learning_rate": 7.067540797827299e-07, + "loss": 0.1544, + "step": 18765 + }, + { + "epoch": 2.6498164360350183, + "grad_norm": 3.1038435354857907, + "learning_rate": 7.06191262378163e-07, + "loss": 0.1013, + "step": 18766 + }, + { + "epoch": 2.649957639085004, + "grad_norm": 3.329584926479046, + "learning_rate": 7.056286609576979e-07, + "loss": 0.1337, + "step": 18767 + }, + { + "epoch": 2.65009884213499, + "grad_norm": 4.065825390258055, + "learning_rate": 7.050662755344096e-07, + "loss": 0.1746, + "step": 18768 + }, + { + "epoch": 2.650240045184976, + "grad_norm": 2.445080162379736, + "learning_rate": 7.045041061213664e-07, + "loss": 0.1192, + "step": 18769 + }, + { + "epoch": 2.650381248234962, + "grad_norm": 3.611202237352108, + "learning_rate": 7.039421527316304e-07, + "loss": 0.1311, + "step": 18770 + }, + { + "epoch": 2.6505224512849477, + "grad_norm": 3.3554715880497423, + "learning_rate": 7.033804153782664e-07, + "loss": 0.1263, + "step": 18771 + }, + { + "epoch": 2.6506636543349336, + "grad_norm": 3.198775243117756, + "learning_rate": 7.028188940743275e-07, + "loss": 0.1327, + "step": 18772 + }, + { + "epoch": 2.6508048573849194, + "grad_norm": 2.935863069931787, + "learning_rate": 7.022575888328608e-07, + "loss": 0.1343, + "step": 18773 + }, + { + "epoch": 2.6509460604349053, + "grad_norm": 4.443586812079073, + "learning_rate": 7.016964996669129e-07, + "loss": 0.1574, + "step": 18774 + }, + { + "epoch": 2.651087263484891, + "grad_norm": 2.875364163422291, + "learning_rate": 7.011356265895231e-07, + "loss": 0.1326, + "step": 18775 + }, + { + "epoch": 2.651228466534877, + "grad_norm": 3.226281992547059, + "learning_rate": 7.005749696137254e-07, + "loss": 0.1761, + "step": 18776 + }, + { + "epoch": 2.651369669584863, + "grad_norm": 3.400781699100564, + "learning_rate": 7.000145287525484e-07, + "loss": 0.1159, + "step": 18777 + }, + { + "epoch": 2.651510872634849, + "grad_norm": 2.602906908502238, + "learning_rate": 6.994543040190183e-07, + "loss": 0.1109, + "step": 18778 + }, + { + "epoch": 2.6516520756848347, + "grad_norm": 2.401671015369705, + "learning_rate": 6.988942954261535e-07, + "loss": 0.1242, + "step": 18779 + }, + { + "epoch": 2.6517932787348206, + "grad_norm": 3.1898329110407984, + "learning_rate": 6.983345029869681e-07, + "loss": 0.1571, + "step": 18780 + }, + { + "epoch": 2.6519344817848065, + "grad_norm": 2.911394567963712, + "learning_rate": 6.977749267144718e-07, + "loss": 0.1393, + "step": 18781 + }, + { + "epoch": 2.6520756848347924, + "grad_norm": 3.684349298046009, + "learning_rate": 6.972155666216684e-07, + "loss": 0.1599, + "step": 18782 + }, + { + "epoch": 2.6522168878847783, + "grad_norm": 2.833831992403683, + "learning_rate": 6.966564227215578e-07, + "loss": 0.124, + "step": 18783 + }, + { + "epoch": 2.652358090934764, + "grad_norm": 3.714089689243963, + "learning_rate": 6.960974950271348e-07, + "loss": 0.1597, + "step": 18784 + }, + { + "epoch": 2.65249929398475, + "grad_norm": 3.742438800231911, + "learning_rate": 6.955387835513894e-07, + "loss": 0.1867, + "step": 18785 + }, + { + "epoch": 2.652640497034736, + "grad_norm": 2.7266313720674806, + "learning_rate": 6.949802883073031e-07, + "loss": 0.124, + "step": 18786 + }, + { + "epoch": 2.652781700084722, + "grad_norm": 3.0682369076170106, + "learning_rate": 6.944220093078546e-07, + "loss": 0.1137, + "step": 18787 + }, + { + "epoch": 2.6529229031347077, + "grad_norm": 3.0762771654845285, + "learning_rate": 6.938639465660213e-07, + "loss": 0.1386, + "step": 18788 + }, + { + "epoch": 2.6530641061846936, + "grad_norm": 3.6028375847586545, + "learning_rate": 6.933061000947705e-07, + "loss": 0.0982, + "step": 18789 + }, + { + "epoch": 2.6532053092346795, + "grad_norm": 2.982292461262047, + "learning_rate": 6.927484699070675e-07, + "loss": 0.1533, + "step": 18790 + }, + { + "epoch": 2.6533465122846653, + "grad_norm": 3.169376033532251, + "learning_rate": 6.921910560158696e-07, + "loss": 0.1655, + "step": 18791 + }, + { + "epoch": 2.653487715334651, + "grad_norm": 3.4379592537965125, + "learning_rate": 6.91633858434132e-07, + "loss": 0.1626, + "step": 18792 + }, + { + "epoch": 2.653628918384637, + "grad_norm": 2.9466224409091493, + "learning_rate": 6.910768771748044e-07, + "loss": 0.1361, + "step": 18793 + }, + { + "epoch": 2.653770121434623, + "grad_norm": 2.7195683243440647, + "learning_rate": 6.905201122508299e-07, + "loss": 0.1027, + "step": 18794 + }, + { + "epoch": 2.653911324484609, + "grad_norm": 2.7991434397354493, + "learning_rate": 6.899635636751467e-07, + "loss": 0.1588, + "step": 18795 + }, + { + "epoch": 2.6540525275345948, + "grad_norm": 2.660535105582515, + "learning_rate": 6.894072314606892e-07, + "loss": 0.0995, + "step": 18796 + }, + { + "epoch": 2.6541937305845806, + "grad_norm": 3.1820039334115213, + "learning_rate": 6.888511156203881e-07, + "loss": 0.1369, + "step": 18797 + }, + { + "epoch": 2.6543349336345665, + "grad_norm": 2.9100359447136075, + "learning_rate": 6.882952161671652e-07, + "loss": 0.1465, + "step": 18798 + }, + { + "epoch": 2.6544761366845524, + "grad_norm": 3.3067047587434835, + "learning_rate": 6.87739533113938e-07, + "loss": 0.1193, + "step": 18799 + }, + { + "epoch": 2.6546173397345383, + "grad_norm": 3.2466027075625417, + "learning_rate": 6.871840664736251e-07, + "loss": 0.1374, + "step": 18800 + }, + { + "epoch": 2.654758542784524, + "grad_norm": 3.1155824349568326, + "learning_rate": 6.866288162591317e-07, + "loss": 0.1614, + "step": 18801 + }, + { + "epoch": 2.65489974583451, + "grad_norm": 3.1087495171700037, + "learning_rate": 6.860737824833652e-07, + "loss": 0.1215, + "step": 18802 + }, + { + "epoch": 2.655040948884496, + "grad_norm": 2.7524013041526447, + "learning_rate": 6.855189651592187e-07, + "loss": 0.1167, + "step": 18803 + }, + { + "epoch": 2.655182151934482, + "grad_norm": 3.004576821422797, + "learning_rate": 6.849643642995873e-07, + "loss": 0.1629, + "step": 18804 + }, + { + "epoch": 2.6553233549844677, + "grad_norm": 3.3296399183373784, + "learning_rate": 6.844099799173643e-07, + "loss": 0.1318, + "step": 18805 + }, + { + "epoch": 2.6554645580344536, + "grad_norm": 3.2030387821551645, + "learning_rate": 6.83855812025429e-07, + "loss": 0.1515, + "step": 18806 + }, + { + "epoch": 2.6556057610844395, + "grad_norm": 3.525426025050401, + "learning_rate": 6.833018606366615e-07, + "loss": 0.1573, + "step": 18807 + }, + { + "epoch": 2.6557469641344253, + "grad_norm": 3.307343242267232, + "learning_rate": 6.827481257639346e-07, + "loss": 0.1255, + "step": 18808 + }, + { + "epoch": 2.6558881671844112, + "grad_norm": 3.7653375290154654, + "learning_rate": 6.82194607420118e-07, + "loss": 0.1574, + "step": 18809 + }, + { + "epoch": 2.656029370234397, + "grad_norm": 3.531318950981282, + "learning_rate": 6.816413056180748e-07, + "loss": 0.1548, + "step": 18810 + }, + { + "epoch": 2.656170573284383, + "grad_norm": 3.2241847095347915, + "learning_rate": 6.810882203706637e-07, + "loss": 0.1483, + "step": 18811 + }, + { + "epoch": 2.656311776334369, + "grad_norm": 3.0192287942404814, + "learning_rate": 6.805353516907376e-07, + "loss": 0.1128, + "step": 18812 + }, + { + "epoch": 2.6564529793843548, + "grad_norm": 3.7375157627411895, + "learning_rate": 6.799826995911451e-07, + "loss": 0.1896, + "step": 18813 + }, + { + "epoch": 2.6565941824343406, + "grad_norm": 2.7307133158962102, + "learning_rate": 6.794302640847294e-07, + "loss": 0.1044, + "step": 18814 + }, + { + "epoch": 2.6567353854843265, + "grad_norm": 2.5005223507998746, + "learning_rate": 6.788780451843291e-07, + "loss": 0.1253, + "step": 18815 + }, + { + "epoch": 2.6568765885343124, + "grad_norm": 3.225078941638792, + "learning_rate": 6.78326042902776e-07, + "loss": 0.1583, + "step": 18816 + }, + { + "epoch": 2.6570177915842983, + "grad_norm": 2.899073726538033, + "learning_rate": 6.777742572529022e-07, + "loss": 0.1677, + "step": 18817 + }, + { + "epoch": 2.657158994634284, + "grad_norm": 2.5096192517929357, + "learning_rate": 6.772226882475275e-07, + "loss": 0.1269, + "step": 18818 + }, + { + "epoch": 2.65730019768427, + "grad_norm": 2.7448342640551866, + "learning_rate": 6.766713358994736e-07, + "loss": 0.1135, + "step": 18819 + }, + { + "epoch": 2.657441400734256, + "grad_norm": 2.6090899966775973, + "learning_rate": 6.761202002215506e-07, + "loss": 0.1056, + "step": 18820 + }, + { + "epoch": 2.657582603784242, + "grad_norm": 3.8677606212709628, + "learning_rate": 6.755692812265668e-07, + "loss": 0.1645, + "step": 18821 + }, + { + "epoch": 2.6577238068342277, + "grad_norm": 2.4355756680063365, + "learning_rate": 6.750185789273234e-07, + "loss": 0.0848, + "step": 18822 + }, + { + "epoch": 2.6578650098842136, + "grad_norm": 3.5818927202972937, + "learning_rate": 6.744680933366243e-07, + "loss": 0.1834, + "step": 18823 + }, + { + "epoch": 2.6580062129341995, + "grad_norm": 2.760530655301133, + "learning_rate": 6.739178244672584e-07, + "loss": 0.1117, + "step": 18824 + }, + { + "epoch": 2.6581474159841854, + "grad_norm": 2.9527728358991143, + "learning_rate": 6.733677723320142e-07, + "loss": 0.1337, + "step": 18825 + }, + { + "epoch": 2.6582886190341712, + "grad_norm": 3.1393733072190337, + "learning_rate": 6.72817936943676e-07, + "loss": 0.1555, + "step": 18826 + }, + { + "epoch": 2.658429822084157, + "grad_norm": 3.3247003441730665, + "learning_rate": 6.722683183150203e-07, + "loss": 0.1394, + "step": 18827 + }, + { + "epoch": 2.658571025134143, + "grad_norm": 3.118500538456123, + "learning_rate": 6.717189164588212e-07, + "loss": 0.139, + "step": 18828 + }, + { + "epoch": 2.658712228184129, + "grad_norm": 4.087153063595232, + "learning_rate": 6.711697313878452e-07, + "loss": 0.1889, + "step": 18829 + }, + { + "epoch": 2.6588534312341148, + "grad_norm": 3.582699243816067, + "learning_rate": 6.706207631148564e-07, + "loss": 0.1485, + "step": 18830 + }, + { + "epoch": 2.6589946342841007, + "grad_norm": 3.299397342728459, + "learning_rate": 6.700720116526116e-07, + "loss": 0.1723, + "step": 18831 + }, + { + "epoch": 2.6591358373340865, + "grad_norm": 2.374707272394249, + "learning_rate": 6.695234770138648e-07, + "loss": 0.0955, + "step": 18832 + }, + { + "epoch": 2.6592770403840724, + "grad_norm": 2.507007990198973, + "learning_rate": 6.689751592113614e-07, + "loss": 0.1095, + "step": 18833 + }, + { + "epoch": 2.6594182434340583, + "grad_norm": 3.2966572441971134, + "learning_rate": 6.684270582578455e-07, + "loss": 0.1322, + "step": 18834 + }, + { + "epoch": 2.659559446484044, + "grad_norm": 2.8247321009028505, + "learning_rate": 6.67879174166055e-07, + "loss": 0.1236, + "step": 18835 + }, + { + "epoch": 2.65970064953403, + "grad_norm": 4.2168546412272025, + "learning_rate": 6.673315069487252e-07, + "loss": 0.2216, + "step": 18836 + }, + { + "epoch": 2.659841852584016, + "grad_norm": 3.723367725479668, + "learning_rate": 6.667840566185779e-07, + "loss": 0.1639, + "step": 18837 + }, + { + "epoch": 2.659983055634002, + "grad_norm": 3.2488600492444206, + "learning_rate": 6.662368231883388e-07, + "loss": 0.1103, + "step": 18838 + }, + { + "epoch": 2.6601242586839877, + "grad_norm": 3.199153680756426, + "learning_rate": 6.65689806670724e-07, + "loss": 0.0938, + "step": 18839 + }, + { + "epoch": 2.6602654617339736, + "grad_norm": 2.4408280104782047, + "learning_rate": 6.65143007078447e-07, + "loss": 0.1315, + "step": 18840 + }, + { + "epoch": 2.6604066647839595, + "grad_norm": 2.6241972365902018, + "learning_rate": 6.645964244242164e-07, + "loss": 0.1318, + "step": 18841 + }, + { + "epoch": 2.6605478678339454, + "grad_norm": 3.4598101324593302, + "learning_rate": 6.640500587207333e-07, + "loss": 0.1342, + "step": 18842 + }, + { + "epoch": 2.6606890708839313, + "grad_norm": 3.297387047566107, + "learning_rate": 6.635039099806939e-07, + "loss": 0.1414, + "step": 18843 + }, + { + "epoch": 2.660830273933917, + "grad_norm": 3.1051401672594396, + "learning_rate": 6.629579782167928e-07, + "loss": 0.1297, + "step": 18844 + }, + { + "epoch": 2.660971476983903, + "grad_norm": 3.8300308018941824, + "learning_rate": 6.624122634417152e-07, + "loss": 0.1532, + "step": 18845 + }, + { + "epoch": 2.661112680033889, + "grad_norm": 2.817801724121992, + "learning_rate": 6.618667656681444e-07, + "loss": 0.1421, + "step": 18846 + }, + { + "epoch": 2.661253883083875, + "grad_norm": 3.328629898594717, + "learning_rate": 6.613214849087568e-07, + "loss": 0.1765, + "step": 18847 + }, + { + "epoch": 2.6613950861338607, + "grad_norm": 2.410170440875257, + "learning_rate": 6.607764211762247e-07, + "loss": 0.1134, + "step": 18848 + }, + { + "epoch": 2.6615362891838465, + "grad_norm": 2.527743159578753, + "learning_rate": 6.602315744832155e-07, + "loss": 0.1397, + "step": 18849 + }, + { + "epoch": 2.6616774922338324, + "grad_norm": 4.156858133224187, + "learning_rate": 6.596869448423903e-07, + "loss": 0.1469, + "step": 18850 + }, + { + "epoch": 2.6618186952838183, + "grad_norm": 2.856770797744925, + "learning_rate": 6.591425322664058e-07, + "loss": 0.1033, + "step": 18851 + }, + { + "epoch": 2.661959898333804, + "grad_norm": 4.214442918314302, + "learning_rate": 6.585983367679171e-07, + "loss": 0.217, + "step": 18852 + }, + { + "epoch": 2.66210110138379, + "grad_norm": 3.485045219619855, + "learning_rate": 6.5805435835957e-07, + "loss": 0.1574, + "step": 18853 + }, + { + "epoch": 2.662242304433776, + "grad_norm": 3.2959058979777542, + "learning_rate": 6.57510597054003e-07, + "loss": 0.1449, + "step": 18854 + }, + { + "epoch": 2.662383507483762, + "grad_norm": 3.8980086920036414, + "learning_rate": 6.56967052863855e-07, + "loss": 0.2072, + "step": 18855 + }, + { + "epoch": 2.6625247105337477, + "grad_norm": 3.4143245330038, + "learning_rate": 6.564237258017558e-07, + "loss": 0.1552, + "step": 18856 + }, + { + "epoch": 2.6626659135837336, + "grad_norm": 3.049068178567785, + "learning_rate": 6.558806158803366e-07, + "loss": 0.1267, + "step": 18857 + }, + { + "epoch": 2.6628071166337195, + "grad_norm": 3.203757992987614, + "learning_rate": 6.55337723112216e-07, + "loss": 0.1488, + "step": 18858 + }, + { + "epoch": 2.6629483196837054, + "grad_norm": 3.449621162209108, + "learning_rate": 6.547950475100118e-07, + "loss": 0.1638, + "step": 18859 + }, + { + "epoch": 2.6630895227336913, + "grad_norm": 3.0023575636505178, + "learning_rate": 6.542525890863338e-07, + "loss": 0.1165, + "step": 18860 + }, + { + "epoch": 2.663230725783677, + "grad_norm": 2.81686388478396, + "learning_rate": 6.537103478537899e-07, + "loss": 0.1339, + "step": 18861 + }, + { + "epoch": 2.663371928833663, + "grad_norm": 2.3920234729229857, + "learning_rate": 6.531683238249809e-07, + "loss": 0.113, + "step": 18862 + }, + { + "epoch": 2.663513131883649, + "grad_norm": 3.709278279276378, + "learning_rate": 6.526265170125034e-07, + "loss": 0.1619, + "step": 18863 + }, + { + "epoch": 2.663654334933635, + "grad_norm": 3.0524168280763786, + "learning_rate": 6.520849274289498e-07, + "loss": 0.1702, + "step": 18864 + }, + { + "epoch": 2.6637955379836207, + "grad_norm": 2.7479065271774603, + "learning_rate": 6.515435550869043e-07, + "loss": 0.117, + "step": 18865 + }, + { + "epoch": 2.6639367410336066, + "grad_norm": 3.7629546191157153, + "learning_rate": 6.510023999989501e-07, + "loss": 0.1436, + "step": 18866 + }, + { + "epoch": 2.6640779440835924, + "grad_norm": 3.935609173498883, + "learning_rate": 6.504614621776629e-07, + "loss": 0.1417, + "step": 18867 + }, + { + "epoch": 2.6642191471335783, + "grad_norm": 3.444630751796602, + "learning_rate": 6.499207416356113e-07, + "loss": 0.1573, + "step": 18868 + }, + { + "epoch": 2.664360350183564, + "grad_norm": 2.901894005866385, + "learning_rate": 6.493802383853653e-07, + "loss": 0.1338, + "step": 18869 + }, + { + "epoch": 2.66450155323355, + "grad_norm": 2.6511415451334552, + "learning_rate": 6.488399524394851e-07, + "loss": 0.1181, + "step": 18870 + }, + { + "epoch": 2.664642756283536, + "grad_norm": 2.733966287226836, + "learning_rate": 6.482998838105259e-07, + "loss": 0.1189, + "step": 18871 + }, + { + "epoch": 2.664783959333522, + "grad_norm": 2.803971146051175, + "learning_rate": 6.477600325110378e-07, + "loss": 0.1383, + "step": 18872 + }, + { + "epoch": 2.6649251623835077, + "grad_norm": 3.3132204850371765, + "learning_rate": 6.472203985535663e-07, + "loss": 0.1369, + "step": 18873 + }, + { + "epoch": 2.6650663654334936, + "grad_norm": 2.689630841099451, + "learning_rate": 6.466809819506548e-07, + "loss": 0.1508, + "step": 18874 + }, + { + "epoch": 2.6652075684834795, + "grad_norm": 3.2388862621273025, + "learning_rate": 6.461417827148386e-07, + "loss": 0.1517, + "step": 18875 + }, + { + "epoch": 2.6653487715334654, + "grad_norm": 3.5292006691031332, + "learning_rate": 6.456028008586468e-07, + "loss": 0.1158, + "step": 18876 + }, + { + "epoch": 2.6654899745834513, + "grad_norm": 2.7882725013106158, + "learning_rate": 6.45064036394607e-07, + "loss": 0.1316, + "step": 18877 + }, + { + "epoch": 2.665631177633437, + "grad_norm": 3.4647001625610185, + "learning_rate": 6.445254893352381e-07, + "loss": 0.1344, + "step": 18878 + }, + { + "epoch": 2.665772380683423, + "grad_norm": 2.972934413280098, + "learning_rate": 6.439871596930569e-07, + "loss": 0.1186, + "step": 18879 + }, + { + "epoch": 2.6659135837334085, + "grad_norm": 2.548340979535032, + "learning_rate": 6.434490474805743e-07, + "loss": 0.1067, + "step": 18880 + }, + { + "epoch": 2.6660547867833944, + "grad_norm": 3.3102653807530267, + "learning_rate": 6.429111527102938e-07, + "loss": 0.1289, + "step": 18881 + }, + { + "epoch": 2.6661959898333802, + "grad_norm": 3.8034155833395786, + "learning_rate": 6.423734753947175e-07, + "loss": 0.1815, + "step": 18882 + }, + { + "epoch": 2.666337192883366, + "grad_norm": 2.723595507999998, + "learning_rate": 6.41836015546341e-07, + "loss": 0.131, + "step": 18883 + }, + { + "epoch": 2.666478395933352, + "grad_norm": 2.6718269954380682, + "learning_rate": 6.412987731776532e-07, + "loss": 0.1573, + "step": 18884 + }, + { + "epoch": 2.666619598983338, + "grad_norm": 2.912157313764413, + "learning_rate": 6.407617483011385e-07, + "loss": 0.1604, + "step": 18885 + }, + { + "epoch": 2.6667608020333238, + "grad_norm": 3.1123700536266985, + "learning_rate": 6.402249409292815e-07, + "loss": 0.1435, + "step": 18886 + }, + { + "epoch": 2.6669020050833097, + "grad_norm": 2.9497825456991884, + "learning_rate": 6.396883510745555e-07, + "loss": 0.138, + "step": 18887 + }, + { + "epoch": 2.6670432081332955, + "grad_norm": 3.2879820997874685, + "learning_rate": 6.391519787494282e-07, + "loss": 0.1456, + "step": 18888 + }, + { + "epoch": 2.6671844111832814, + "grad_norm": 2.8379663305521934, + "learning_rate": 6.386158239663665e-07, + "loss": 0.125, + "step": 18889 + }, + { + "epoch": 2.6673256142332673, + "grad_norm": 2.9796414850731745, + "learning_rate": 6.380798867378291e-07, + "loss": 0.1384, + "step": 18890 + }, + { + "epoch": 2.667466817283253, + "grad_norm": 3.1272364777300035, + "learning_rate": 6.375441670762727e-07, + "loss": 0.1001, + "step": 18891 + }, + { + "epoch": 2.667608020333239, + "grad_norm": 3.239279467281637, + "learning_rate": 6.370086649941465e-07, + "loss": 0.1376, + "step": 18892 + }, + { + "epoch": 2.667749223383225, + "grad_norm": 3.125371149928673, + "learning_rate": 6.364733805038958e-07, + "loss": 0.1062, + "step": 18893 + }, + { + "epoch": 2.667890426433211, + "grad_norm": 3.5863486728565688, + "learning_rate": 6.359383136179598e-07, + "loss": 0.0981, + "step": 18894 + }, + { + "epoch": 2.6680316294831967, + "grad_norm": 2.6940527973874766, + "learning_rate": 6.35403464348775e-07, + "loss": 0.1402, + "step": 18895 + }, + { + "epoch": 2.6681728325331826, + "grad_norm": 3.5272198626032525, + "learning_rate": 6.348688327087671e-07, + "loss": 0.1497, + "step": 18896 + }, + { + "epoch": 2.6683140355831685, + "grad_norm": 3.790274060679558, + "learning_rate": 6.343344187103628e-07, + "loss": 0.1739, + "step": 18897 + }, + { + "epoch": 2.6684552386331544, + "grad_norm": 3.0913267956654678, + "learning_rate": 6.338002223659834e-07, + "loss": 0.1383, + "step": 18898 + }, + { + "epoch": 2.6685964416831403, + "grad_norm": 2.8240809292687112, + "learning_rate": 6.33266243688041e-07, + "loss": 0.1595, + "step": 18899 + }, + { + "epoch": 2.668737644733126, + "grad_norm": 2.8979765178360655, + "learning_rate": 6.327324826889469e-07, + "loss": 0.1481, + "step": 18900 + }, + { + "epoch": 2.668878847783112, + "grad_norm": 3.6977156288876327, + "learning_rate": 6.321989393811034e-07, + "loss": 0.1957, + "step": 18901 + }, + { + "epoch": 2.669020050833098, + "grad_norm": 2.679839979958239, + "learning_rate": 6.316656137769095e-07, + "loss": 0.118, + "step": 18902 + }, + { + "epoch": 2.669161253883084, + "grad_norm": 3.3301744764850234, + "learning_rate": 6.311325058887629e-07, + "loss": 0.1537, + "step": 18903 + }, + { + "epoch": 2.6693024569330697, + "grad_norm": 2.6350979823503393, + "learning_rate": 6.305996157290528e-07, + "loss": 0.1209, + "step": 18904 + }, + { + "epoch": 2.6694436599830556, + "grad_norm": 3.742796353910209, + "learning_rate": 6.300669433101592e-07, + "loss": 0.1587, + "step": 18905 + }, + { + "epoch": 2.6695848630330414, + "grad_norm": 3.5792926196921164, + "learning_rate": 6.295344886444632e-07, + "loss": 0.1616, + "step": 18906 + }, + { + "epoch": 2.6697260660830273, + "grad_norm": 3.109485954656302, + "learning_rate": 6.290022517443372e-07, + "loss": 0.1503, + "step": 18907 + }, + { + "epoch": 2.669867269133013, + "grad_norm": 3.0012079275544616, + "learning_rate": 6.284702326221537e-07, + "loss": 0.1125, + "step": 18908 + }, + { + "epoch": 2.670008472182999, + "grad_norm": 3.4199098988453667, + "learning_rate": 6.279384312902737e-07, + "loss": 0.1844, + "step": 18909 + }, + { + "epoch": 2.670149675232985, + "grad_norm": 3.507976255156557, + "learning_rate": 6.274068477610584e-07, + "loss": 0.1616, + "step": 18910 + }, + { + "epoch": 2.670290878282971, + "grad_norm": 3.0638138425407426, + "learning_rate": 6.268754820468592e-07, + "loss": 0.1501, + "step": 18911 + }, + { + "epoch": 2.6704320813329567, + "grad_norm": 3.726844078670672, + "learning_rate": 6.263443341600284e-07, + "loss": 0.2212, + "step": 18912 + }, + { + "epoch": 2.6705732843829426, + "grad_norm": 3.495076144501369, + "learning_rate": 6.258134041129038e-07, + "loss": 0.1681, + "step": 18913 + }, + { + "epoch": 2.6707144874329285, + "grad_norm": 2.859601030178073, + "learning_rate": 6.252826919178278e-07, + "loss": 0.1017, + "step": 18914 + }, + { + "epoch": 2.6708556904829144, + "grad_norm": 2.958710779853216, + "learning_rate": 6.247521975871351e-07, + "loss": 0.1203, + "step": 18915 + }, + { + "epoch": 2.6709968935329003, + "grad_norm": 3.450494222246176, + "learning_rate": 6.242219211331512e-07, + "loss": 0.1262, + "step": 18916 + }, + { + "epoch": 2.671138096582886, + "grad_norm": 2.8348834407432952, + "learning_rate": 6.236918625682009e-07, + "loss": 0.1256, + "step": 18917 + }, + { + "epoch": 2.671279299632872, + "grad_norm": 3.917340651783546, + "learning_rate": 6.23162021904603e-07, + "loss": 0.162, + "step": 18918 + }, + { + "epoch": 2.671420502682858, + "grad_norm": 3.0953671721171694, + "learning_rate": 6.226323991546679e-07, + "loss": 0.1184, + "step": 18919 + }, + { + "epoch": 2.671561705732844, + "grad_norm": 3.2660482401010063, + "learning_rate": 6.221029943307099e-07, + "loss": 0.1477, + "step": 18920 + }, + { + "epoch": 2.6717029087828297, + "grad_norm": 3.3218402058051466, + "learning_rate": 6.215738074450262e-07, + "loss": 0.1139, + "step": 18921 + }, + { + "epoch": 2.6718441118328156, + "grad_norm": 3.527092936289585, + "learning_rate": 6.210448385099177e-07, + "loss": 0.1978, + "step": 18922 + }, + { + "epoch": 2.6719853148828014, + "grad_norm": 3.0908141127489848, + "learning_rate": 6.205160875376759e-07, + "loss": 0.1721, + "step": 18923 + }, + { + "epoch": 2.6721265179327873, + "grad_norm": 3.4263658638722485, + "learning_rate": 6.199875545405898e-07, + "loss": 0.1757, + "step": 18924 + }, + { + "epoch": 2.672267720982773, + "grad_norm": 3.2084629600823895, + "learning_rate": 6.194592395309407e-07, + "loss": 0.1395, + "step": 18925 + }, + { + "epoch": 2.672408924032759, + "grad_norm": 4.627261424051521, + "learning_rate": 6.189311425210087e-07, + "loss": 0.1576, + "step": 18926 + }, + { + "epoch": 2.672550127082745, + "grad_norm": 3.118516505968711, + "learning_rate": 6.184032635230663e-07, + "loss": 0.1259, + "step": 18927 + }, + { + "epoch": 2.672691330132731, + "grad_norm": 3.3912718765345695, + "learning_rate": 6.178756025493804e-07, + "loss": 0.1332, + "step": 18928 + }, + { + "epoch": 2.6728325331827167, + "grad_norm": 3.2936049964320677, + "learning_rate": 6.173481596122143e-07, + "loss": 0.1588, + "step": 18929 + }, + { + "epoch": 2.6729737362327026, + "grad_norm": 3.648172981175714, + "learning_rate": 6.16820934723823e-07, + "loss": 0.1796, + "step": 18930 + }, + { + "epoch": 2.6731149392826885, + "grad_norm": 3.4585377684542125, + "learning_rate": 6.162939278964608e-07, + "loss": 0.1793, + "step": 18931 + }, + { + "epoch": 2.6732561423326744, + "grad_norm": 4.521818504694328, + "learning_rate": 6.157671391423769e-07, + "loss": 0.2296, + "step": 18932 + }, + { + "epoch": 2.6733973453826603, + "grad_norm": 2.8765289358070842, + "learning_rate": 6.152405684738116e-07, + "loss": 0.1324, + "step": 18933 + }, + { + "epoch": 2.673538548432646, + "grad_norm": 3.4244397460049596, + "learning_rate": 6.147142159030017e-07, + "loss": 0.1305, + "step": 18934 + }, + { + "epoch": 2.673679751482632, + "grad_norm": 3.0572803614255024, + "learning_rate": 6.141880814421808e-07, + "loss": 0.1493, + "step": 18935 + }, + { + "epoch": 2.673820954532618, + "grad_norm": 2.6045432725954387, + "learning_rate": 6.136621651035756e-07, + "loss": 0.1275, + "step": 18936 + }, + { + "epoch": 2.673962157582604, + "grad_norm": 3.338761615096184, + "learning_rate": 6.131364668994078e-07, + "loss": 0.1604, + "step": 18937 + }, + { + "epoch": 2.6741033606325897, + "grad_norm": 3.075642710960734, + "learning_rate": 6.126109868418951e-07, + "loss": 0.148, + "step": 18938 + }, + { + "epoch": 2.6742445636825756, + "grad_norm": 2.7881439513064366, + "learning_rate": 6.120857249432477e-07, + "loss": 0.1344, + "step": 18939 + }, + { + "epoch": 2.6743857667325615, + "grad_norm": 3.323507131847104, + "learning_rate": 6.115606812156749e-07, + "loss": 0.1465, + "step": 18940 + }, + { + "epoch": 2.6745269697825473, + "grad_norm": 3.1496412754899006, + "learning_rate": 6.110358556713769e-07, + "loss": 0.1212, + "step": 18941 + }, + { + "epoch": 2.674668172832533, + "grad_norm": 2.831592151291494, + "learning_rate": 6.105112483225495e-07, + "loss": 0.1335, + "step": 18942 + }, + { + "epoch": 2.674809375882519, + "grad_norm": 2.99634492273145, + "learning_rate": 6.099868591813873e-07, + "loss": 0.1189, + "step": 18943 + }, + { + "epoch": 2.674950578932505, + "grad_norm": 3.9501741324685424, + "learning_rate": 6.094626882600751e-07, + "loss": 0.1907, + "step": 18944 + }, + { + "epoch": 2.675091781982491, + "grad_norm": 3.424557956022425, + "learning_rate": 6.089387355707943e-07, + "loss": 0.1326, + "step": 18945 + }, + { + "epoch": 2.6752329850324768, + "grad_norm": 3.178118038416846, + "learning_rate": 6.084150011257239e-07, + "loss": 0.1458, + "step": 18946 + }, + { + "epoch": 2.6753741880824626, + "grad_norm": 2.442330491037359, + "learning_rate": 6.078914849370288e-07, + "loss": 0.1105, + "step": 18947 + }, + { + "epoch": 2.6755153911324485, + "grad_norm": 2.9586857130900097, + "learning_rate": 6.073681870168813e-07, + "loss": 0.1142, + "step": 18948 + }, + { + "epoch": 2.6756565941824344, + "grad_norm": 2.98235144871188, + "learning_rate": 6.068451073774417e-07, + "loss": 0.1694, + "step": 18949 + }, + { + "epoch": 2.6757977972324203, + "grad_norm": 2.9756901967529474, + "learning_rate": 6.063222460308649e-07, + "loss": 0.1529, + "step": 18950 + }, + { + "epoch": 2.675939000282406, + "grad_norm": 2.985309376206234, + "learning_rate": 6.057996029893009e-07, + "loss": 0.0944, + "step": 18951 + }, + { + "epoch": 2.676080203332392, + "grad_norm": 3.6452997940352745, + "learning_rate": 6.052771782648981e-07, + "loss": 0.1917, + "step": 18952 + }, + { + "epoch": 2.676221406382378, + "grad_norm": 3.3169868049026436, + "learning_rate": 6.047549718697965e-07, + "loss": 0.1241, + "step": 18953 + }, + { + "epoch": 2.676362609432364, + "grad_norm": 3.09450034613588, + "learning_rate": 6.04232983816132e-07, + "loss": 0.1233, + "step": 18954 + }, + { + "epoch": 2.6765038124823497, + "grad_norm": 3.4057274962761035, + "learning_rate": 6.037112141160351e-07, + "loss": 0.1705, + "step": 18955 + }, + { + "epoch": 2.6766450155323356, + "grad_norm": 3.3998278069501264, + "learning_rate": 6.031896627816314e-07, + "loss": 0.1771, + "step": 18956 + }, + { + "epoch": 2.6767862185823215, + "grad_norm": 3.9665534467310035, + "learning_rate": 6.026683298250424e-07, + "loss": 0.1883, + "step": 18957 + }, + { + "epoch": 2.6769274216323073, + "grad_norm": 2.919377672701439, + "learning_rate": 6.021472152583818e-07, + "loss": 0.1261, + "step": 18958 + }, + { + "epoch": 2.6770686246822932, + "grad_norm": 2.9017765491854224, + "learning_rate": 6.01626319093761e-07, + "loss": 0.1226, + "step": 18959 + }, + { + "epoch": 2.677209827732279, + "grad_norm": 3.478671329326382, + "learning_rate": 6.01105641343287e-07, + "loss": 0.1577, + "step": 18960 + }, + { + "epoch": 2.677351030782265, + "grad_norm": 4.406722954060124, + "learning_rate": 6.005851820190578e-07, + "loss": 0.2114, + "step": 18961 + }, + { + "epoch": 2.677492233832251, + "grad_norm": 2.534073005374882, + "learning_rate": 6.000649411331705e-07, + "loss": 0.0784, + "step": 18962 + }, + { + "epoch": 2.6776334368822368, + "grad_norm": 3.9342303093407307, + "learning_rate": 5.995449186977164e-07, + "loss": 0.1716, + "step": 18963 + }, + { + "epoch": 2.6777746399322226, + "grad_norm": 3.8112865307673847, + "learning_rate": 5.99025114724775e-07, + "loss": 0.1614, + "step": 18964 + }, + { + "epoch": 2.6779158429822085, + "grad_norm": 3.3592786745772716, + "learning_rate": 5.985055292264308e-07, + "loss": 0.1514, + "step": 18965 + }, + { + "epoch": 2.6780570460321944, + "grad_norm": 2.2162224684191965, + "learning_rate": 5.979861622147587e-07, + "loss": 0.1031, + "step": 18966 + }, + { + "epoch": 2.6781982490821803, + "grad_norm": 2.573171147229366, + "learning_rate": 5.974670137018279e-07, + "loss": 0.1248, + "step": 18967 + }, + { + "epoch": 2.678339452132166, + "grad_norm": 2.773468450619444, + "learning_rate": 5.969480836997032e-07, + "loss": 0.1133, + "step": 18968 + }, + { + "epoch": 2.678480655182152, + "grad_norm": 3.3618166237548857, + "learning_rate": 5.96429372220444e-07, + "loss": 0.1879, + "step": 18969 + }, + { + "epoch": 2.678621858232138, + "grad_norm": 3.672083321203852, + "learning_rate": 5.959108792761048e-07, + "loss": 0.1615, + "step": 18970 + }, + { + "epoch": 2.678763061282124, + "grad_norm": 2.884444891363575, + "learning_rate": 5.953926048787361e-07, + "loss": 0.1189, + "step": 18971 + }, + { + "epoch": 2.6789042643321097, + "grad_norm": 2.6209565055653554, + "learning_rate": 5.948745490403806e-07, + "loss": 0.1275, + "step": 18972 + }, + { + "epoch": 2.6790454673820956, + "grad_norm": 2.3840550587603446, + "learning_rate": 5.943567117730797e-07, + "loss": 0.116, + "step": 18973 + }, + { + "epoch": 2.6791866704320815, + "grad_norm": 3.2331158277296153, + "learning_rate": 5.938390930888671e-07, + "loss": 0.1215, + "step": 18974 + }, + { + "epoch": 2.6793278734820674, + "grad_norm": 2.7294215476185144, + "learning_rate": 5.933216929997709e-07, + "loss": 0.148, + "step": 18975 + }, + { + "epoch": 2.6794690765320532, + "grad_norm": 2.73788899170489, + "learning_rate": 5.92804511517815e-07, + "loss": 0.1224, + "step": 18976 + }, + { + "epoch": 2.679610279582039, + "grad_norm": 2.9687303832681424, + "learning_rate": 5.922875486550206e-07, + "loss": 0.1365, + "step": 18977 + }, + { + "epoch": 2.679751482632025, + "grad_norm": 2.862470549006838, + "learning_rate": 5.917708044234017e-07, + "loss": 0.103, + "step": 18978 + }, + { + "epoch": 2.679892685682011, + "grad_norm": 3.027712361093174, + "learning_rate": 5.912542788349651e-07, + "loss": 0.1015, + "step": 18979 + }, + { + "epoch": 2.6800338887319968, + "grad_norm": 2.9296993236493596, + "learning_rate": 5.907379719017181e-07, + "loss": 0.1286, + "step": 18980 + }, + { + "epoch": 2.6801750917819827, + "grad_norm": 2.986466967467467, + "learning_rate": 5.902218836356543e-07, + "loss": 0.1188, + "step": 18981 + }, + { + "epoch": 2.680316294831968, + "grad_norm": 2.7195662842223696, + "learning_rate": 5.897060140487709e-07, + "loss": 0.1151, + "step": 18982 + }, + { + "epoch": 2.680457497881954, + "grad_norm": 3.138543268353344, + "learning_rate": 5.89190363153056e-07, + "loss": 0.1454, + "step": 18983 + }, + { + "epoch": 2.68059870093194, + "grad_norm": 3.2519711239472384, + "learning_rate": 5.886749309604922e-07, + "loss": 0.1348, + "step": 18984 + }, + { + "epoch": 2.6807399039819257, + "grad_norm": 3.2365358663359314, + "learning_rate": 5.8815971748306e-07, + "loss": 0.1512, + "step": 18985 + }, + { + "epoch": 2.6808811070319116, + "grad_norm": 3.1473212646039013, + "learning_rate": 5.876447227327298e-07, + "loss": 0.1518, + "step": 18986 + }, + { + "epoch": 2.6810223100818975, + "grad_norm": 2.846596125400192, + "learning_rate": 5.871299467214719e-07, + "loss": 0.1437, + "step": 18987 + }, + { + "epoch": 2.6811635131318834, + "grad_norm": 3.04775406038311, + "learning_rate": 5.866153894612492e-07, + "loss": 0.1266, + "step": 18988 + }, + { + "epoch": 2.6813047161818693, + "grad_norm": 3.2110310908984667, + "learning_rate": 5.861010509640197e-07, + "loss": 0.147, + "step": 18989 + }, + { + "epoch": 2.681445919231855, + "grad_norm": 2.8757317806946068, + "learning_rate": 5.855869312417362e-07, + "loss": 0.1364, + "step": 18990 + }, + { + "epoch": 2.681587122281841, + "grad_norm": 2.7487249745635887, + "learning_rate": 5.850730303063467e-07, + "loss": 0.0932, + "step": 18991 + }, + { + "epoch": 2.681728325331827, + "grad_norm": 2.6496738666761788, + "learning_rate": 5.845593481697931e-07, + "loss": 0.1097, + "step": 18992 + }, + { + "epoch": 2.681869528381813, + "grad_norm": 2.8386188259177816, + "learning_rate": 5.840458848440133e-07, + "loss": 0.1172, + "step": 18993 + }, + { + "epoch": 2.6820107314317987, + "grad_norm": 3.6080483375773045, + "learning_rate": 5.835326403409414e-07, + "loss": 0.1585, + "step": 18994 + }, + { + "epoch": 2.6821519344817846, + "grad_norm": 2.6287751868177125, + "learning_rate": 5.830196146725054e-07, + "loss": 0.0791, + "step": 18995 + }, + { + "epoch": 2.6822931375317705, + "grad_norm": 3.848219676871724, + "learning_rate": 5.825068078506257e-07, + "loss": 0.1706, + "step": 18996 + }, + { + "epoch": 2.6824343405817563, + "grad_norm": 2.4340363925809245, + "learning_rate": 5.819942198872231e-07, + "loss": 0.1063, + "step": 18997 + }, + { + "epoch": 2.682575543631742, + "grad_norm": 3.355244965771629, + "learning_rate": 5.814818507942055e-07, + "loss": 0.1478, + "step": 18998 + }, + { + "epoch": 2.682716746681728, + "grad_norm": 4.1048796921205, + "learning_rate": 5.809697005834803e-07, + "loss": 0.191, + "step": 18999 + }, + { + "epoch": 2.682857949731714, + "grad_norm": 2.974247169667982, + "learning_rate": 5.804577692669533e-07, + "loss": 0.1601, + "step": 19000 + }, + { + "epoch": 2.6829991527817, + "grad_norm": 2.6784393263527098, + "learning_rate": 5.799460568565207e-07, + "loss": 0.1177, + "step": 19001 + }, + { + "epoch": 2.6831403558316858, + "grad_norm": 3.0415582309252214, + "learning_rate": 5.794345633640718e-07, + "loss": 0.1169, + "step": 19002 + }, + { + "epoch": 2.6832815588816716, + "grad_norm": 4.07905813492894, + "learning_rate": 5.789232888014962e-07, + "loss": 0.196, + "step": 19003 + }, + { + "epoch": 2.6834227619316575, + "grad_norm": 2.8016702977154444, + "learning_rate": 5.784122331806751e-07, + "loss": 0.0967, + "step": 19004 + }, + { + "epoch": 2.6835639649816434, + "grad_norm": 3.2098553429522503, + "learning_rate": 5.779013965134839e-07, + "loss": 0.1438, + "step": 19005 + }, + { + "epoch": 2.6837051680316293, + "grad_norm": 3.6643995399044784, + "learning_rate": 5.77390778811796e-07, + "loss": 0.1305, + "step": 19006 + }, + { + "epoch": 2.683846371081615, + "grad_norm": 4.249650115080932, + "learning_rate": 5.768803800874767e-07, + "loss": 0.2066, + "step": 19007 + }, + { + "epoch": 2.683987574131601, + "grad_norm": 3.037628243586277, + "learning_rate": 5.763702003523874e-07, + "loss": 0.1383, + "step": 19008 + }, + { + "epoch": 2.684128777181587, + "grad_norm": 3.743267011433064, + "learning_rate": 5.758602396183854e-07, + "loss": 0.1436, + "step": 19009 + }, + { + "epoch": 2.684269980231573, + "grad_norm": 2.8516668177977365, + "learning_rate": 5.753504978973212e-07, + "loss": 0.1308, + "step": 19010 + }, + { + "epoch": 2.6844111832815587, + "grad_norm": 3.669760151555839, + "learning_rate": 5.748409752010397e-07, + "loss": 0.1612, + "step": 19011 + }, + { + "epoch": 2.6845523863315446, + "grad_norm": 3.27029870438877, + "learning_rate": 5.743316715413849e-07, + "loss": 0.1512, + "step": 19012 + }, + { + "epoch": 2.6846935893815305, + "grad_norm": 4.1136707174027, + "learning_rate": 5.738225869301927e-07, + "loss": 0.1723, + "step": 19013 + }, + { + "epoch": 2.6848347924315163, + "grad_norm": 3.6389164536851077, + "learning_rate": 5.733137213792928e-07, + "loss": 0.124, + "step": 19014 + }, + { + "epoch": 2.6849759954815022, + "grad_norm": 3.0938070057594884, + "learning_rate": 5.728050749005099e-07, + "loss": 0.1461, + "step": 19015 + }, + { + "epoch": 2.685117198531488, + "grad_norm": 3.0278155383327867, + "learning_rate": 5.722966475056646e-07, + "loss": 0.132, + "step": 19016 + }, + { + "epoch": 2.685258401581474, + "grad_norm": 3.929198539438923, + "learning_rate": 5.717884392065743e-07, + "loss": 0.1569, + "step": 19017 + }, + { + "epoch": 2.68539960463146, + "grad_norm": 3.887382047937539, + "learning_rate": 5.712804500150493e-07, + "loss": 0.1701, + "step": 19018 + }, + { + "epoch": 2.6855408076814458, + "grad_norm": 3.6665968122624784, + "learning_rate": 5.707726799428947e-07, + "loss": 0.1876, + "step": 19019 + }, + { + "epoch": 2.6856820107314316, + "grad_norm": 3.2752520589215948, + "learning_rate": 5.702651290019112e-07, + "loss": 0.1626, + "step": 19020 + }, + { + "epoch": 2.6858232137814175, + "grad_norm": 3.2461303779296102, + "learning_rate": 5.697577972038937e-07, + "loss": 0.149, + "step": 19021 + }, + { + "epoch": 2.6859644168314034, + "grad_norm": 3.4905942725797616, + "learning_rate": 5.692506845606327e-07, + "loss": 0.126, + "step": 19022 + }, + { + "epoch": 2.6861056198813893, + "grad_norm": 2.963669569689377, + "learning_rate": 5.687437910839121e-07, + "loss": 0.128, + "step": 19023 + }, + { + "epoch": 2.686246822931375, + "grad_norm": 3.7959105595318636, + "learning_rate": 5.682371167855127e-07, + "loss": 0.1507, + "step": 19024 + }, + { + "epoch": 2.686388025981361, + "grad_norm": 2.932039549353003, + "learning_rate": 5.677306616772105e-07, + "loss": 0.1508, + "step": 19025 + }, + { + "epoch": 2.686529229031347, + "grad_norm": 3.1275218451479736, + "learning_rate": 5.672244257707738e-07, + "loss": 0.1485, + "step": 19026 + }, + { + "epoch": 2.686670432081333, + "grad_norm": 3.4260351997864427, + "learning_rate": 5.667184090779676e-07, + "loss": 0.1329, + "step": 19027 + }, + { + "epoch": 2.6868116351313187, + "grad_norm": 3.9353186569413534, + "learning_rate": 5.662126116105504e-07, + "loss": 0.1969, + "step": 19028 + }, + { + "epoch": 2.6869528381813046, + "grad_norm": 2.872269787861905, + "learning_rate": 5.657070333802783e-07, + "loss": 0.1283, + "step": 19029 + }, + { + "epoch": 2.6870940412312905, + "grad_norm": 3.542729659227113, + "learning_rate": 5.65201674398902e-07, + "loss": 0.1558, + "step": 19030 + }, + { + "epoch": 2.6872352442812764, + "grad_norm": 3.301196922868541, + "learning_rate": 5.646965346781641e-07, + "loss": 0.1341, + "step": 19031 + }, + { + "epoch": 2.6873764473312622, + "grad_norm": 3.753113700529591, + "learning_rate": 5.641916142298043e-07, + "loss": 0.1927, + "step": 19032 + }, + { + "epoch": 2.687517650381248, + "grad_norm": 3.477632207786333, + "learning_rate": 5.636869130655531e-07, + "loss": 0.1375, + "step": 19033 + }, + { + "epoch": 2.687658853431234, + "grad_norm": 2.7970213054980313, + "learning_rate": 5.631824311971456e-07, + "loss": 0.1084, + "step": 19034 + }, + { + "epoch": 2.68780005648122, + "grad_norm": 2.5093534412668848, + "learning_rate": 5.626781686363025e-07, + "loss": 0.1021, + "step": 19035 + }, + { + "epoch": 2.6879412595312058, + "grad_norm": 3.801036696100907, + "learning_rate": 5.621741253947432e-07, + "loss": 0.1691, + "step": 19036 + }, + { + "epoch": 2.6880824625811917, + "grad_norm": 2.8923023227594964, + "learning_rate": 5.616703014841807e-07, + "loss": 0.152, + "step": 19037 + }, + { + "epoch": 2.6882236656311775, + "grad_norm": 3.6391323790108996, + "learning_rate": 5.611666969163243e-07, + "loss": 0.1714, + "step": 19038 + }, + { + "epoch": 2.6883648686811634, + "grad_norm": 2.940965592436462, + "learning_rate": 5.606633117028781e-07, + "loss": 0.1426, + "step": 19039 + }, + { + "epoch": 2.6885060717311493, + "grad_norm": 2.329067740750674, + "learning_rate": 5.601601458555406e-07, + "loss": 0.0806, + "step": 19040 + }, + { + "epoch": 2.688647274781135, + "grad_norm": 3.7912875694316877, + "learning_rate": 5.596571993860034e-07, + "loss": 0.1571, + "step": 19041 + }, + { + "epoch": 2.688788477831121, + "grad_norm": 2.798507332138452, + "learning_rate": 5.591544723059561e-07, + "loss": 0.1307, + "step": 19042 + }, + { + "epoch": 2.688929680881107, + "grad_norm": 3.0614612932910497, + "learning_rate": 5.586519646270827e-07, + "loss": 0.1131, + "step": 19043 + }, + { + "epoch": 2.689070883931093, + "grad_norm": 3.3620514072894, + "learning_rate": 5.581496763610594e-07, + "loss": 0.1567, + "step": 19044 + }, + { + "epoch": 2.6892120869810787, + "grad_norm": 2.8883448866685164, + "learning_rate": 5.57647607519558e-07, + "loss": 0.1544, + "step": 19045 + }, + { + "epoch": 2.6893532900310646, + "grad_norm": 2.364792159952339, + "learning_rate": 5.571457581142514e-07, + "loss": 0.1135, + "step": 19046 + }, + { + "epoch": 2.6894944930810505, + "grad_norm": 4.003246247624127, + "learning_rate": 5.566441281567981e-07, + "loss": 0.1981, + "step": 19047 + }, + { + "epoch": 2.6896356961310364, + "grad_norm": 3.785324535377466, + "learning_rate": 5.561427176588586e-07, + "loss": 0.1906, + "step": 19048 + }, + { + "epoch": 2.6897768991810223, + "grad_norm": 3.5273708150632617, + "learning_rate": 5.556415266320824e-07, + "loss": 0.128, + "step": 19049 + }, + { + "epoch": 2.689918102231008, + "grad_norm": 2.7854327723149113, + "learning_rate": 5.551405550881173e-07, + "loss": 0.1249, + "step": 19050 + }, + { + "epoch": 2.690059305280994, + "grad_norm": 2.584919544705247, + "learning_rate": 5.54639803038608e-07, + "loss": 0.1531, + "step": 19051 + }, + { + "epoch": 2.69020050833098, + "grad_norm": 4.8878967189745515, + "learning_rate": 5.541392704951909e-07, + "loss": 0.2216, + "step": 19052 + }, + { + "epoch": 2.690341711380966, + "grad_norm": 3.4535369378998446, + "learning_rate": 5.536389574694967e-07, + "loss": 0.0977, + "step": 19053 + }, + { + "epoch": 2.6904829144309517, + "grad_norm": 2.926660762367837, + "learning_rate": 5.53138863973155e-07, + "loss": 0.1168, + "step": 19054 + }, + { + "epoch": 2.6906241174809375, + "grad_norm": 3.1953672848195276, + "learning_rate": 5.526389900177854e-07, + "loss": 0.1509, + "step": 19055 + }, + { + "epoch": 2.6907653205309234, + "grad_norm": 3.2077296909392037, + "learning_rate": 5.521393356150062e-07, + "loss": 0.1353, + "step": 19056 + }, + { + "epoch": 2.6909065235809093, + "grad_norm": 2.856436741511749, + "learning_rate": 5.516399007764283e-07, + "loss": 0.1147, + "step": 19057 + }, + { + "epoch": 2.691047726630895, + "grad_norm": 2.8974034987419652, + "learning_rate": 5.51140685513658e-07, + "loss": 0.1254, + "step": 19058 + }, + { + "epoch": 2.691188929680881, + "grad_norm": 3.351434309389812, + "learning_rate": 5.506416898382982e-07, + "loss": 0.1702, + "step": 19059 + }, + { + "epoch": 2.691330132730867, + "grad_norm": 2.9459254896988294, + "learning_rate": 5.501429137619452e-07, + "loss": 0.1593, + "step": 19060 + }, + { + "epoch": 2.691471335780853, + "grad_norm": 2.3664224772663354, + "learning_rate": 5.496443572961896e-07, + "loss": 0.1042, + "step": 19061 + }, + { + "epoch": 2.6916125388308387, + "grad_norm": 3.4854904183769415, + "learning_rate": 5.491460204526156e-07, + "loss": 0.1471, + "step": 19062 + }, + { + "epoch": 2.6917537418808246, + "grad_norm": 3.993988632101775, + "learning_rate": 5.486479032428083e-07, + "loss": 0.2047, + "step": 19063 + }, + { + "epoch": 2.6918949449308105, + "grad_norm": 3.176737406259515, + "learning_rate": 5.481500056783429e-07, + "loss": 0.1465, + "step": 19064 + }, + { + "epoch": 2.6920361479807964, + "grad_norm": 3.099314378762003, + "learning_rate": 5.476523277707902e-07, + "loss": 0.1302, + "step": 19065 + }, + { + "epoch": 2.6921773510307823, + "grad_norm": 3.807212807544743, + "learning_rate": 5.471548695317131e-07, + "loss": 0.1769, + "step": 19066 + }, + { + "epoch": 2.692318554080768, + "grad_norm": 3.2927473763259885, + "learning_rate": 5.466576309726735e-07, + "loss": 0.1868, + "step": 19067 + }, + { + "epoch": 2.692459757130754, + "grad_norm": 3.1765422679691575, + "learning_rate": 5.461606121052299e-07, + "loss": 0.1339, + "step": 19068 + }, + { + "epoch": 2.69260096018074, + "grad_norm": 3.201655287983891, + "learning_rate": 5.456638129409308e-07, + "loss": 0.1327, + "step": 19069 + }, + { + "epoch": 2.692742163230726, + "grad_norm": 2.5159828852971255, + "learning_rate": 5.451672334913216e-07, + "loss": 0.1267, + "step": 19070 + }, + { + "epoch": 2.6928833662807117, + "grad_norm": 2.676305694253513, + "learning_rate": 5.446708737679418e-07, + "loss": 0.1186, + "step": 19071 + }, + { + "epoch": 2.6930245693306976, + "grad_norm": 3.628806429074437, + "learning_rate": 5.441747337823289e-07, + "loss": 0.1647, + "step": 19072 + }, + { + "epoch": 2.6931657723806834, + "grad_norm": 2.6914377469511424, + "learning_rate": 5.436788135460102e-07, + "loss": 0.1225, + "step": 19073 + }, + { + "epoch": 2.6933069754306693, + "grad_norm": 3.1206745870127905, + "learning_rate": 5.431831130705123e-07, + "loss": 0.1648, + "step": 19074 + }, + { + "epoch": 2.693448178480655, + "grad_norm": 3.3849478209562895, + "learning_rate": 5.426876323673558e-07, + "loss": 0.1598, + "step": 19075 + }, + { + "epoch": 2.693589381530641, + "grad_norm": 3.1361860004020223, + "learning_rate": 5.421923714480537e-07, + "loss": 0.1346, + "step": 19076 + }, + { + "epoch": 2.693730584580627, + "grad_norm": 3.495847556919755, + "learning_rate": 5.416973303241158e-07, + "loss": 0.1581, + "step": 19077 + }, + { + "epoch": 2.693871787630613, + "grad_norm": 2.9467534165705964, + "learning_rate": 5.412025090070483e-07, + "loss": 0.1199, + "step": 19078 + }, + { + "epoch": 2.6940129906805987, + "grad_norm": 2.3519348593279514, + "learning_rate": 5.407079075083476e-07, + "loss": 0.1356, + "step": 19079 + }, + { + "epoch": 2.6941541937305846, + "grad_norm": 2.764203795868386, + "learning_rate": 5.402135258395114e-07, + "loss": 0.0793, + "step": 19080 + }, + { + "epoch": 2.6942953967805705, + "grad_norm": 2.8703837246343684, + "learning_rate": 5.397193640120291e-07, + "loss": 0.1297, + "step": 19081 + }, + { + "epoch": 2.6944365998305564, + "grad_norm": 3.011323806329975, + "learning_rate": 5.392254220373816e-07, + "loss": 0.1389, + "step": 19082 + }, + { + "epoch": 2.6945778028805423, + "grad_norm": 3.8588163367263264, + "learning_rate": 5.387316999270487e-07, + "loss": 0.2012, + "step": 19083 + }, + { + "epoch": 2.694719005930528, + "grad_norm": 2.6934225142624943, + "learning_rate": 5.382381976925044e-07, + "loss": 0.1266, + "step": 19084 + }, + { + "epoch": 2.694860208980514, + "grad_norm": 2.683012773383494, + "learning_rate": 5.377449153452196e-07, + "loss": 0.1201, + "step": 19085 + }, + { + "epoch": 2.6950014120305, + "grad_norm": 2.3597152758200584, + "learning_rate": 5.372518528966575e-07, + "loss": 0.1033, + "step": 19086 + }, + { + "epoch": 2.695142615080486, + "grad_norm": 2.749032819719462, + "learning_rate": 5.367590103582742e-07, + "loss": 0.1105, + "step": 19087 + }, + { + "epoch": 2.6952838181304717, + "grad_norm": 2.9806080585773342, + "learning_rate": 5.362663877415252e-07, + "loss": 0.1537, + "step": 19088 + }, + { + "epoch": 2.6954250211804576, + "grad_norm": 3.4066412878261123, + "learning_rate": 5.357739850578581e-07, + "loss": 0.1627, + "step": 19089 + }, + { + "epoch": 2.6955662242304435, + "grad_norm": 2.8726060734438867, + "learning_rate": 5.352818023187167e-07, + "loss": 0.1726, + "step": 19090 + }, + { + "epoch": 2.6957074272804293, + "grad_norm": 3.2129525936777292, + "learning_rate": 5.347898395355388e-07, + "loss": 0.1518, + "step": 19091 + }, + { + "epoch": 2.695848630330415, + "grad_norm": 3.342875459569788, + "learning_rate": 5.342980967197564e-07, + "loss": 0.1665, + "step": 19092 + }, + { + "epoch": 2.695989833380401, + "grad_norm": 3.7244511142724295, + "learning_rate": 5.338065738827991e-07, + "loss": 0.1633, + "step": 19093 + }, + { + "epoch": 2.696131036430387, + "grad_norm": 3.4881894970801177, + "learning_rate": 5.33315271036089e-07, + "loss": 0.1729, + "step": 19094 + }, + { + "epoch": 2.696272239480373, + "grad_norm": 3.1755292926553156, + "learning_rate": 5.328241881910434e-07, + "loss": 0.1506, + "step": 19095 + }, + { + "epoch": 2.6964134425303588, + "grad_norm": 3.2129004908709, + "learning_rate": 5.323333253590734e-07, + "loss": 0.1477, + "step": 19096 + }, + { + "epoch": 2.6965546455803446, + "grad_norm": 2.9758710882694643, + "learning_rate": 5.318426825515898e-07, + "loss": 0.1357, + "step": 19097 + }, + { + "epoch": 2.6966958486303305, + "grad_norm": 2.9957215924801965, + "learning_rate": 5.313522597799947e-07, + "loss": 0.1323, + "step": 19098 + }, + { + "epoch": 2.6968370516803164, + "grad_norm": 3.5747130631605675, + "learning_rate": 5.308620570556833e-07, + "loss": 0.2152, + "step": 19099 + }, + { + "epoch": 2.6969782547303023, + "grad_norm": 3.6043938478452793, + "learning_rate": 5.303720743900475e-07, + "loss": 0.1226, + "step": 19100 + }, + { + "epoch": 2.697119457780288, + "grad_norm": 3.4154618436744704, + "learning_rate": 5.298823117944752e-07, + "loss": 0.1671, + "step": 19101 + }, + { + "epoch": 2.697260660830274, + "grad_norm": 3.0653670978270506, + "learning_rate": 5.293927692803458e-07, + "loss": 0.1568, + "step": 19102 + }, + { + "epoch": 2.69740186388026, + "grad_norm": 2.825492856742143, + "learning_rate": 5.289034468590404e-07, + "loss": 0.1188, + "step": 19103 + }, + { + "epoch": 2.697543066930246, + "grad_norm": 2.7985941607173266, + "learning_rate": 5.284143445419288e-07, + "loss": 0.1266, + "step": 19104 + }, + { + "epoch": 2.6976842699802317, + "grad_norm": 3.587516550529802, + "learning_rate": 5.279254623403773e-07, + "loss": 0.1581, + "step": 19105 + }, + { + "epoch": 2.6978254730302176, + "grad_norm": 3.2589486563636005, + "learning_rate": 5.274368002657482e-07, + "loss": 0.1549, + "step": 19106 + }, + { + "epoch": 2.6979666760802035, + "grad_norm": 3.366952863050015, + "learning_rate": 5.269483583293966e-07, + "loss": 0.1394, + "step": 19107 + }, + { + "epoch": 2.6981078791301893, + "grad_norm": 3.1120548424397727, + "learning_rate": 5.264601365426736e-07, + "loss": 0.1522, + "step": 19108 + }, + { + "epoch": 2.6982490821801752, + "grad_norm": 3.3326655369205977, + "learning_rate": 5.259721349169256e-07, + "loss": 0.1447, + "step": 19109 + }, + { + "epoch": 2.698390285230161, + "grad_norm": 2.7084479490214677, + "learning_rate": 5.254843534634934e-07, + "loss": 0.1074, + "step": 19110 + }, + { + "epoch": 2.698531488280147, + "grad_norm": 2.6876121202472034, + "learning_rate": 5.249967921937137e-07, + "loss": 0.1235, + "step": 19111 + }, + { + "epoch": 2.698672691330133, + "grad_norm": 2.846348170412563, + "learning_rate": 5.245094511189163e-07, + "loss": 0.1568, + "step": 19112 + }, + { + "epoch": 2.6988138943801188, + "grad_norm": 2.4340867000945052, + "learning_rate": 5.240223302504277e-07, + "loss": 0.1026, + "step": 19113 + }, + { + "epoch": 2.6989550974301046, + "grad_norm": 3.119337968242527, + "learning_rate": 5.235354295995665e-07, + "loss": 0.1306, + "step": 19114 + }, + { + "epoch": 2.6990963004800905, + "grad_norm": 3.132767127765841, + "learning_rate": 5.230487491776514e-07, + "loss": 0.1375, + "step": 19115 + }, + { + "epoch": 2.6992375035300764, + "grad_norm": 3.27569020704532, + "learning_rate": 5.225622889959892e-07, + "loss": 0.181, + "step": 19116 + }, + { + "epoch": 2.6993787065800623, + "grad_norm": 2.9725271297245577, + "learning_rate": 5.220760490658872e-07, + "loss": 0.1238, + "step": 19117 + }, + { + "epoch": 2.699519909630048, + "grad_norm": 3.152266444593001, + "learning_rate": 5.215900293986431e-07, + "loss": 0.1408, + "step": 19118 + }, + { + "epoch": 2.699661112680034, + "grad_norm": 3.404397665100183, + "learning_rate": 5.211042300055535e-07, + "loss": 0.1667, + "step": 19119 + }, + { + "epoch": 2.69980231573002, + "grad_norm": 2.8045646448817525, + "learning_rate": 5.206186508979083e-07, + "loss": 0.1217, + "step": 19120 + }, + { + "epoch": 2.699943518780006, + "grad_norm": 3.613183855529652, + "learning_rate": 5.201332920869928e-07, + "loss": 0.1357, + "step": 19121 + }, + { + "epoch": 2.7000847218299917, + "grad_norm": 3.8244422388171935, + "learning_rate": 5.196481535840847e-07, + "loss": 0.1547, + "step": 19122 + }, + { + "epoch": 2.7002259248799776, + "grad_norm": 2.930325720451192, + "learning_rate": 5.191632354004595e-07, + "loss": 0.1159, + "step": 19123 + }, + { + "epoch": 2.7003671279299635, + "grad_norm": 2.9024432548467676, + "learning_rate": 5.186785375473869e-07, + "loss": 0.1549, + "step": 19124 + }, + { + "epoch": 2.7005083309799494, + "grad_norm": 2.9512865250024176, + "learning_rate": 5.18194060036129e-07, + "loss": 0.1372, + "step": 19125 + }, + { + "epoch": 2.7006495340299352, + "grad_norm": 4.075332340107143, + "learning_rate": 5.17709802877947e-07, + "loss": 0.1957, + "step": 19126 + }, + { + "epoch": 2.700790737079921, + "grad_norm": 3.6335445279620977, + "learning_rate": 5.172257660840951e-07, + "loss": 0.151, + "step": 19127 + }, + { + "epoch": 2.700931940129907, + "grad_norm": 3.143482016645844, + "learning_rate": 5.1674194966582e-07, + "loss": 0.1148, + "step": 19128 + }, + { + "epoch": 2.701073143179893, + "grad_norm": 3.4514544439328985, + "learning_rate": 5.162583536343668e-07, + "loss": 0.1445, + "step": 19129 + }, + { + "epoch": 2.7012143462298788, + "grad_norm": 3.7648446873436696, + "learning_rate": 5.157749780009735e-07, + "loss": 0.1514, + "step": 19130 + }, + { + "epoch": 2.7013555492798647, + "grad_norm": 3.425156825667327, + "learning_rate": 5.152918227768722e-07, + "loss": 0.1663, + "step": 19131 + }, + { + "epoch": 2.7014967523298505, + "grad_norm": 3.5964489354492652, + "learning_rate": 5.14808887973296e-07, + "loss": 0.186, + "step": 19132 + }, + { + "epoch": 2.7016379553798364, + "grad_norm": 3.105044089785203, + "learning_rate": 5.143261736014638e-07, + "loss": 0.1392, + "step": 19133 + }, + { + "epoch": 2.7017791584298223, + "grad_norm": 3.0233078225139796, + "learning_rate": 5.138436796725942e-07, + "loss": 0.1507, + "step": 19134 + }, + { + "epoch": 2.701920361479808, + "grad_norm": 3.2831338007440523, + "learning_rate": 5.133614061979009e-07, + "loss": 0.1516, + "step": 19135 + }, + { + "epoch": 2.702061564529794, + "grad_norm": 3.076370467422102, + "learning_rate": 5.1287935318859e-07, + "loss": 0.1446, + "step": 19136 + }, + { + "epoch": 2.70220276757978, + "grad_norm": 3.022253648097624, + "learning_rate": 5.123975206558673e-07, + "loss": 0.1307, + "step": 19137 + }, + { + "epoch": 2.702343970629766, + "grad_norm": 3.235807656540043, + "learning_rate": 5.119159086109293e-07, + "loss": 0.1475, + "step": 19138 + }, + { + "epoch": 2.7024851736797517, + "grad_norm": 3.650282487818748, + "learning_rate": 5.11434517064967e-07, + "loss": 0.1471, + "step": 19139 + }, + { + "epoch": 2.7026263767297376, + "grad_norm": 3.5715892629243218, + "learning_rate": 5.109533460291694e-07, + "loss": 0.134, + "step": 19140 + }, + { + "epoch": 2.7027675797797235, + "grad_norm": 3.0856119858027458, + "learning_rate": 5.104723955147184e-07, + "loss": 0.1083, + "step": 19141 + }, + { + "epoch": 2.7029087828297094, + "grad_norm": 3.499097674482075, + "learning_rate": 5.099916655327907e-07, + "loss": 0.1455, + "step": 19142 + }, + { + "epoch": 2.7030499858796952, + "grad_norm": 2.9478391757589075, + "learning_rate": 5.095111560945575e-07, + "loss": 0.1383, + "step": 19143 + }, + { + "epoch": 2.703191188929681, + "grad_norm": 3.4206204930941544, + "learning_rate": 5.090308672111866e-07, + "loss": 0.1455, + "step": 19144 + }, + { + "epoch": 2.703332391979667, + "grad_norm": 2.370123077117807, + "learning_rate": 5.085507988938398e-07, + "loss": 0.1095, + "step": 19145 + }, + { + "epoch": 2.703473595029653, + "grad_norm": 3.8179380296983436, + "learning_rate": 5.08070951153673e-07, + "loss": 0.2021, + "step": 19146 + }, + { + "epoch": 2.703614798079639, + "grad_norm": 3.7506062555578974, + "learning_rate": 5.075913240018382e-07, + "loss": 0.225, + "step": 19147 + }, + { + "epoch": 2.7037560011296247, + "grad_norm": 3.1041313820730423, + "learning_rate": 5.07111917449481e-07, + "loss": 0.1353, + "step": 19148 + }, + { + "epoch": 2.7038972041796105, + "grad_norm": 2.62418338088162, + "learning_rate": 5.066327315077446e-07, + "loss": 0.113, + "step": 19149 + }, + { + "epoch": 2.7040384072295964, + "grad_norm": 3.5242467536730233, + "learning_rate": 5.061537661877636e-07, + "loss": 0.1635, + "step": 19150 + }, + { + "epoch": 2.7041796102795823, + "grad_norm": 3.3973290248577572, + "learning_rate": 5.056750215006678e-07, + "loss": 0.1486, + "step": 19151 + }, + { + "epoch": 2.704320813329568, + "grad_norm": 2.8452539593671244, + "learning_rate": 5.051964974575851e-07, + "loss": 0.1573, + "step": 19152 + }, + { + "epoch": 2.7044620163795536, + "grad_norm": 3.3145380476691897, + "learning_rate": 5.047181940696333e-07, + "loss": 0.1309, + "step": 19153 + }, + { + "epoch": 2.7046032194295395, + "grad_norm": 2.8988189842057275, + "learning_rate": 5.042401113479312e-07, + "loss": 0.1399, + "step": 19154 + }, + { + "epoch": 2.7047444224795254, + "grad_norm": 3.308795906612785, + "learning_rate": 5.037622493035888e-07, + "loss": 0.1594, + "step": 19155 + }, + { + "epoch": 2.7048856255295113, + "grad_norm": 2.9664548304323515, + "learning_rate": 5.032846079477105e-07, + "loss": 0.1215, + "step": 19156 + }, + { + "epoch": 2.705026828579497, + "grad_norm": 2.974756048972944, + "learning_rate": 5.028071872913953e-07, + "loss": 0.1521, + "step": 19157 + }, + { + "epoch": 2.705168031629483, + "grad_norm": 2.569499971666096, + "learning_rate": 5.02329987345741e-07, + "loss": 0.1159, + "step": 19158 + }, + { + "epoch": 2.705309234679469, + "grad_norm": 3.4365466061319965, + "learning_rate": 5.018530081218353e-07, + "loss": 0.1521, + "step": 19159 + }, + { + "epoch": 2.705450437729455, + "grad_norm": 2.430354210954474, + "learning_rate": 5.01376249630764e-07, + "loss": 0.1069, + "step": 19160 + }, + { + "epoch": 2.7055916407794407, + "grad_norm": 3.2108701681068226, + "learning_rate": 5.008997118836067e-07, + "loss": 0.1319, + "step": 19161 + }, + { + "epoch": 2.7057328438294266, + "grad_norm": 3.177527287010105, + "learning_rate": 5.004233948914383e-07, + "loss": 0.1591, + "step": 19162 + }, + { + "epoch": 2.7058740468794125, + "grad_norm": 3.185614153356821, + "learning_rate": 4.999472986653264e-07, + "loss": 0.1235, + "step": 19163 + }, + { + "epoch": 2.7060152499293983, + "grad_norm": 3.32487888739914, + "learning_rate": 4.994714232163378e-07, + "loss": 0.1291, + "step": 19164 + }, + { + "epoch": 2.7061564529793842, + "grad_norm": 3.0178970591947722, + "learning_rate": 4.98995768555528e-07, + "loss": 0.1111, + "step": 19165 + }, + { + "epoch": 2.70629765602937, + "grad_norm": 2.9313414052942095, + "learning_rate": 4.98520334693956e-07, + "loss": 0.1288, + "step": 19166 + }, + { + "epoch": 2.706438859079356, + "grad_norm": 2.8835610879835945, + "learning_rate": 4.980451216426674e-07, + "loss": 0.1203, + "step": 19167 + }, + { + "epoch": 2.706580062129342, + "grad_norm": 3.1313529663020634, + "learning_rate": 4.975701294127067e-07, + "loss": 0.1806, + "step": 19168 + }, + { + "epoch": 2.7067212651793278, + "grad_norm": 3.734462058301065, + "learning_rate": 4.970953580151117e-07, + "loss": 0.1645, + "step": 19169 + }, + { + "epoch": 2.7068624682293136, + "grad_norm": 3.4066574993368524, + "learning_rate": 4.966208074609158e-07, + "loss": 0.157, + "step": 19170 + }, + { + "epoch": 2.7070036712792995, + "grad_norm": 3.641324660665661, + "learning_rate": 4.961464777611491e-07, + "loss": 0.1643, + "step": 19171 + }, + { + "epoch": 2.7071448743292854, + "grad_norm": 3.2046531039605837, + "learning_rate": 4.956723689268339e-07, + "loss": 0.1337, + "step": 19172 + }, + { + "epoch": 2.7072860773792713, + "grad_norm": 2.3522578498000537, + "learning_rate": 4.95198480968988e-07, + "loss": 0.077, + "step": 19173 + }, + { + "epoch": 2.707427280429257, + "grad_norm": 3.254443853910979, + "learning_rate": 4.947248138986249e-07, + "loss": 0.1765, + "step": 19174 + }, + { + "epoch": 2.707568483479243, + "grad_norm": 2.189146991900782, + "learning_rate": 4.942513677267524e-07, + "loss": 0.103, + "step": 19175 + }, + { + "epoch": 2.707709686529229, + "grad_norm": 3.2603533055469507, + "learning_rate": 4.937781424643728e-07, + "loss": 0.1717, + "step": 19176 + }, + { + "epoch": 2.707850889579215, + "grad_norm": 2.8274225869714793, + "learning_rate": 4.933051381224829e-07, + "loss": 0.1429, + "step": 19177 + }, + { + "epoch": 2.7079920926292007, + "grad_norm": 3.6245000969569916, + "learning_rate": 4.928323547120772e-07, + "loss": 0.202, + "step": 19178 + }, + { + "epoch": 2.7081332956791866, + "grad_norm": 3.9004067288020448, + "learning_rate": 4.923597922441415e-07, + "loss": 0.1647, + "step": 19179 + }, + { + "epoch": 2.7082744987291725, + "grad_norm": 3.0567238458083, + "learning_rate": 4.918874507296578e-07, + "loss": 0.1293, + "step": 19180 + }, + { + "epoch": 2.7084157017791584, + "grad_norm": 3.4772722979544772, + "learning_rate": 4.914153301796032e-07, + "loss": 0.1626, + "step": 19181 + }, + { + "epoch": 2.7085569048291442, + "grad_norm": 3.069978103363989, + "learning_rate": 4.909434306049487e-07, + "loss": 0.1485, + "step": 19182 + }, + { + "epoch": 2.70869810787913, + "grad_norm": 2.6180322063661925, + "learning_rate": 4.904717520166657e-07, + "loss": 0.1359, + "step": 19183 + }, + { + "epoch": 2.708839310929116, + "grad_norm": 3.0996338138837674, + "learning_rate": 4.900002944257098e-07, + "loss": 0.1208, + "step": 19184 + }, + { + "epoch": 2.708980513979102, + "grad_norm": 3.02052736736637, + "learning_rate": 4.895290578430412e-07, + "loss": 0.1335, + "step": 19185 + }, + { + "epoch": 2.7091217170290878, + "grad_norm": 2.739568707729761, + "learning_rate": 4.890580422796087e-07, + "loss": 0.1286, + "step": 19186 + }, + { + "epoch": 2.7092629200790737, + "grad_norm": 3.5795781801879762, + "learning_rate": 4.885872477463594e-07, + "loss": 0.1516, + "step": 19187 + }, + { + "epoch": 2.7094041231290595, + "grad_norm": 2.5464340554107383, + "learning_rate": 4.881166742542365e-07, + "loss": 0.1107, + "step": 19188 + }, + { + "epoch": 2.7095453261790454, + "grad_norm": 3.220747071286372, + "learning_rate": 4.876463218141736e-07, + "loss": 0.1389, + "step": 19189 + }, + { + "epoch": 2.7096865292290313, + "grad_norm": 3.296652425655922, + "learning_rate": 4.871761904371019e-07, + "loss": 0.1708, + "step": 19190 + }, + { + "epoch": 2.709827732279017, + "grad_norm": 2.9485872184523787, + "learning_rate": 4.867062801339484e-07, + "loss": 0.1483, + "step": 19191 + }, + { + "epoch": 2.709968935329003, + "grad_norm": 3.0661346906058955, + "learning_rate": 4.86236590915633e-07, + "loss": 0.1344, + "step": 19192 + }, + { + "epoch": 2.710110138378989, + "grad_norm": 3.70543734035251, + "learning_rate": 4.857671227930671e-07, + "loss": 0.1414, + "step": 19193 + }, + { + "epoch": 2.710251341428975, + "grad_norm": 2.914800950411566, + "learning_rate": 4.852978757771664e-07, + "loss": 0.1192, + "step": 19194 + }, + { + "epoch": 2.7103925444789607, + "grad_norm": 3.4023212314245797, + "learning_rate": 4.848288498788345e-07, + "loss": 0.1848, + "step": 19195 + }, + { + "epoch": 2.7105337475289466, + "grad_norm": 3.225212466985664, + "learning_rate": 4.843600451089702e-07, + "loss": 0.1566, + "step": 19196 + }, + { + "epoch": 2.7106749505789325, + "grad_norm": 3.6365806184782428, + "learning_rate": 4.838914614784695e-07, + "loss": 0.1369, + "step": 19197 + }, + { + "epoch": 2.7108161536289184, + "grad_norm": 2.90698030058396, + "learning_rate": 4.834230989982214e-07, + "loss": 0.1015, + "step": 19198 + }, + { + "epoch": 2.7109573566789043, + "grad_norm": 1.8324210277064046, + "learning_rate": 4.829549576791092e-07, + "loss": 0.0875, + "step": 19199 + }, + { + "epoch": 2.71109855972889, + "grad_norm": 3.0410464218075393, + "learning_rate": 4.824870375320156e-07, + "loss": 0.1476, + "step": 19200 + }, + { + "epoch": 2.711239762778876, + "grad_norm": 3.089854737965832, + "learning_rate": 4.820193385678129e-07, + "loss": 0.1395, + "step": 19201 + }, + { + "epoch": 2.711380965828862, + "grad_norm": 2.7063104452208417, + "learning_rate": 4.81551860797369e-07, + "loss": 0.1306, + "step": 19202 + }, + { + "epoch": 2.711522168878848, + "grad_norm": 2.915578363650722, + "learning_rate": 4.810846042315498e-07, + "loss": 0.1487, + "step": 19203 + }, + { + "epoch": 2.7116633719288337, + "grad_norm": 2.476024734621655, + "learning_rate": 4.806175688812142e-07, + "loss": 0.0886, + "step": 19204 + }, + { + "epoch": 2.7118045749788195, + "grad_norm": 3.7479816491117934, + "learning_rate": 4.801507547572126e-07, + "loss": 0.1706, + "step": 19205 + }, + { + "epoch": 2.7119457780288054, + "grad_norm": 3.6498926957204505, + "learning_rate": 4.796841618703984e-07, + "loss": 0.1569, + "step": 19206 + }, + { + "epoch": 2.7120869810787913, + "grad_norm": 3.220599714716518, + "learning_rate": 4.79217790231612e-07, + "loss": 0.1497, + "step": 19207 + }, + { + "epoch": 2.712228184128777, + "grad_norm": 2.92189804423502, + "learning_rate": 4.787516398516934e-07, + "loss": 0.1108, + "step": 19208 + }, + { + "epoch": 2.712369387178763, + "grad_norm": 3.2946555351901767, + "learning_rate": 4.782857107414752e-07, + "loss": 0.1695, + "step": 19209 + }, + { + "epoch": 2.712510590228749, + "grad_norm": 2.6499037061812802, + "learning_rate": 4.77820002911783e-07, + "loss": 0.1319, + "step": 19210 + }, + { + "epoch": 2.712651793278735, + "grad_norm": 3.2849126754899234, + "learning_rate": 4.773545163734416e-07, + "loss": 0.1072, + "step": 19211 + }, + { + "epoch": 2.7127929963287207, + "grad_norm": 4.143036197127106, + "learning_rate": 4.768892511372703e-07, + "loss": 0.1962, + "step": 19212 + }, + { + "epoch": 2.7129341993787066, + "grad_norm": 3.098026916731239, + "learning_rate": 4.76424207214079e-07, + "loss": 0.1289, + "step": 19213 + }, + { + "epoch": 2.7130754024286925, + "grad_norm": 3.7238058060850223, + "learning_rate": 4.7595938461467706e-07, + "loss": 0.1518, + "step": 19214 + }, + { + "epoch": 2.7132166054786784, + "grad_norm": 3.1278462193718553, + "learning_rate": 4.7549478334986576e-07, + "loss": 0.1209, + "step": 19215 + }, + { + "epoch": 2.7133578085286643, + "grad_norm": 3.713035725374499, + "learning_rate": 4.7503040343044205e-07, + "loss": 0.1716, + "step": 19216 + }, + { + "epoch": 2.71349901157865, + "grad_norm": 3.8532281829076593, + "learning_rate": 4.745662448671984e-07, + "loss": 0.2078, + "step": 19217 + }, + { + "epoch": 2.713640214628636, + "grad_norm": 2.91283348184974, + "learning_rate": 4.741023076709217e-07, + "loss": 0.1093, + "step": 19218 + }, + { + "epoch": 2.713781417678622, + "grad_norm": 3.0852114428496953, + "learning_rate": 4.7363859185239336e-07, + "loss": 0.1432, + "step": 19219 + }, + { + "epoch": 2.713922620728608, + "grad_norm": 4.121960971125105, + "learning_rate": 4.731750974223892e-07, + "loss": 0.1724, + "step": 19220 + }, + { + "epoch": 2.7140638237785937, + "grad_norm": 4.268687106148387, + "learning_rate": 4.7271182439168286e-07, + "loss": 0.1726, + "step": 19221 + }, + { + "epoch": 2.7142050268285796, + "grad_norm": 2.7477110199302484, + "learning_rate": 4.7224877277103673e-07, + "loss": 0.1299, + "step": 19222 + }, + { + "epoch": 2.7143462298785654, + "grad_norm": 2.634416807993679, + "learning_rate": 4.717859425712168e-07, + "loss": 0.1294, + "step": 19223 + }, + { + "epoch": 2.7144874329285513, + "grad_norm": 2.8739496667680546, + "learning_rate": 4.7132333380297546e-07, + "loss": 0.1282, + "step": 19224 + }, + { + "epoch": 2.714628635978537, + "grad_norm": 2.8780627686775677, + "learning_rate": 4.708609464770653e-07, + "loss": 0.1413, + "step": 19225 + }, + { + "epoch": 2.714769839028523, + "grad_norm": 2.7906808707204336, + "learning_rate": 4.703987806042332e-07, + "loss": 0.1281, + "step": 19226 + }, + { + "epoch": 2.714911042078509, + "grad_norm": 4.39402470441956, + "learning_rate": 4.6993683619521393e-07, + "loss": 0.2436, + "step": 19227 + }, + { + "epoch": 2.715052245128495, + "grad_norm": 3.4233360878829093, + "learning_rate": 4.6947511326074893e-07, + "loss": 0.1382, + "step": 19228 + }, + { + "epoch": 2.7151934481784807, + "grad_norm": 2.4282727195683576, + "learning_rate": 4.6901361181156737e-07, + "loss": 0.0809, + "step": 19229 + }, + { + "epoch": 2.7153346512284666, + "grad_norm": 3.2492857985860732, + "learning_rate": 4.6855233185839175e-07, + "loss": 0.1347, + "step": 19230 + }, + { + "epoch": 2.7154758542784525, + "grad_norm": 3.0697059400859006, + "learning_rate": 4.680912734119447e-07, + "loss": 0.1522, + "step": 19231 + }, + { + "epoch": 2.7156170573284384, + "grad_norm": 3.863461679627103, + "learning_rate": 4.676304364829398e-07, + "loss": 0.138, + "step": 19232 + }, + { + "epoch": 2.7157582603784243, + "grad_norm": 2.5785026017418318, + "learning_rate": 4.671698210820863e-07, + "loss": 0.1242, + "step": 19233 + }, + { + "epoch": 2.71589946342841, + "grad_norm": 2.9054022144311618, + "learning_rate": 4.6670942722009004e-07, + "loss": 0.1399, + "step": 19234 + }, + { + "epoch": 2.716040666478396, + "grad_norm": 3.002043387922445, + "learning_rate": 4.6624925490764914e-07, + "loss": 0.1459, + "step": 19235 + }, + { + "epoch": 2.716181869528382, + "grad_norm": 2.624766126396182, + "learning_rate": 4.657893041554584e-07, + "loss": 0.0913, + "step": 19236 + }, + { + "epoch": 2.716323072578368, + "grad_norm": 3.214429830232471, + "learning_rate": 4.6532957497420593e-07, + "loss": 0.1333, + "step": 19237 + }, + { + "epoch": 2.7164642756283537, + "grad_norm": 3.1289717506881636, + "learning_rate": 4.6487006737457765e-07, + "loss": 0.1338, + "step": 19238 + }, + { + "epoch": 2.7166054786783396, + "grad_norm": 4.104688588330098, + "learning_rate": 4.644107813672483e-07, + "loss": 0.1686, + "step": 19239 + }, + { + "epoch": 2.7167466817283255, + "grad_norm": 4.04194819287088, + "learning_rate": 4.639517169628971e-07, + "loss": 0.1883, + "step": 19240 + }, + { + "epoch": 2.7168878847783113, + "grad_norm": 3.602147730580308, + "learning_rate": 4.634928741721889e-07, + "loss": 0.1691, + "step": 19241 + }, + { + "epoch": 2.717029087828297, + "grad_norm": 4.482372626534816, + "learning_rate": 4.6303425300578964e-07, + "loss": 0.1498, + "step": 19242 + }, + { + "epoch": 2.717170290878283, + "grad_norm": 2.998464151417447, + "learning_rate": 4.6257585347435406e-07, + "loss": 0.1153, + "step": 19243 + }, + { + "epoch": 2.717311493928269, + "grad_norm": 3.480652850696283, + "learning_rate": 4.6211767558853484e-07, + "loss": 0.1409, + "step": 19244 + }, + { + "epoch": 2.717452696978255, + "grad_norm": 2.77088736631154, + "learning_rate": 4.6165971935898337e-07, + "loss": 0.1124, + "step": 19245 + }, + { + "epoch": 2.7175939000282407, + "grad_norm": 3.1586981849033995, + "learning_rate": 4.6120198479634117e-07, + "loss": 0.1406, + "step": 19246 + }, + { + "epoch": 2.7177351030782266, + "grad_norm": 3.660098883821135, + "learning_rate": 4.607444719112453e-07, + "loss": 0.1186, + "step": 19247 + }, + { + "epoch": 2.7178763061282125, + "grad_norm": 3.225571851719596, + "learning_rate": 4.6028718071432834e-07, + "loss": 0.1445, + "step": 19248 + }, + { + "epoch": 2.7180175091781984, + "grad_norm": 3.010212924523239, + "learning_rate": 4.598301112162162e-07, + "loss": 0.1194, + "step": 19249 + }, + { + "epoch": 2.7181587122281843, + "grad_norm": 3.4034043376127223, + "learning_rate": 4.5937326342753384e-07, + "loss": 0.1379, + "step": 19250 + }, + { + "epoch": 2.71829991527817, + "grad_norm": 3.127890448934661, + "learning_rate": 4.58916637358896e-07, + "loss": 0.1523, + "step": 19251 + }, + { + "epoch": 2.718441118328156, + "grad_norm": 3.5667772946219722, + "learning_rate": 4.5846023302091424e-07, + "loss": 0.1591, + "step": 19252 + }, + { + "epoch": 2.718582321378142, + "grad_norm": 3.2972223958039133, + "learning_rate": 4.580040504241967e-07, + "loss": 0.1462, + "step": 19253 + }, + { + "epoch": 2.718723524428128, + "grad_norm": 3.8714292468998965, + "learning_rate": 4.575480895793438e-07, + "loss": 0.1839, + "step": 19254 + }, + { + "epoch": 2.7188647274781133, + "grad_norm": 3.483074740719657, + "learning_rate": 4.5709235049695267e-07, + "loss": 0.1598, + "step": 19255 + }, + { + "epoch": 2.719005930528099, + "grad_norm": 3.464113272168803, + "learning_rate": 4.5663683318761255e-07, + "loss": 0.1786, + "step": 19256 + }, + { + "epoch": 2.719147133578085, + "grad_norm": 2.9733374354105955, + "learning_rate": 4.5618153766191275e-07, + "loss": 0.1248, + "step": 19257 + }, + { + "epoch": 2.719288336628071, + "grad_norm": 3.5228203704000043, + "learning_rate": 4.557264639304315e-07, + "loss": 0.1623, + "step": 19258 + }, + { + "epoch": 2.719429539678057, + "grad_norm": 2.91906396992724, + "learning_rate": 4.55271612003747e-07, + "loss": 0.102, + "step": 19259 + }, + { + "epoch": 2.7195707427280427, + "grad_norm": 2.4507272292406324, + "learning_rate": 4.548169818924275e-07, + "loss": 0.1087, + "step": 19260 + }, + { + "epoch": 2.7197119457780286, + "grad_norm": 3.448222907812001, + "learning_rate": 4.543625736070367e-07, + "loss": 0.1585, + "step": 19261 + }, + { + "epoch": 2.7198531488280144, + "grad_norm": 3.5149674755116744, + "learning_rate": 4.5390838715813956e-07, + "loss": 0.1707, + "step": 19262 + }, + { + "epoch": 2.7199943518780003, + "grad_norm": 3.5544207112155273, + "learning_rate": 4.534544225562876e-07, + "loss": 0.1381, + "step": 19263 + }, + { + "epoch": 2.720135554927986, + "grad_norm": 3.3522076215146823, + "learning_rate": 4.5300067981203346e-07, + "loss": 0.1393, + "step": 19264 + }, + { + "epoch": 2.720276757977972, + "grad_norm": 3.380881757760855, + "learning_rate": 4.525471589359198e-07, + "loss": 0.1576, + "step": 19265 + }, + { + "epoch": 2.720417961027958, + "grad_norm": 4.115792652030727, + "learning_rate": 4.520938599384872e-07, + "loss": 0.1875, + "step": 19266 + }, + { + "epoch": 2.720559164077944, + "grad_norm": 3.3593118143094904, + "learning_rate": 4.5164078283026934e-07, + "loss": 0.171, + "step": 19267 + }, + { + "epoch": 2.7207003671279297, + "grad_norm": 3.172074869477012, + "learning_rate": 4.511879276217967e-07, + "loss": 0.134, + "step": 19268 + }, + { + "epoch": 2.7208415701779156, + "grad_norm": 4.066777026516414, + "learning_rate": 4.507352943235921e-07, + "loss": 0.1759, + "step": 19269 + }, + { + "epoch": 2.7209827732279015, + "grad_norm": 2.871895412107255, + "learning_rate": 4.5028288294617583e-07, + "loss": 0.1277, + "step": 19270 + }, + { + "epoch": 2.7211239762778874, + "grad_norm": 3.4806963090116976, + "learning_rate": 4.498306935000607e-07, + "loss": 0.1341, + "step": 19271 + }, + { + "epoch": 2.7212651793278733, + "grad_norm": 3.0558044302368876, + "learning_rate": 4.4937872599575605e-07, + "loss": 0.15, + "step": 19272 + }, + { + "epoch": 2.721406382377859, + "grad_norm": 3.098678587524678, + "learning_rate": 4.4892698044376346e-07, + "loss": 0.1285, + "step": 19273 + }, + { + "epoch": 2.721547585427845, + "grad_norm": 2.6102018645896776, + "learning_rate": 4.484754568545857e-07, + "loss": 0.108, + "step": 19274 + }, + { + "epoch": 2.721688788477831, + "grad_norm": 2.799940361902545, + "learning_rate": 4.4802415523871214e-07, + "loss": 0.0977, + "step": 19275 + }, + { + "epoch": 2.721829991527817, + "grad_norm": 3.3602300957065205, + "learning_rate": 4.475730756066332e-07, + "loss": 0.1657, + "step": 19276 + }, + { + "epoch": 2.7219711945778027, + "grad_norm": 3.3926056319262976, + "learning_rate": 4.471222179688306e-07, + "loss": 0.1387, + "step": 19277 + }, + { + "epoch": 2.7221123976277886, + "grad_norm": 3.6564454765361782, + "learning_rate": 4.4667158233577925e-07, + "loss": 0.1393, + "step": 19278 + }, + { + "epoch": 2.7222536006777744, + "grad_norm": 2.3322279498954415, + "learning_rate": 4.462211687179574e-07, + "loss": 0.0912, + "step": 19279 + }, + { + "epoch": 2.7223948037277603, + "grad_norm": 3.3057067655498678, + "learning_rate": 4.4577097712582897e-07, + "loss": 0.1446, + "step": 19280 + }, + { + "epoch": 2.722536006777746, + "grad_norm": 2.9585724052675757, + "learning_rate": 4.4532100756985663e-07, + "loss": 0.1262, + "step": 19281 + }, + { + "epoch": 2.722677209827732, + "grad_norm": 3.168021595235617, + "learning_rate": 4.4487126006049764e-07, + "loss": 0.158, + "step": 19282 + }, + { + "epoch": 2.722818412877718, + "grad_norm": 2.474409723978474, + "learning_rate": 4.444217346082036e-07, + "loss": 0.1123, + "step": 19283 + }, + { + "epoch": 2.722959615927704, + "grad_norm": 2.5815016459588587, + "learning_rate": 4.4397243122342284e-07, + "loss": 0.1106, + "step": 19284 + }, + { + "epoch": 2.7231008189776897, + "grad_norm": 3.396116449044327, + "learning_rate": 4.4352334991659475e-07, + "loss": 0.1824, + "step": 19285 + }, + { + "epoch": 2.7232420220276756, + "grad_norm": 3.3599132158699705, + "learning_rate": 4.430744906981577e-07, + "loss": 0.1281, + "step": 19286 + }, + { + "epoch": 2.7233832250776615, + "grad_norm": 4.247307123070073, + "learning_rate": 4.4262585357854217e-07, + "loss": 0.1675, + "step": 19287 + }, + { + "epoch": 2.7235244281276474, + "grad_norm": 4.267281582293608, + "learning_rate": 4.421774385681743e-07, + "loss": 0.1749, + "step": 19288 + }, + { + "epoch": 2.7236656311776333, + "grad_norm": 3.0014287337737167, + "learning_rate": 4.4172924567747467e-07, + "loss": 0.1436, + "step": 19289 + }, + { + "epoch": 2.723806834227619, + "grad_norm": 3.2406929888348213, + "learning_rate": 4.412812749168582e-07, + "loss": 0.1443, + "step": 19290 + }, + { + "epoch": 2.723948037277605, + "grad_norm": 2.710843115511303, + "learning_rate": 4.408335262967378e-07, + "loss": 0.1433, + "step": 19291 + }, + { + "epoch": 2.724089240327591, + "grad_norm": 3.665094879720396, + "learning_rate": 4.403859998275184e-07, + "loss": 0.1598, + "step": 19292 + }, + { + "epoch": 2.724230443377577, + "grad_norm": 3.771711170906613, + "learning_rate": 4.3993869551960165e-07, + "loss": 0.1741, + "step": 19293 + }, + { + "epoch": 2.7243716464275627, + "grad_norm": 3.433241342825456, + "learning_rate": 4.394916133833782e-07, + "loss": 0.1506, + "step": 19294 + }, + { + "epoch": 2.7245128494775486, + "grad_norm": 3.4737072051924796, + "learning_rate": 4.390447534292419e-07, + "loss": 0.1416, + "step": 19295 + }, + { + "epoch": 2.7246540525275345, + "grad_norm": 2.840175954508181, + "learning_rate": 4.385981156675756e-07, + "loss": 0.1271, + "step": 19296 + }, + { + "epoch": 2.7247952555775203, + "grad_norm": 3.7790893162369024, + "learning_rate": 4.3815170010875984e-07, + "loss": 0.1879, + "step": 19297 + }, + { + "epoch": 2.724936458627506, + "grad_norm": 2.7096361549091283, + "learning_rate": 4.377055067631697e-07, + "loss": 0.1097, + "step": 19298 + }, + { + "epoch": 2.725077661677492, + "grad_norm": 2.87399765431248, + "learning_rate": 4.372595356411746e-07, + "loss": 0.0994, + "step": 19299 + }, + { + "epoch": 2.725218864727478, + "grad_norm": 3.226149349046855, + "learning_rate": 4.3681378675313747e-07, + "loss": 0.1314, + "step": 19300 + }, + { + "epoch": 2.725360067777464, + "grad_norm": 3.175470841121101, + "learning_rate": 4.363682601094177e-07, + "loss": 0.1365, + "step": 19301 + }, + { + "epoch": 2.7255012708274498, + "grad_norm": 2.944246816245402, + "learning_rate": 4.3592295572037037e-07, + "loss": 0.1201, + "step": 19302 + }, + { + "epoch": 2.7256424738774356, + "grad_norm": 2.9145409899455497, + "learning_rate": 4.3547787359634163e-07, + "loss": 0.1297, + "step": 19303 + }, + { + "epoch": 2.7257836769274215, + "grad_norm": 2.4233049570135043, + "learning_rate": 4.350330137476777e-07, + "loss": 0.0853, + "step": 19304 + }, + { + "epoch": 2.7259248799774074, + "grad_norm": 2.9551454863413515, + "learning_rate": 4.345883761847147e-07, + "loss": 0.1202, + "step": 19305 + }, + { + "epoch": 2.7260660830273933, + "grad_norm": 3.214487631843572, + "learning_rate": 4.3414396091778774e-07, + "loss": 0.121, + "step": 19306 + }, + { + "epoch": 2.726207286077379, + "grad_norm": 2.9205039097159826, + "learning_rate": 4.336997679572241e-07, + "loss": 0.13, + "step": 19307 + }, + { + "epoch": 2.726348489127365, + "grad_norm": 3.3872230361092215, + "learning_rate": 4.3325579731334444e-07, + "loss": 0.1646, + "step": 19308 + }, + { + "epoch": 2.726489692177351, + "grad_norm": 4.332493821514747, + "learning_rate": 4.3281204899647046e-07, + "loss": 0.2077, + "step": 19309 + }, + { + "epoch": 2.726630895227337, + "grad_norm": 3.6919561303104738, + "learning_rate": 4.323685230169128e-07, + "loss": 0.1819, + "step": 19310 + }, + { + "epoch": 2.7267720982773227, + "grad_norm": 2.9150266514042107, + "learning_rate": 4.319252193849788e-07, + "loss": 0.1251, + "step": 19311 + }, + { + "epoch": 2.7269133013273086, + "grad_norm": 3.4605387714812252, + "learning_rate": 4.314821381109702e-07, + "loss": 0.134, + "step": 19312 + }, + { + "epoch": 2.7270545043772945, + "grad_norm": 3.389098905854682, + "learning_rate": 4.310392792051832e-07, + "loss": 0.1197, + "step": 19313 + }, + { + "epoch": 2.7271957074272803, + "grad_norm": 3.603679730509811, + "learning_rate": 4.305966426779118e-07, + "loss": 0.1377, + "step": 19314 + }, + { + "epoch": 2.7273369104772662, + "grad_norm": 3.1507424255957113, + "learning_rate": 4.301542285394411e-07, + "loss": 0.1261, + "step": 19315 + }, + { + "epoch": 2.727478113527252, + "grad_norm": 3.0627372124840804, + "learning_rate": 4.2971203680005404e-07, + "loss": 0.1513, + "step": 19316 + }, + { + "epoch": 2.727619316577238, + "grad_norm": 2.8015209895052613, + "learning_rate": 4.2927006747002563e-07, + "loss": 0.1401, + "step": 19317 + }, + { + "epoch": 2.727760519627224, + "grad_norm": 2.747523376300121, + "learning_rate": 4.2882832055962885e-07, + "loss": 0.1105, + "step": 19318 + }, + { + "epoch": 2.7279017226772098, + "grad_norm": 2.837397829415899, + "learning_rate": 4.283867960791277e-07, + "loss": 0.1159, + "step": 19319 + }, + { + "epoch": 2.7280429257271956, + "grad_norm": 3.2545690514459653, + "learning_rate": 4.279454940387828e-07, + "loss": 0.1563, + "step": 19320 + }, + { + "epoch": 2.7281841287771815, + "grad_norm": 3.102814449289982, + "learning_rate": 4.275044144488516e-07, + "loss": 0.1289, + "step": 19321 + }, + { + "epoch": 2.7283253318271674, + "grad_norm": 3.5554220455615546, + "learning_rate": 4.270635573195836e-07, + "loss": 0.1719, + "step": 19322 + }, + { + "epoch": 2.7284665348771533, + "grad_norm": 3.3861074933420414, + "learning_rate": 4.2662292266122505e-07, + "loss": 0.1649, + "step": 19323 + }, + { + "epoch": 2.728607737927139, + "grad_norm": 3.8291550121846116, + "learning_rate": 4.261825104840145e-07, + "loss": 0.1459, + "step": 19324 + }, + { + "epoch": 2.728748940977125, + "grad_norm": 3.009529640473334, + "learning_rate": 4.25742320798187e-07, + "loss": 0.1487, + "step": 19325 + }, + { + "epoch": 2.728890144027111, + "grad_norm": 3.4944575019006185, + "learning_rate": 4.253023536139733e-07, + "loss": 0.1522, + "step": 19326 + }, + { + "epoch": 2.729031347077097, + "grad_norm": 3.0920606236633743, + "learning_rate": 4.2486260894160083e-07, + "loss": 0.1201, + "step": 19327 + }, + { + "epoch": 2.7291725501270827, + "grad_norm": 2.479350469386191, + "learning_rate": 4.244230867912835e-07, + "loss": 0.1209, + "step": 19328 + }, + { + "epoch": 2.7293137531770686, + "grad_norm": 2.648647626174326, + "learning_rate": 4.2398378717323887e-07, + "loss": 0.11, + "step": 19329 + }, + { + "epoch": 2.7294549562270545, + "grad_norm": 3.516232143322579, + "learning_rate": 4.2354471009767415e-07, + "loss": 0.1359, + "step": 19330 + }, + { + "epoch": 2.7295961592770404, + "grad_norm": 2.9186052773527686, + "learning_rate": 4.231058555747958e-07, + "loss": 0.1257, + "step": 19331 + }, + { + "epoch": 2.7297373623270262, + "grad_norm": 3.194427760395779, + "learning_rate": 4.226672236148022e-07, + "loss": 0.1342, + "step": 19332 + }, + { + "epoch": 2.729878565377012, + "grad_norm": 3.777446516699935, + "learning_rate": 4.222288142278852e-07, + "loss": 0.1674, + "step": 19333 + }, + { + "epoch": 2.730019768426998, + "grad_norm": 2.880262445015603, + "learning_rate": 4.217906274242345e-07, + "loss": 0.1024, + "step": 19334 + }, + { + "epoch": 2.730160971476984, + "grad_norm": 2.5484760674390072, + "learning_rate": 4.21352663214033e-07, + "loss": 0.1225, + "step": 19335 + }, + { + "epoch": 2.7303021745269698, + "grad_norm": 3.581657664675056, + "learning_rate": 4.209149216074593e-07, + "loss": 0.182, + "step": 19336 + }, + { + "epoch": 2.7304433775769557, + "grad_norm": 3.1109538614465535, + "learning_rate": 4.2047740261468516e-07, + "loss": 0.1497, + "step": 19337 + }, + { + "epoch": 2.7305845806269415, + "grad_norm": 4.585157840500381, + "learning_rate": 4.2004010624588033e-07, + "loss": 0.2139, + "step": 19338 + }, + { + "epoch": 2.7307257836769274, + "grad_norm": 4.17259703919618, + "learning_rate": 4.1960303251120547e-07, + "loss": 0.187, + "step": 19339 + }, + { + "epoch": 2.7308669867269133, + "grad_norm": 2.6313559528959805, + "learning_rate": 4.191661814208181e-07, + "loss": 0.1444, + "step": 19340 + }, + { + "epoch": 2.731008189776899, + "grad_norm": 3.490652132438848, + "learning_rate": 4.1872955298487227e-07, + "loss": 0.1327, + "step": 19341 + }, + { + "epoch": 2.731149392826885, + "grad_norm": 2.65701000641957, + "learning_rate": 4.1829314721351213e-07, + "loss": 0.1148, + "step": 19342 + }, + { + "epoch": 2.731290595876871, + "grad_norm": 2.9978406139678286, + "learning_rate": 4.178569641168817e-07, + "loss": 0.1286, + "step": 19343 + }, + { + "epoch": 2.731431798926857, + "grad_norm": 2.594870044023713, + "learning_rate": 4.1742100370511853e-07, + "loss": 0.1157, + "step": 19344 + }, + { + "epoch": 2.7315730019768427, + "grad_norm": 3.1509020715851386, + "learning_rate": 4.169852659883522e-07, + "loss": 0.1658, + "step": 19345 + }, + { + "epoch": 2.7317142050268286, + "grad_norm": 3.4093079628565404, + "learning_rate": 4.1654975097671025e-07, + "loss": 0.141, + "step": 19346 + }, + { + "epoch": 2.7318554080768145, + "grad_norm": 3.5720588166000202, + "learning_rate": 4.161144586803112e-07, + "loss": 0.1221, + "step": 19347 + }, + { + "epoch": 2.7319966111268004, + "grad_norm": 2.599719986247328, + "learning_rate": 4.1567938910927475e-07, + "loss": 0.114, + "step": 19348 + }, + { + "epoch": 2.7321378141767863, + "grad_norm": 3.0804425133731588, + "learning_rate": 4.1524454227370945e-07, + "loss": 0.1482, + "step": 19349 + }, + { + "epoch": 2.732279017226772, + "grad_norm": 2.8618081328075857, + "learning_rate": 4.1480991818372284e-07, + "loss": 0.1269, + "step": 19350 + }, + { + "epoch": 2.732420220276758, + "grad_norm": 3.4214907498618903, + "learning_rate": 4.1437551684941345e-07, + "loss": 0.1462, + "step": 19351 + }, + { + "epoch": 2.732561423326744, + "grad_norm": 3.068140104805384, + "learning_rate": 4.1394133828087654e-07, + "loss": 0.1681, + "step": 19352 + }, + { + "epoch": 2.73270262637673, + "grad_norm": 2.758852363440679, + "learning_rate": 4.135073824882041e-07, + "loss": 0.1233, + "step": 19353 + }, + { + "epoch": 2.7328438294267157, + "grad_norm": 2.3448765553633613, + "learning_rate": 4.130736494814802e-07, + "loss": 0.1229, + "step": 19354 + }, + { + "epoch": 2.7329850324767015, + "grad_norm": 3.7593709314048853, + "learning_rate": 4.126401392707835e-07, + "loss": 0.1788, + "step": 19355 + }, + { + "epoch": 2.7331262355266874, + "grad_norm": 2.6450518509268495, + "learning_rate": 4.1220685186619037e-07, + "loss": 0.0996, + "step": 19356 + }, + { + "epoch": 2.7332674385766733, + "grad_norm": 2.6264390069004855, + "learning_rate": 4.117737872777694e-07, + "loss": 0.1024, + "step": 19357 + }, + { + "epoch": 2.733408641626659, + "grad_norm": 2.924053295572232, + "learning_rate": 4.113409455155837e-07, + "loss": 0.1343, + "step": 19358 + }, + { + "epoch": 2.733549844676645, + "grad_norm": 3.3634477029364294, + "learning_rate": 4.1090832658969294e-07, + "loss": 0.1781, + "step": 19359 + }, + { + "epoch": 2.733691047726631, + "grad_norm": 2.7239417193093525, + "learning_rate": 4.1047593051015245e-07, + "loss": 0.1232, + "step": 19360 + }, + { + "epoch": 2.733832250776617, + "grad_norm": 2.7267377539482496, + "learning_rate": 4.1004375728701193e-07, + "loss": 0.131, + "step": 19361 + }, + { + "epoch": 2.7339734538266027, + "grad_norm": 2.883985074495877, + "learning_rate": 4.0961180693031123e-07, + "loss": 0.1093, + "step": 19362 + }, + { + "epoch": 2.7341146568765886, + "grad_norm": 2.7811100587525375, + "learning_rate": 4.0918007945009e-07, + "loss": 0.1063, + "step": 19363 + }, + { + "epoch": 2.7342558599265745, + "grad_norm": 3.6461206304841385, + "learning_rate": 4.087485748563813e-07, + "loss": 0.1469, + "step": 19364 + }, + { + "epoch": 2.7343970629765604, + "grad_norm": 3.4665865630322137, + "learning_rate": 4.083172931592139e-07, + "loss": 0.1343, + "step": 19365 + }, + { + "epoch": 2.7345382660265463, + "grad_norm": 3.1956189671207618, + "learning_rate": 4.0788623436861077e-07, + "loss": 0.092, + "step": 19366 + }, + { + "epoch": 2.734679469076532, + "grad_norm": 2.742902940935254, + "learning_rate": 4.0745539849458837e-07, + "loss": 0.1365, + "step": 19367 + }, + { + "epoch": 2.734820672126518, + "grad_norm": 3.1799613548796803, + "learning_rate": 4.0702478554716094e-07, + "loss": 0.1559, + "step": 19368 + }, + { + "epoch": 2.734961875176504, + "grad_norm": 3.2288130230216154, + "learning_rate": 4.0659439553633385e-07, + "loss": 0.1803, + "step": 19369 + }, + { + "epoch": 2.73510307822649, + "grad_norm": 2.8675436998819084, + "learning_rate": 4.0616422847211013e-07, + "loss": 0.1499, + "step": 19370 + }, + { + "epoch": 2.7352442812764757, + "grad_norm": 3.9430708514995834, + "learning_rate": 4.0573428436448627e-07, + "loss": 0.1791, + "step": 19371 + }, + { + "epoch": 2.7353854843264616, + "grad_norm": 2.4161780812475175, + "learning_rate": 4.053045632234542e-07, + "loss": 0.103, + "step": 19372 + }, + { + "epoch": 2.7355266873764474, + "grad_norm": 3.5211675489720826, + "learning_rate": 4.0487506505900056e-07, + "loss": 0.1653, + "step": 19373 + }, + { + "epoch": 2.7356678904264333, + "grad_norm": 2.911435394894441, + "learning_rate": 4.0444578988110715e-07, + "loss": 0.1337, + "step": 19374 + }, + { + "epoch": 2.735809093476419, + "grad_norm": 2.73153175142303, + "learning_rate": 4.040167376997484e-07, + "loss": 0.126, + "step": 19375 + }, + { + "epoch": 2.735950296526405, + "grad_norm": 2.491834674179084, + "learning_rate": 4.0358790852489616e-07, + "loss": 0.1039, + "step": 19376 + }, + { + "epoch": 2.736091499576391, + "grad_norm": 2.5827246369661054, + "learning_rate": 4.031593023665181e-07, + "loss": 0.1135, + "step": 19377 + }, + { + "epoch": 2.736232702626377, + "grad_norm": 2.79673933152445, + "learning_rate": 4.0273091923457297e-07, + "loss": 0.1125, + "step": 19378 + }, + { + "epoch": 2.7363739056763627, + "grad_norm": 3.1183923679206127, + "learning_rate": 4.0230275913901716e-07, + "loss": 0.1412, + "step": 19379 + }, + { + "epoch": 2.7365151087263486, + "grad_norm": 2.971780911422613, + "learning_rate": 4.018748220897994e-07, + "loss": 0.1274, + "step": 19380 + }, + { + "epoch": 2.7366563117763345, + "grad_norm": 2.471778481445548, + "learning_rate": 4.0144710809686407e-07, + "loss": 0.1156, + "step": 19381 + }, + { + "epoch": 2.7367975148263204, + "grad_norm": 2.9387206315714867, + "learning_rate": 4.0101961717015416e-07, + "loss": 0.1357, + "step": 19382 + }, + { + "epoch": 2.7369387178763063, + "grad_norm": 3.149247195834162, + "learning_rate": 4.005923493196029e-07, + "loss": 0.1125, + "step": 19383 + }, + { + "epoch": 2.737079920926292, + "grad_norm": 3.738936492749957, + "learning_rate": 4.0016530455514013e-07, + "loss": 0.1689, + "step": 19384 + }, + { + "epoch": 2.737221123976278, + "grad_norm": 2.9328198058316626, + "learning_rate": 3.9973848288669013e-07, + "loss": 0.1073, + "step": 19385 + }, + { + "epoch": 2.737362327026264, + "grad_norm": 3.9041212711779756, + "learning_rate": 3.9931188432417057e-07, + "loss": 0.1789, + "step": 19386 + }, + { + "epoch": 2.73750353007625, + "grad_norm": 2.8744570015529183, + "learning_rate": 3.9888550887749787e-07, + "loss": 0.1294, + "step": 19387 + }, + { + "epoch": 2.7376447331262357, + "grad_norm": 2.9034007361933285, + "learning_rate": 3.9845935655657866e-07, + "loss": 0.1382, + "step": 19388 + }, + { + "epoch": 2.7377859361762216, + "grad_norm": 3.516622550036228, + "learning_rate": 3.9803342737131713e-07, + "loss": 0.1554, + "step": 19389 + }, + { + "epoch": 2.7379271392262075, + "grad_norm": 3.250628724976959, + "learning_rate": 3.976077213316132e-07, + "loss": 0.1469, + "step": 19390 + }, + { + "epoch": 2.7380683422761933, + "grad_norm": 3.6764501877211004, + "learning_rate": 3.9718223844735784e-07, + "loss": 0.1599, + "step": 19391 + }, + { + "epoch": 2.738209545326179, + "grad_norm": 2.70512956443334, + "learning_rate": 3.967569787284409e-07, + "loss": 0.1206, + "step": 19392 + }, + { + "epoch": 2.738350748376165, + "grad_norm": 2.9530516783214082, + "learning_rate": 3.9633194218474223e-07, + "loss": 0.1389, + "step": 19393 + }, + { + "epoch": 2.738491951426151, + "grad_norm": 2.9914672192022103, + "learning_rate": 3.95907128826144e-07, + "loss": 0.129, + "step": 19394 + }, + { + "epoch": 2.738633154476137, + "grad_norm": 3.5164886087025784, + "learning_rate": 3.954825386625172e-07, + "loss": 0.1412, + "step": 19395 + }, + { + "epoch": 2.7387743575261227, + "grad_norm": 2.97364672961565, + "learning_rate": 3.9505817170372606e-07, + "loss": 0.128, + "step": 19396 + }, + { + "epoch": 2.7389155605761086, + "grad_norm": 2.709447080355022, + "learning_rate": 3.946340279596361e-07, + "loss": 0.1376, + "step": 19397 + }, + { + "epoch": 2.7390567636260945, + "grad_norm": 3.2302855594915827, + "learning_rate": 3.942101074401028e-07, + "loss": 0.1695, + "step": 19398 + }, + { + "epoch": 2.7391979666760804, + "grad_norm": 2.744602159476807, + "learning_rate": 3.937864101549771e-07, + "loss": 0.1248, + "step": 19399 + }, + { + "epoch": 2.7393391697260663, + "grad_norm": 3.670593089537176, + "learning_rate": 3.933629361141078e-07, + "loss": 0.164, + "step": 19400 + }, + { + "epoch": 2.739480372776052, + "grad_norm": 3.212028759504199, + "learning_rate": 3.9293968532733593e-07, + "loss": 0.1495, + "step": 19401 + }, + { + "epoch": 2.739621575826038, + "grad_norm": 3.242219052507598, + "learning_rate": 3.9251665780449587e-07, + "loss": 0.1158, + "step": 19402 + }, + { + "epoch": 2.739762778876024, + "grad_norm": 4.0302874181253605, + "learning_rate": 3.9209385355542085e-07, + "loss": 0.1733, + "step": 19403 + }, + { + "epoch": 2.73990398192601, + "grad_norm": 3.5100756351832185, + "learning_rate": 3.91671272589933e-07, + "loss": 0.1671, + "step": 19404 + }, + { + "epoch": 2.7400451849759957, + "grad_norm": 2.9424599405535585, + "learning_rate": 3.9124891491785553e-07, + "loss": 0.1207, + "step": 19405 + }, + { + "epoch": 2.7401863880259816, + "grad_norm": 3.3752386007416653, + "learning_rate": 3.908267805490051e-07, + "loss": 0.1578, + "step": 19406 + }, + { + "epoch": 2.7403275910759675, + "grad_norm": 2.7017307623402584, + "learning_rate": 3.9040486949318947e-07, + "loss": 0.1297, + "step": 19407 + }, + { + "epoch": 2.7404687941259533, + "grad_norm": 4.263710886839664, + "learning_rate": 3.899831817602151e-07, + "loss": 0.1759, + "step": 19408 + }, + { + "epoch": 2.7406099971759392, + "grad_norm": 3.1975080532405484, + "learning_rate": 3.895617173598809e-07, + "loss": 0.14, + "step": 19409 + }, + { + "epoch": 2.740751200225925, + "grad_norm": 2.8384344711283482, + "learning_rate": 3.8914047630198237e-07, + "loss": 0.1133, + "step": 19410 + }, + { + "epoch": 2.740892403275911, + "grad_norm": 2.9840727520497117, + "learning_rate": 3.887194585963072e-07, + "loss": 0.1398, + "step": 19411 + }, + { + "epoch": 2.741033606325897, + "grad_norm": 3.337745041856669, + "learning_rate": 3.8829866425264317e-07, + "loss": 0.1479, + "step": 19412 + }, + { + "epoch": 2.7411748093758828, + "grad_norm": 2.662397719755059, + "learning_rate": 3.8787809328076577e-07, + "loss": 0.1166, + "step": 19413 + }, + { + "epoch": 2.7413160124258686, + "grad_norm": 2.6877431853993152, + "learning_rate": 3.874577456904516e-07, + "loss": 0.1339, + "step": 19414 + }, + { + "epoch": 2.7414572154758545, + "grad_norm": 2.9378422465770364, + "learning_rate": 3.8703762149146726e-07, + "loss": 0.1536, + "step": 19415 + }, + { + "epoch": 2.7415984185258404, + "grad_norm": 2.2068814172022773, + "learning_rate": 3.866177206935751e-07, + "loss": 0.1176, + "step": 19416 + }, + { + "epoch": 2.7417396215758263, + "grad_norm": 2.488206317054543, + "learning_rate": 3.861980433065382e-07, + "loss": 0.1173, + "step": 19417 + }, + { + "epoch": 2.741880824625812, + "grad_norm": 3.2191962785578387, + "learning_rate": 3.857785893401056e-07, + "loss": 0.1712, + "step": 19418 + }, + { + "epoch": 2.742022027675798, + "grad_norm": 2.598995996628854, + "learning_rate": 3.853593588040272e-07, + "loss": 0.1227, + "step": 19419 + }, + { + "epoch": 2.742163230725784, + "grad_norm": 2.886007676417886, + "learning_rate": 3.849403517080452e-07, + "loss": 0.1192, + "step": 19420 + }, + { + "epoch": 2.74230443377577, + "grad_norm": 2.924489853099131, + "learning_rate": 3.845215680618963e-07, + "loss": 0.1392, + "step": 19421 + }, + { + "epoch": 2.7424456368257557, + "grad_norm": 3.42616815884423, + "learning_rate": 3.8410300787531385e-07, + "loss": 0.143, + "step": 19422 + }, + { + "epoch": 2.7425868398757416, + "grad_norm": 3.0062191823661597, + "learning_rate": 3.8368467115802443e-07, + "loss": 0.1511, + "step": 19423 + }, + { + "epoch": 2.7427280429257275, + "grad_norm": 2.8390465990287512, + "learning_rate": 3.832665579197503e-07, + "loss": 0.1293, + "step": 19424 + }, + { + "epoch": 2.742869245975713, + "grad_norm": 4.045965126548105, + "learning_rate": 3.8284866817020926e-07, + "loss": 0.1978, + "step": 19425 + }, + { + "epoch": 2.743010449025699, + "grad_norm": 3.120000995160894, + "learning_rate": 3.824310019191102e-07, + "loss": 0.1265, + "step": 19426 + }, + { + "epoch": 2.7431516520756847, + "grad_norm": 2.4897109124418124, + "learning_rate": 3.820135591761631e-07, + "loss": 0.1252, + "step": 19427 + }, + { + "epoch": 2.7432928551256706, + "grad_norm": 3.6964666806220587, + "learning_rate": 3.815963399510647e-07, + "loss": 0.1425, + "step": 19428 + }, + { + "epoch": 2.7434340581756564, + "grad_norm": 3.6532784574715698, + "learning_rate": 3.811793442535161e-07, + "loss": 0.1362, + "step": 19429 + }, + { + "epoch": 2.7435752612256423, + "grad_norm": 3.6704568180668344, + "learning_rate": 3.80762572093204e-07, + "loss": 0.1586, + "step": 19430 + }, + { + "epoch": 2.743716464275628, + "grad_norm": 3.8388677072589186, + "learning_rate": 3.8034602347981617e-07, + "loss": 0.1854, + "step": 19431 + }, + { + "epoch": 2.743857667325614, + "grad_norm": 3.917507084938562, + "learning_rate": 3.799296984230316e-07, + "loss": 0.2051, + "step": 19432 + }, + { + "epoch": 2.7439988703756, + "grad_norm": 2.9154865325347723, + "learning_rate": 3.795135969325259e-07, + "loss": 0.1302, + "step": 19433 + }, + { + "epoch": 2.744140073425586, + "grad_norm": 3.7584214303499097, + "learning_rate": 3.790977190179701e-07, + "loss": 0.1766, + "step": 19434 + }, + { + "epoch": 2.7442812764755717, + "grad_norm": 3.4275963188872582, + "learning_rate": 3.786820646890277e-07, + "loss": 0.151, + "step": 19435 + }, + { + "epoch": 2.7444224795255576, + "grad_norm": 3.0981239983200117, + "learning_rate": 3.782666339553598e-07, + "loss": 0.1372, + "step": 19436 + }, + { + "epoch": 2.7445636825755435, + "grad_norm": 3.857578307231529, + "learning_rate": 3.77851426826622e-07, + "loss": 0.1863, + "step": 19437 + }, + { + "epoch": 2.7447048856255294, + "grad_norm": 3.009554324179775, + "learning_rate": 3.774364433124578e-07, + "loss": 0.1221, + "step": 19438 + }, + { + "epoch": 2.7448460886755153, + "grad_norm": 3.106882242432552, + "learning_rate": 3.770216834225171e-07, + "loss": 0.1751, + "step": 19439 + }, + { + "epoch": 2.744987291725501, + "grad_norm": 3.477318813807565, + "learning_rate": 3.7660714716643563e-07, + "loss": 0.1908, + "step": 19440 + }, + { + "epoch": 2.745128494775487, + "grad_norm": 3.072981058971069, + "learning_rate": 3.7619283455384906e-07, + "loss": 0.1047, + "step": 19441 + }, + { + "epoch": 2.745269697825473, + "grad_norm": 2.485825386775933, + "learning_rate": 3.75778745594384e-07, + "loss": 0.1001, + "step": 19442 + }, + { + "epoch": 2.745410900875459, + "grad_norm": 2.956754761247289, + "learning_rate": 3.75364880297665e-07, + "loss": 0.171, + "step": 19443 + }, + { + "epoch": 2.7455521039254447, + "grad_norm": 2.9721854454369407, + "learning_rate": 3.749512386733101e-07, + "loss": 0.1226, + "step": 19444 + }, + { + "epoch": 2.7456933069754306, + "grad_norm": 3.5239793879798658, + "learning_rate": 3.7453782073092913e-07, + "loss": 0.1917, + "step": 19445 + }, + { + "epoch": 2.7458345100254165, + "grad_norm": 3.501797700249198, + "learning_rate": 3.741246264801357e-07, + "loss": 0.165, + "step": 19446 + }, + { + "epoch": 2.7459757130754023, + "grad_norm": 2.3099870501951876, + "learning_rate": 3.7371165593052763e-07, + "loss": 0.1154, + "step": 19447 + }, + { + "epoch": 2.746116916125388, + "grad_norm": 3.062139647553599, + "learning_rate": 3.7329890909170275e-07, + "loss": 0.1511, + "step": 19448 + }, + { + "epoch": 2.746258119175374, + "grad_norm": 3.1010938582881065, + "learning_rate": 3.7288638597325453e-07, + "loss": 0.1126, + "step": 19449 + }, + { + "epoch": 2.74639932222536, + "grad_norm": 3.0118743760394624, + "learning_rate": 3.7247408658476756e-07, + "loss": 0.1328, + "step": 19450 + }, + { + "epoch": 2.746540525275346, + "grad_norm": 3.39892900080272, + "learning_rate": 3.720620109358264e-07, + "loss": 0.1831, + "step": 19451 + }, + { + "epoch": 2.7466817283253318, + "grad_norm": 3.0066353683450004, + "learning_rate": 3.7165015903600553e-07, + "loss": 0.1489, + "step": 19452 + }, + { + "epoch": 2.7468229313753176, + "grad_norm": 2.9577942251706615, + "learning_rate": 3.712385308948774e-07, + "loss": 0.1573, + "step": 19453 + }, + { + "epoch": 2.7469641344253035, + "grad_norm": 3.6816915819315637, + "learning_rate": 3.708271265220087e-07, + "loss": 0.1559, + "step": 19454 + }, + { + "epoch": 2.7471053374752894, + "grad_norm": 3.1628729620234908, + "learning_rate": 3.704159459269563e-07, + "loss": 0.1234, + "step": 19455 + }, + { + "epoch": 2.7472465405252753, + "grad_norm": 2.6313836780905566, + "learning_rate": 3.700049891192792e-07, + "loss": 0.1174, + "step": 19456 + }, + { + "epoch": 2.747387743575261, + "grad_norm": 2.708731714717972, + "learning_rate": 3.6959425610852863e-07, + "loss": 0.1281, + "step": 19457 + }, + { + "epoch": 2.747528946625247, + "grad_norm": 3.0318736090976577, + "learning_rate": 3.691837469042481e-07, + "loss": 0.1686, + "step": 19458 + }, + { + "epoch": 2.747670149675233, + "grad_norm": 3.324056212194871, + "learning_rate": 3.687734615159777e-07, + "loss": 0.1455, + "step": 19459 + }, + { + "epoch": 2.747811352725219, + "grad_norm": 3.5998944915051965, + "learning_rate": 3.683633999532521e-07, + "loss": 0.1762, + "step": 19460 + }, + { + "epoch": 2.7479525557752047, + "grad_norm": 2.7722290465139916, + "learning_rate": 3.6795356222560253e-07, + "loss": 0.1364, + "step": 19461 + }, + { + "epoch": 2.7480937588251906, + "grad_norm": 3.240409929325466, + "learning_rate": 3.6754394834255023e-07, + "loss": 0.1445, + "step": 19462 + }, + { + "epoch": 2.7482349618751765, + "grad_norm": 4.319019390235662, + "learning_rate": 3.671345583136199e-07, + "loss": 0.148, + "step": 19463 + }, + { + "epoch": 2.7483761649251623, + "grad_norm": 4.184632006510233, + "learning_rate": 3.6672539214832157e-07, + "loss": 0.1487, + "step": 19464 + }, + { + "epoch": 2.7485173679751482, + "grad_norm": 3.9387228059929296, + "learning_rate": 3.663164498561633e-07, + "loss": 0.192, + "step": 19465 + }, + { + "epoch": 2.748658571025134, + "grad_norm": 2.6941572825967355, + "learning_rate": 3.659077314466519e-07, + "loss": 0.1293, + "step": 19466 + }, + { + "epoch": 2.74879977407512, + "grad_norm": 2.9813998918161486, + "learning_rate": 3.6549923692928204e-07, + "loss": 0.1482, + "step": 19467 + }, + { + "epoch": 2.748940977125106, + "grad_norm": 3.9736551423057156, + "learning_rate": 3.650909663135505e-07, + "loss": 0.1798, + "step": 19468 + }, + { + "epoch": 2.7490821801750918, + "grad_norm": 2.899678491572323, + "learning_rate": 3.6468291960894406e-07, + "loss": 0.1428, + "step": 19469 + }, + { + "epoch": 2.7492233832250776, + "grad_norm": 4.262922421669515, + "learning_rate": 3.642750968249442e-07, + "loss": 0.1526, + "step": 19470 + }, + { + "epoch": 2.7493645862750635, + "grad_norm": 3.0888381922771626, + "learning_rate": 3.638674979710322e-07, + "loss": 0.1433, + "step": 19471 + }, + { + "epoch": 2.7495057893250494, + "grad_norm": 2.7460686722026804, + "learning_rate": 3.63460123056677e-07, + "loss": 0.1074, + "step": 19472 + }, + { + "epoch": 2.7496469923750353, + "grad_norm": 2.8510387615597903, + "learning_rate": 3.630529720913445e-07, + "loss": 0.1317, + "step": 19473 + }, + { + "epoch": 2.749788195425021, + "grad_norm": 3.223684142200759, + "learning_rate": 3.626460450845015e-07, + "loss": 0.1636, + "step": 19474 + }, + { + "epoch": 2.749929398475007, + "grad_norm": 3.9422982848498807, + "learning_rate": 3.6223934204560165e-07, + "loss": 0.1754, + "step": 19475 + }, + { + "epoch": 2.750070601524993, + "grad_norm": 2.5771652929794846, + "learning_rate": 3.6183286298409724e-07, + "loss": 0.1047, + "step": 19476 + }, + { + "epoch": 2.750211804574979, + "grad_norm": 4.189765825573365, + "learning_rate": 3.614266079094353e-07, + "loss": 0.1667, + "step": 19477 + }, + { + "epoch": 2.7503530076249647, + "grad_norm": 2.65611009018958, + "learning_rate": 3.6102057683105596e-07, + "loss": 0.0876, + "step": 19478 + }, + { + "epoch": 2.7504942106749506, + "grad_norm": 3.1906724612257604, + "learning_rate": 3.6061476975839395e-07, + "loss": 0.1329, + "step": 19479 + }, + { + "epoch": 2.7506354137249365, + "grad_norm": 2.576238870813438, + "learning_rate": 3.60209186700885e-07, + "loss": 0.1245, + "step": 19480 + }, + { + "epoch": 2.7507766167749224, + "grad_norm": 2.575195536864581, + "learning_rate": 3.598038276679494e-07, + "loss": 0.0842, + "step": 19481 + }, + { + "epoch": 2.7509178198249082, + "grad_norm": 3.3017865889359923, + "learning_rate": 3.5939869266901073e-07, + "loss": 0.1371, + "step": 19482 + }, + { + "epoch": 2.751059022874894, + "grad_norm": 3.14603263869236, + "learning_rate": 3.5899378171348144e-07, + "loss": 0.1536, + "step": 19483 + }, + { + "epoch": 2.75120022592488, + "grad_norm": 3.8305165186411285, + "learning_rate": 3.58589094810774e-07, + "loss": 0.1898, + "step": 19484 + }, + { + "epoch": 2.751341428974866, + "grad_norm": 3.311326743292963, + "learning_rate": 3.5818463197029086e-07, + "loss": 0.1606, + "step": 19485 + }, + { + "epoch": 2.7514826320248518, + "grad_norm": 2.907670810133994, + "learning_rate": 3.5778039320143456e-07, + "loss": 0.1231, + "step": 19486 + }, + { + "epoch": 2.7516238350748377, + "grad_norm": 3.306112875085602, + "learning_rate": 3.573763785135975e-07, + "loss": 0.1569, + "step": 19487 + }, + { + "epoch": 2.7517650381248235, + "grad_norm": 3.327947791239029, + "learning_rate": 3.5697258791617007e-07, + "loss": 0.1425, + "step": 19488 + }, + { + "epoch": 2.7519062411748094, + "grad_norm": 3.0887860998801053, + "learning_rate": 3.5656902141853356e-07, + "loss": 0.1387, + "step": 19489 + }, + { + "epoch": 2.7520474442247953, + "grad_norm": 3.264248806244703, + "learning_rate": 3.561656790300683e-07, + "loss": 0.1473, + "step": 19490 + }, + { + "epoch": 2.752188647274781, + "grad_norm": 4.303967715273767, + "learning_rate": 3.5576256076014783e-07, + "loss": 0.2076, + "step": 19491 + }, + { + "epoch": 2.752329850324767, + "grad_norm": 2.670530796231145, + "learning_rate": 3.553596666181414e-07, + "loss": 0.1207, + "step": 19492 + }, + { + "epoch": 2.752471053374753, + "grad_norm": 3.3508983494305284, + "learning_rate": 3.549569966134103e-07, + "loss": 0.1139, + "step": 19493 + }, + { + "epoch": 2.752612256424739, + "grad_norm": 3.1361106059039243, + "learning_rate": 3.545545507553139e-07, + "loss": 0.0954, + "step": 19494 + }, + { + "epoch": 2.7527534594747247, + "grad_norm": 3.8432665696055963, + "learning_rate": 3.541523290532034e-07, + "loss": 0.2032, + "step": 19495 + }, + { + "epoch": 2.7528946625247106, + "grad_norm": 2.809965976461838, + "learning_rate": 3.537503315164259e-07, + "loss": 0.153, + "step": 19496 + }, + { + "epoch": 2.7530358655746965, + "grad_norm": 3.9026772609466063, + "learning_rate": 3.533485581543283e-07, + "loss": 0.2139, + "step": 19497 + }, + { + "epoch": 2.7531770686246824, + "grad_norm": 3.348088736192786, + "learning_rate": 3.529470089762421e-07, + "loss": 0.1429, + "step": 19498 + }, + { + "epoch": 2.7533182716746682, + "grad_norm": 2.8747479198745816, + "learning_rate": 3.525456839915009e-07, + "loss": 0.1387, + "step": 19499 + }, + { + "epoch": 2.753459474724654, + "grad_norm": 2.933515081201963, + "learning_rate": 3.521445832094328e-07, + "loss": 0.1522, + "step": 19500 + }, + { + "epoch": 2.75360067777464, + "grad_norm": 4.186558287328867, + "learning_rate": 3.51743706639357e-07, + "loss": 0.1779, + "step": 19501 + }, + { + "epoch": 2.753741880824626, + "grad_norm": 2.940150384115856, + "learning_rate": 3.5134305429058935e-07, + "loss": 0.1283, + "step": 19502 + }, + { + "epoch": 2.753883083874612, + "grad_norm": 3.7203508032238592, + "learning_rate": 3.5094262617244356e-07, + "loss": 0.1565, + "step": 19503 + }, + { + "epoch": 2.7540242869245977, + "grad_norm": 3.259409839230129, + "learning_rate": 3.505424222942244e-07, + "loss": 0.1313, + "step": 19504 + }, + { + "epoch": 2.7541654899745835, + "grad_norm": 3.539143838291693, + "learning_rate": 3.501424426652333e-07, + "loss": 0.1485, + "step": 19505 + }, + { + "epoch": 2.7543066930245694, + "grad_norm": 3.6697689727089875, + "learning_rate": 3.497426872947629e-07, + "loss": 0.1533, + "step": 19506 + }, + { + "epoch": 2.7544478960745553, + "grad_norm": 2.9948094006671226, + "learning_rate": 3.4934315619210346e-07, + "loss": 0.1405, + "step": 19507 + }, + { + "epoch": 2.754589099124541, + "grad_norm": 4.472160301585865, + "learning_rate": 3.48943849366542e-07, + "loss": 0.218, + "step": 19508 + }, + { + "epoch": 2.754730302174527, + "grad_norm": 3.375127186027285, + "learning_rate": 3.485447668273589e-07, + "loss": 0.1622, + "step": 19509 + }, + { + "epoch": 2.754871505224513, + "grad_norm": 2.655549839464905, + "learning_rate": 3.481459085838268e-07, + "loss": 0.1431, + "step": 19510 + }, + { + "epoch": 2.755012708274499, + "grad_norm": 2.8110369352142404, + "learning_rate": 3.4774727464521484e-07, + "loss": 0.1403, + "step": 19511 + }, + { + "epoch": 2.7551539113244847, + "grad_norm": 3.7258549680762454, + "learning_rate": 3.473488650207879e-07, + "loss": 0.198, + "step": 19512 + }, + { + "epoch": 2.7552951143744706, + "grad_norm": 3.90779830717312, + "learning_rate": 3.469506797198052e-07, + "loss": 0.1663, + "step": 19513 + }, + { + "epoch": 2.7554363174244565, + "grad_norm": 3.928661652220164, + "learning_rate": 3.465527187515194e-07, + "loss": 0.1475, + "step": 19514 + }, + { + "epoch": 2.7555775204744424, + "grad_norm": 3.0050910053602573, + "learning_rate": 3.4615498212517975e-07, + "loss": 0.1112, + "step": 19515 + }, + { + "epoch": 2.7557187235244283, + "grad_norm": 3.228661231883579, + "learning_rate": 3.4575746985002877e-07, + "loss": 0.1594, + "step": 19516 + }, + { + "epoch": 2.755859926574414, + "grad_norm": 3.555076861281196, + "learning_rate": 3.453601819353047e-07, + "loss": 0.1557, + "step": 19517 + }, + { + "epoch": 2.7560011296244, + "grad_norm": 3.106637786355952, + "learning_rate": 3.4496311839024133e-07, + "loss": 0.1493, + "step": 19518 + }, + { + "epoch": 2.756142332674386, + "grad_norm": 2.6237853129476596, + "learning_rate": 3.4456627922406337e-07, + "loss": 0.1115, + "step": 19519 + }, + { + "epoch": 2.756283535724372, + "grad_norm": 2.370021694969083, + "learning_rate": 3.441696644459969e-07, + "loss": 0.1165, + "step": 19520 + }, + { + "epoch": 2.7564247387743577, + "grad_norm": 3.2333158403856275, + "learning_rate": 3.437732740652566e-07, + "loss": 0.1491, + "step": 19521 + }, + { + "epoch": 2.7565659418243436, + "grad_norm": 3.76228042493995, + "learning_rate": 3.433771080910575e-07, + "loss": 0.1801, + "step": 19522 + }, + { + "epoch": 2.7567071448743294, + "grad_norm": 2.9058942687655, + "learning_rate": 3.4298116653260215e-07, + "loss": 0.1224, + "step": 19523 + }, + { + "epoch": 2.7568483479243153, + "grad_norm": 2.3910949105773995, + "learning_rate": 3.4258544939909324e-07, + "loss": 0.0938, + "step": 19524 + }, + { + "epoch": 2.756989550974301, + "grad_norm": 2.891962938024656, + "learning_rate": 3.4218995669972886e-07, + "loss": 0.1679, + "step": 19525 + }, + { + "epoch": 2.757130754024287, + "grad_norm": 3.7291997678260875, + "learning_rate": 3.4179468844369847e-07, + "loss": 0.1536, + "step": 19526 + }, + { + "epoch": 2.7572719570742725, + "grad_norm": 3.360224870454722, + "learning_rate": 3.4139964464018904e-07, + "loss": 0.1692, + "step": 19527 + }, + { + "epoch": 2.7574131601242584, + "grad_norm": 3.5607398288577925, + "learning_rate": 3.4100482529838e-07, + "loss": 0.1548, + "step": 19528 + }, + { + "epoch": 2.7575543631742443, + "grad_norm": 3.0367291731034802, + "learning_rate": 3.4061023042744837e-07, + "loss": 0.115, + "step": 19529 + }, + { + "epoch": 2.75769556622423, + "grad_norm": 2.8816947543093288, + "learning_rate": 3.4021586003656236e-07, + "loss": 0.1273, + "step": 19530 + }, + { + "epoch": 2.757836769274216, + "grad_norm": 2.7409791842642455, + "learning_rate": 3.3982171413488916e-07, + "loss": 0.1378, + "step": 19531 + }, + { + "epoch": 2.757977972324202, + "grad_norm": 3.2842204605367624, + "learning_rate": 3.394277927315859e-07, + "loss": 0.1263, + "step": 19532 + }, + { + "epoch": 2.758119175374188, + "grad_norm": 3.838314309060839, + "learning_rate": 3.390340958358096e-07, + "loss": 0.1733, + "step": 19533 + }, + { + "epoch": 2.7582603784241737, + "grad_norm": 2.9166689232032654, + "learning_rate": 3.386406234567086e-07, + "loss": 0.1274, + "step": 19534 + }, + { + "epoch": 2.7584015814741596, + "grad_norm": 2.8670261690280983, + "learning_rate": 3.382473756034277e-07, + "loss": 0.1403, + "step": 19535 + }, + { + "epoch": 2.7585427845241455, + "grad_norm": 2.87901061234327, + "learning_rate": 3.37854352285103e-07, + "loss": 0.1415, + "step": 19536 + }, + { + "epoch": 2.7586839875741314, + "grad_norm": 3.2004710504424896, + "learning_rate": 3.3746155351087276e-07, + "loss": 0.1359, + "step": 19537 + }, + { + "epoch": 2.7588251906241172, + "grad_norm": 3.3008086624260926, + "learning_rate": 3.370689792898618e-07, + "loss": 0.1591, + "step": 19538 + }, + { + "epoch": 2.758966393674103, + "grad_norm": 3.942132329644193, + "learning_rate": 3.3667662963119627e-07, + "loss": 0.1645, + "step": 19539 + }, + { + "epoch": 2.759107596724089, + "grad_norm": 2.4502926500201543, + "learning_rate": 3.362845045439911e-07, + "loss": 0.1305, + "step": 19540 + }, + { + "epoch": 2.759248799774075, + "grad_norm": 2.799034668617568, + "learning_rate": 3.3589260403736e-07, + "loss": 0.1392, + "step": 19541 + }, + { + "epoch": 2.7593900028240608, + "grad_norm": 2.83381248452317, + "learning_rate": 3.3550092812041244e-07, + "loss": 0.1317, + "step": 19542 + }, + { + "epoch": 2.7595312058740467, + "grad_norm": 3.5074770547662175, + "learning_rate": 3.3510947680224893e-07, + "loss": 0.1244, + "step": 19543 + }, + { + "epoch": 2.7596724089240325, + "grad_norm": 4.555084382809644, + "learning_rate": 3.347182500919677e-07, + "loss": 0.1955, + "step": 19544 + }, + { + "epoch": 2.7598136119740184, + "grad_norm": 3.1773148697972315, + "learning_rate": 3.3432724799866034e-07, + "loss": 0.1621, + "step": 19545 + }, + { + "epoch": 2.7599548150240043, + "grad_norm": 3.6582558558216065, + "learning_rate": 3.33936470531413e-07, + "loss": 0.1243, + "step": 19546 + }, + { + "epoch": 2.76009601807399, + "grad_norm": 3.6404124238059006, + "learning_rate": 3.335459176993083e-07, + "loss": 0.1359, + "step": 19547 + }, + { + "epoch": 2.760237221123976, + "grad_norm": 3.23281727214003, + "learning_rate": 3.3315558951142133e-07, + "loss": 0.1575, + "step": 19548 + }, + { + "epoch": 2.760378424173962, + "grad_norm": 3.3773389405855454, + "learning_rate": 3.3276548597682366e-07, + "loss": 0.1693, + "step": 19549 + }, + { + "epoch": 2.760519627223948, + "grad_norm": 3.606345556252987, + "learning_rate": 3.3237560710458137e-07, + "loss": 0.142, + "step": 19550 + }, + { + "epoch": 2.7606608302739337, + "grad_norm": 2.55250771795876, + "learning_rate": 3.31985952903755e-07, + "loss": 0.1031, + "step": 19551 + }, + { + "epoch": 2.7608020333239196, + "grad_norm": 3.3706025219757696, + "learning_rate": 3.3159652338339953e-07, + "loss": 0.1725, + "step": 19552 + }, + { + "epoch": 2.7609432363739055, + "grad_norm": 2.9236518818066615, + "learning_rate": 3.312073185525633e-07, + "loss": 0.1094, + "step": 19553 + }, + { + "epoch": 2.7610844394238914, + "grad_norm": 2.785924072233544, + "learning_rate": 3.3081833842029563e-07, + "loss": 0.1499, + "step": 19554 + }, + { + "epoch": 2.7612256424738773, + "grad_norm": 3.2092281278603205, + "learning_rate": 3.3042958299563386e-07, + "loss": 0.1577, + "step": 19555 + }, + { + "epoch": 2.761366845523863, + "grad_norm": 3.32422636749609, + "learning_rate": 3.300410522876141e-07, + "loss": 0.1721, + "step": 19556 + }, + { + "epoch": 2.761508048573849, + "grad_norm": 3.971616868191806, + "learning_rate": 3.2965274630526236e-07, + "loss": 0.1653, + "step": 19557 + }, + { + "epoch": 2.761649251623835, + "grad_norm": 2.926649857754778, + "learning_rate": 3.292646650576037e-07, + "loss": 0.1133, + "step": 19558 + }, + { + "epoch": 2.761790454673821, + "grad_norm": 3.3607366060362582, + "learning_rate": 3.2887680855365867e-07, + "loss": 0.1251, + "step": 19559 + }, + { + "epoch": 2.7619316577238067, + "grad_norm": 3.0315689725980435, + "learning_rate": 3.284891768024401e-07, + "loss": 0.149, + "step": 19560 + }, + { + "epoch": 2.7620728607737925, + "grad_norm": 3.6866328655452283, + "learning_rate": 3.281017698129563e-07, + "loss": 0.2047, + "step": 19561 + }, + { + "epoch": 2.7622140638237784, + "grad_norm": 3.4447937218122546, + "learning_rate": 3.2771458759421005e-07, + "loss": 0.1172, + "step": 19562 + }, + { + "epoch": 2.7623552668737643, + "grad_norm": 3.7076393911278425, + "learning_rate": 3.2732763015519977e-07, + "loss": 0.1551, + "step": 19563 + }, + { + "epoch": 2.76249646992375, + "grad_norm": 3.615581774096736, + "learning_rate": 3.269408975049182e-07, + "loss": 0.1255, + "step": 19564 + }, + { + "epoch": 2.762637672973736, + "grad_norm": 2.7598868782790884, + "learning_rate": 3.2655438965235265e-07, + "loss": 0.1464, + "step": 19565 + }, + { + "epoch": 2.762778876023722, + "grad_norm": 3.7200465880969062, + "learning_rate": 3.261681066064859e-07, + "loss": 0.1995, + "step": 19566 + }, + { + "epoch": 2.762920079073708, + "grad_norm": 2.629106668080258, + "learning_rate": 3.2578204837629414e-07, + "loss": 0.1159, + "step": 19567 + }, + { + "epoch": 2.7630612821236937, + "grad_norm": 3.30962640556775, + "learning_rate": 3.25396214970749e-07, + "loss": 0.143, + "step": 19568 + }, + { + "epoch": 2.7632024851736796, + "grad_norm": 3.0980524477300904, + "learning_rate": 3.250106063988179e-07, + "loss": 0.1507, + "step": 19569 + }, + { + "epoch": 2.7633436882236655, + "grad_norm": 3.215960454449379, + "learning_rate": 3.2462522266946127e-07, + "loss": 0.1378, + "step": 19570 + }, + { + "epoch": 2.7634848912736514, + "grad_norm": 3.105049481043069, + "learning_rate": 3.2424006379163764e-07, + "loss": 0.1639, + "step": 19571 + }, + { + "epoch": 2.7636260943236373, + "grad_norm": 3.2525499456536804, + "learning_rate": 3.238551297742953e-07, + "loss": 0.177, + "step": 19572 + }, + { + "epoch": 2.763767297373623, + "grad_norm": 3.4288453143469755, + "learning_rate": 3.234704206263828e-07, + "loss": 0.095, + "step": 19573 + }, + { + "epoch": 2.763908500423609, + "grad_norm": 4.549117535531631, + "learning_rate": 3.230859363568373e-07, + "loss": 0.1561, + "step": 19574 + }, + { + "epoch": 2.764049703473595, + "grad_norm": 3.7502780011628896, + "learning_rate": 3.22701676974595e-07, + "loss": 0.1846, + "step": 19575 + }, + { + "epoch": 2.764190906523581, + "grad_norm": 2.830901368806215, + "learning_rate": 3.2231764248858656e-07, + "loss": 0.1426, + "step": 19576 + }, + { + "epoch": 2.7643321095735667, + "grad_norm": 2.6768742034239574, + "learning_rate": 3.2193383290773705e-07, + "loss": 0.1333, + "step": 19577 + }, + { + "epoch": 2.7644733126235526, + "grad_norm": 3.184745903317428, + "learning_rate": 3.215502482409649e-07, + "loss": 0.1489, + "step": 19578 + }, + { + "epoch": 2.7646145156735384, + "grad_norm": 4.4498840411925755, + "learning_rate": 3.2116688849718637e-07, + "loss": 0.1963, + "step": 19579 + }, + { + "epoch": 2.7647557187235243, + "grad_norm": 2.255676671830971, + "learning_rate": 3.207837536853087e-07, + "loss": 0.1155, + "step": 19580 + }, + { + "epoch": 2.76489692177351, + "grad_norm": 3.5540901215847125, + "learning_rate": 3.2040084381423697e-07, + "loss": 0.1474, + "step": 19581 + }, + { + "epoch": 2.765038124823496, + "grad_norm": 2.683536738636351, + "learning_rate": 3.2001815889286856e-07, + "loss": 0.121, + "step": 19582 + }, + { + "epoch": 2.765179327873482, + "grad_norm": 2.8248423243288183, + "learning_rate": 3.196356989300986e-07, + "loss": 0.1269, + "step": 19583 + }, + { + "epoch": 2.765320530923468, + "grad_norm": 3.88574847785713, + "learning_rate": 3.1925346393481327e-07, + "loss": 0.1983, + "step": 19584 + }, + { + "epoch": 2.7654617339734537, + "grad_norm": 2.819869384131492, + "learning_rate": 3.188714539158977e-07, + "loss": 0.1035, + "step": 19585 + }, + { + "epoch": 2.7656029370234396, + "grad_norm": 2.8890683464789593, + "learning_rate": 3.184896688822281e-07, + "loss": 0.1242, + "step": 19586 + }, + { + "epoch": 2.7657441400734255, + "grad_norm": 2.2804280871246907, + "learning_rate": 3.181081088426774e-07, + "loss": 0.0857, + "step": 19587 + }, + { + "epoch": 2.7658853431234114, + "grad_norm": 3.3257308123049003, + "learning_rate": 3.1772677380611185e-07, + "loss": 0.1337, + "step": 19588 + }, + { + "epoch": 2.7660265461733973, + "grad_norm": 3.048719385196497, + "learning_rate": 3.1734566378139653e-07, + "loss": 0.1325, + "step": 19589 + }, + { + "epoch": 2.766167749223383, + "grad_norm": 3.628264071309252, + "learning_rate": 3.1696477877738664e-07, + "loss": 0.1498, + "step": 19590 + }, + { + "epoch": 2.766308952273369, + "grad_norm": 3.452834365576766, + "learning_rate": 3.165841188029328e-07, + "loss": 0.1636, + "step": 19591 + }, + { + "epoch": 2.766450155323355, + "grad_norm": 2.892655399266402, + "learning_rate": 3.1620368386688137e-07, + "loss": 0.1209, + "step": 19592 + }, + { + "epoch": 2.766591358373341, + "grad_norm": 3.1115898387076046, + "learning_rate": 3.158234739780741e-07, + "loss": 0.121, + "step": 19593 + }, + { + "epoch": 2.7667325614233267, + "grad_norm": 3.506284194986558, + "learning_rate": 3.154434891453473e-07, + "loss": 0.2141, + "step": 19594 + }, + { + "epoch": 2.7668737644733126, + "grad_norm": 3.3580486162799734, + "learning_rate": 3.1506372937753163e-07, + "loss": 0.1519, + "step": 19595 + }, + { + "epoch": 2.7670149675232985, + "grad_norm": 2.9799807487615966, + "learning_rate": 3.1468419468345223e-07, + "loss": 0.1425, + "step": 19596 + }, + { + "epoch": 2.7671561705732843, + "grad_norm": 3.0452341473004583, + "learning_rate": 3.143048850719299e-07, + "loss": 0.1485, + "step": 19597 + }, + { + "epoch": 2.76729737362327, + "grad_norm": 3.1282722469258126, + "learning_rate": 3.1392580055177867e-07, + "loss": 0.1544, + "step": 19598 + }, + { + "epoch": 2.767438576673256, + "grad_norm": 3.338097040866428, + "learning_rate": 3.135469411318082e-07, + "loss": 0.1523, + "step": 19599 + }, + { + "epoch": 2.767579779723242, + "grad_norm": 2.4413959824125393, + "learning_rate": 3.131683068208247e-07, + "loss": 0.1139, + "step": 19600 + }, + { + "epoch": 2.767720982773228, + "grad_norm": 2.78138289531275, + "learning_rate": 3.1278989762762556e-07, + "loss": 0.1369, + "step": 19601 + }, + { + "epoch": 2.7678621858232137, + "grad_norm": 3.5091370595881424, + "learning_rate": 3.1241171356100606e-07, + "loss": 0.1819, + "step": 19602 + }, + { + "epoch": 2.7680033888731996, + "grad_norm": 3.2027577153979085, + "learning_rate": 3.1203375462975474e-07, + "loss": 0.1435, + "step": 19603 + }, + { + "epoch": 2.7681445919231855, + "grad_norm": 3.6853600371966664, + "learning_rate": 3.1165602084265446e-07, + "loss": 0.1972, + "step": 19604 + }, + { + "epoch": 2.7682857949731714, + "grad_norm": 2.813220419690726, + "learning_rate": 3.1127851220848273e-07, + "loss": 0.126, + "step": 19605 + }, + { + "epoch": 2.7684269980231573, + "grad_norm": 3.2338330299545395, + "learning_rate": 3.109012287360158e-07, + "loss": 0.1534, + "step": 19606 + }, + { + "epoch": 2.768568201073143, + "grad_norm": 3.619583014897995, + "learning_rate": 3.1052417043402115e-07, + "loss": 0.1537, + "step": 19607 + }, + { + "epoch": 2.768709404123129, + "grad_norm": 3.1880480700565874, + "learning_rate": 3.1014733731125955e-07, + "loss": 0.1232, + "step": 19608 + }, + { + "epoch": 2.768850607173115, + "grad_norm": 3.4992004678162063, + "learning_rate": 3.0977072937648846e-07, + "loss": 0.1805, + "step": 19609 + }, + { + "epoch": 2.768991810223101, + "grad_norm": 3.635470892838778, + "learning_rate": 3.093943466384597e-07, + "loss": 0.1502, + "step": 19610 + }, + { + "epoch": 2.7691330132730867, + "grad_norm": 2.708651875386631, + "learning_rate": 3.0901818910592183e-07, + "loss": 0.1126, + "step": 19611 + }, + { + "epoch": 2.7692742163230726, + "grad_norm": 3.078773839199322, + "learning_rate": 3.0864225678761684e-07, + "loss": 0.1155, + "step": 19612 + }, + { + "epoch": 2.7694154193730585, + "grad_norm": 2.909802303822352, + "learning_rate": 3.082665496922799e-07, + "loss": 0.1473, + "step": 19613 + }, + { + "epoch": 2.7695566224230443, + "grad_norm": 2.883051784549847, + "learning_rate": 3.0789106782864285e-07, + "loss": 0.1378, + "step": 19614 + }, + { + "epoch": 2.7696978254730302, + "grad_norm": 3.6010201404992817, + "learning_rate": 3.0751581120543216e-07, + "loss": 0.1948, + "step": 19615 + }, + { + "epoch": 2.769839028523016, + "grad_norm": 4.147006348151358, + "learning_rate": 3.071407798313686e-07, + "loss": 0.1529, + "step": 19616 + }, + { + "epoch": 2.769980231573002, + "grad_norm": 2.988242827480415, + "learning_rate": 3.0676597371516627e-07, + "loss": 0.1353, + "step": 19617 + }, + { + "epoch": 2.770121434622988, + "grad_norm": 3.043948819837961, + "learning_rate": 3.0639139286553707e-07, + "loss": 0.1239, + "step": 19618 + }, + { + "epoch": 2.7702626376729738, + "grad_norm": 2.8728634787079046, + "learning_rate": 3.0601703729118524e-07, + "loss": 0.1442, + "step": 19619 + }, + { + "epoch": 2.7704038407229596, + "grad_norm": 3.7708298877762263, + "learning_rate": 3.0564290700081044e-07, + "loss": 0.1449, + "step": 19620 + }, + { + "epoch": 2.7705450437729455, + "grad_norm": 3.207569431158485, + "learning_rate": 3.0526900200310905e-07, + "loss": 0.1156, + "step": 19621 + }, + { + "epoch": 2.7706862468229314, + "grad_norm": 2.591919975616547, + "learning_rate": 3.0489532230676744e-07, + "loss": 0.1297, + "step": 19622 + }, + { + "epoch": 2.7708274498729173, + "grad_norm": 3.473771002822263, + "learning_rate": 3.04521867920472e-07, + "loss": 0.1814, + "step": 19623 + }, + { + "epoch": 2.770968652922903, + "grad_norm": 4.001109288340226, + "learning_rate": 3.041486388529036e-07, + "loss": 0.1855, + "step": 19624 + }, + { + "epoch": 2.771109855972889, + "grad_norm": 2.6159585989590726, + "learning_rate": 3.037756351127319e-07, + "loss": 0.1049, + "step": 19625 + }, + { + "epoch": 2.771251059022875, + "grad_norm": 3.0585527674668826, + "learning_rate": 3.0340285670862667e-07, + "loss": 0.162, + "step": 19626 + }, + { + "epoch": 2.771392262072861, + "grad_norm": 3.0136793795050982, + "learning_rate": 3.030303036492499e-07, + "loss": 0.1187, + "step": 19627 + }, + { + "epoch": 2.7715334651228467, + "grad_norm": 3.071604381672808, + "learning_rate": 3.026579759432635e-07, + "loss": 0.1428, + "step": 19628 + }, + { + "epoch": 2.7716746681728326, + "grad_norm": 2.631543688552586, + "learning_rate": 3.0228587359931726e-07, + "loss": 0.1093, + "step": 19629 + }, + { + "epoch": 2.7718158712228185, + "grad_norm": 3.4726543561840963, + "learning_rate": 3.019139966260587e-07, + "loss": 0.1354, + "step": 19630 + }, + { + "epoch": 2.7719570742728044, + "grad_norm": 3.594581741869066, + "learning_rate": 3.015423450321309e-07, + "loss": 0.1615, + "step": 19631 + }, + { + "epoch": 2.7720982773227902, + "grad_norm": 3.268048914941403, + "learning_rate": 3.0117091882617025e-07, + "loss": 0.19, + "step": 19632 + }, + { + "epoch": 2.772239480372776, + "grad_norm": 3.225866639173116, + "learning_rate": 3.0079971801680876e-07, + "loss": 0.1416, + "step": 19633 + }, + { + "epoch": 2.772380683422762, + "grad_norm": 2.7167347944405287, + "learning_rate": 3.0042874261267395e-07, + "loss": 0.1251, + "step": 19634 + }, + { + "epoch": 2.772521886472748, + "grad_norm": 3.4548878737418685, + "learning_rate": 3.0005799262238565e-07, + "loss": 0.2028, + "step": 19635 + }, + { + "epoch": 2.7726630895227338, + "grad_norm": 3.267261089051461, + "learning_rate": 2.996874680545603e-07, + "loss": 0.1485, + "step": 19636 + }, + { + "epoch": 2.7728042925727197, + "grad_norm": 3.0673821495415288, + "learning_rate": 2.993171689178098e-07, + "loss": 0.1481, + "step": 19637 + }, + { + "epoch": 2.7729454956227055, + "grad_norm": 3.1198652904547965, + "learning_rate": 2.989470952207385e-07, + "loss": 0.1562, + "step": 19638 + }, + { + "epoch": 2.7730866986726914, + "grad_norm": 3.434027546801516, + "learning_rate": 2.9857724697194503e-07, + "loss": 0.1358, + "step": 19639 + }, + { + "epoch": 2.7732279017226773, + "grad_norm": 3.009952851218732, + "learning_rate": 2.9820762418002916e-07, + "loss": 0.1567, + "step": 19640 + }, + { + "epoch": 2.773369104772663, + "grad_norm": 2.986340432938195, + "learning_rate": 2.9783822685357844e-07, + "loss": 0.1186, + "step": 19641 + }, + { + "epoch": 2.773510307822649, + "grad_norm": 3.4142620313609298, + "learning_rate": 2.9746905500117604e-07, + "loss": 0.1741, + "step": 19642 + }, + { + "epoch": 2.773651510872635, + "grad_norm": 2.9057243173812366, + "learning_rate": 2.971001086314029e-07, + "loss": 0.086, + "step": 19643 + }, + { + "epoch": 2.773792713922621, + "grad_norm": 3.010197026934564, + "learning_rate": 2.967313877528322e-07, + "loss": 0.1447, + "step": 19644 + }, + { + "epoch": 2.7739339169726067, + "grad_norm": 3.644161942264826, + "learning_rate": 2.963628923740347e-07, + "loss": 0.1673, + "step": 19645 + }, + { + "epoch": 2.7740751200225926, + "grad_norm": 3.3068130870957018, + "learning_rate": 2.959946225035726e-07, + "loss": 0.1462, + "step": 19646 + }, + { + "epoch": 2.7742163230725785, + "grad_norm": 3.60932267103286, + "learning_rate": 2.956265781500045e-07, + "loss": 0.1617, + "step": 19647 + }, + { + "epoch": 2.7743575261225644, + "grad_norm": 4.932320028245283, + "learning_rate": 2.9525875932188365e-07, + "loss": 0.2174, + "step": 19648 + }, + { + "epoch": 2.7744987291725502, + "grad_norm": 3.3948479607938826, + "learning_rate": 2.948911660277587e-07, + "loss": 0.1326, + "step": 19649 + }, + { + "epoch": 2.774639932222536, + "grad_norm": 3.783702551417677, + "learning_rate": 2.945237982761706e-07, + "loss": 0.1626, + "step": 19650 + }, + { + "epoch": 2.774781135272522, + "grad_norm": 4.169890797744005, + "learning_rate": 2.9415665607565923e-07, + "loss": 0.205, + "step": 19651 + }, + { + "epoch": 2.774922338322508, + "grad_norm": 4.104137848682385, + "learning_rate": 2.937897394347544e-07, + "loss": 0.1396, + "step": 19652 + }, + { + "epoch": 2.775063541372494, + "grad_norm": 3.132945152743542, + "learning_rate": 2.9342304836198486e-07, + "loss": 0.1195, + "step": 19653 + }, + { + "epoch": 2.7752047444224797, + "grad_norm": 2.5290306760723222, + "learning_rate": 2.930565828658716e-07, + "loss": 0.1425, + "step": 19654 + }, + { + "epoch": 2.7753459474724655, + "grad_norm": 3.539552893927693, + "learning_rate": 2.9269034295493105e-07, + "loss": 0.1444, + "step": 19655 + }, + { + "epoch": 2.7754871505224514, + "grad_norm": 4.111920421623204, + "learning_rate": 2.9232432863767424e-07, + "loss": 0.1482, + "step": 19656 + }, + { + "epoch": 2.7756283535724373, + "grad_norm": 2.7723911845093903, + "learning_rate": 2.919585399226077e-07, + "loss": 0.1076, + "step": 19657 + }, + { + "epoch": 2.775769556622423, + "grad_norm": 3.419980122406715, + "learning_rate": 2.915929768182335e-07, + "loss": 0.1518, + "step": 19658 + }, + { + "epoch": 2.775910759672409, + "grad_norm": 3.4917504937148616, + "learning_rate": 2.912276393330449e-07, + "loss": 0.1817, + "step": 19659 + }, + { + "epoch": 2.776051962722395, + "grad_norm": 3.3454459788807642, + "learning_rate": 2.908625274755339e-07, + "loss": 0.1794, + "step": 19660 + }, + { + "epoch": 2.776193165772381, + "grad_norm": 3.4914202941411046, + "learning_rate": 2.9049764125418266e-07, + "loss": 0.1404, + "step": 19661 + }, + { + "epoch": 2.7763343688223667, + "grad_norm": 2.7471783258963773, + "learning_rate": 2.901329806774744e-07, + "loss": 0.1158, + "step": 19662 + }, + { + "epoch": 2.7764755718723526, + "grad_norm": 2.51868242038154, + "learning_rate": 2.8976854575388235e-07, + "loss": 0.0927, + "step": 19663 + }, + { + "epoch": 2.7766167749223385, + "grad_norm": 4.019283736065937, + "learning_rate": 2.8940433649187525e-07, + "loss": 0.1912, + "step": 19664 + }, + { + "epoch": 2.7767579779723244, + "grad_norm": 3.1779389002310805, + "learning_rate": 2.890403528999175e-07, + "loss": 0.1722, + "step": 19665 + }, + { + "epoch": 2.7768991810223103, + "grad_norm": 3.33049382278745, + "learning_rate": 2.8867659498647e-07, + "loss": 0.1575, + "step": 19666 + }, + { + "epoch": 2.777040384072296, + "grad_norm": 2.9521074943509267, + "learning_rate": 2.8831306275998174e-07, + "loss": 0.1014, + "step": 19667 + }, + { + "epoch": 2.777181587122282, + "grad_norm": 2.352915037544021, + "learning_rate": 2.8794975622890573e-07, + "loss": 0.0975, + "step": 19668 + }, + { + "epoch": 2.777322790172268, + "grad_norm": 2.624389168238163, + "learning_rate": 2.8758667540168203e-07, + "loss": 0.074, + "step": 19669 + }, + { + "epoch": 2.777463993222254, + "grad_norm": 3.537142570820614, + "learning_rate": 2.8722382028675055e-07, + "loss": 0.1429, + "step": 19670 + }, + { + "epoch": 2.7776051962722397, + "grad_norm": 2.343178475189642, + "learning_rate": 2.8686119089254227e-07, + "loss": 0.102, + "step": 19671 + }, + { + "epoch": 2.7777463993222256, + "grad_norm": 2.8945583062665063, + "learning_rate": 2.864987872274849e-07, + "loss": 0.1403, + "step": 19672 + }, + { + "epoch": 2.7778876023722114, + "grad_norm": 3.124781648698604, + "learning_rate": 2.861366092999995e-07, + "loss": 0.1243, + "step": 19673 + }, + { + "epoch": 2.7780288054221973, + "grad_norm": 3.3806360372503845, + "learning_rate": 2.8577465711850605e-07, + "loss": 0.157, + "step": 19674 + }, + { + "epoch": 2.778170008472183, + "grad_norm": 3.716008540569517, + "learning_rate": 2.854129306914144e-07, + "loss": 0.1958, + "step": 19675 + }, + { + "epoch": 2.778311211522169, + "grad_norm": 3.051211052528312, + "learning_rate": 2.8505143002713007e-07, + "loss": 0.1426, + "step": 19676 + }, + { + "epoch": 2.778452414572155, + "grad_norm": 4.467449331854612, + "learning_rate": 2.8469015513405527e-07, + "loss": 0.1951, + "step": 19677 + }, + { + "epoch": 2.778593617622141, + "grad_norm": 2.988701156351104, + "learning_rate": 2.843291060205855e-07, + "loss": 0.1484, + "step": 19678 + }, + { + "epoch": 2.7787348206721267, + "grad_norm": 2.6660472853782116, + "learning_rate": 2.839682826951107e-07, + "loss": 0.0902, + "step": 19679 + }, + { + "epoch": 2.7788760237221126, + "grad_norm": 3.1476281517659035, + "learning_rate": 2.8360768516601745e-07, + "loss": 0.164, + "step": 19680 + }, + { + "epoch": 2.7790172267720985, + "grad_norm": 3.0024305256365222, + "learning_rate": 2.8324731344168575e-07, + "loss": 0.1586, + "step": 19681 + }, + { + "epoch": 2.7791584298220844, + "grad_norm": 3.01605984156517, + "learning_rate": 2.8288716753049007e-07, + "loss": 0.1611, + "step": 19682 + }, + { + "epoch": 2.7792996328720703, + "grad_norm": 3.3965850061955183, + "learning_rate": 2.825272474408014e-07, + "loss": 0.165, + "step": 19683 + }, + { + "epoch": 2.779440835922056, + "grad_norm": 2.466355255491916, + "learning_rate": 2.821675531809809e-07, + "loss": 0.1272, + "step": 19684 + }, + { + "epoch": 2.779582038972042, + "grad_norm": 2.818385815926966, + "learning_rate": 2.818080847593896e-07, + "loss": 0.091, + "step": 19685 + }, + { + "epoch": 2.779723242022028, + "grad_norm": 3.0318500771567805, + "learning_rate": 2.814488421843831e-07, + "loss": 0.1345, + "step": 19686 + }, + { + "epoch": 2.779864445072014, + "grad_norm": 3.20740407924879, + "learning_rate": 2.8108982546430687e-07, + "loss": 0.1501, + "step": 19687 + }, + { + "epoch": 2.7800056481219997, + "grad_norm": 3.127838158761328, + "learning_rate": 2.8073103460750653e-07, + "loss": 0.172, + "step": 19688 + }, + { + "epoch": 2.7801468511719856, + "grad_norm": 2.8054312851552163, + "learning_rate": 2.803724696223198e-07, + "loss": 0.1796, + "step": 19689 + }, + { + "epoch": 2.7802880542219714, + "grad_norm": 3.7679348900621985, + "learning_rate": 2.800141305170789e-07, + "loss": 0.2012, + "step": 19690 + }, + { + "epoch": 2.7804292572719573, + "grad_norm": 2.702551922135359, + "learning_rate": 2.796560173001106e-07, + "loss": 0.1328, + "step": 19691 + }, + { + "epoch": 2.780570460321943, + "grad_norm": 3.148342556969039, + "learning_rate": 2.7929812997974036e-07, + "loss": 0.1585, + "step": 19692 + }, + { + "epoch": 2.780711663371929, + "grad_norm": 4.168077721357103, + "learning_rate": 2.789404685642827e-07, + "loss": 0.1393, + "step": 19693 + }, + { + "epoch": 2.780852866421915, + "grad_norm": 2.5595603181145132, + "learning_rate": 2.785830330620509e-07, + "loss": 0.1268, + "step": 19694 + }, + { + "epoch": 2.780994069471901, + "grad_norm": 2.619623082654432, + "learning_rate": 2.782258234813506e-07, + "loss": 0.1098, + "step": 19695 + }, + { + "epoch": 2.7811352725218867, + "grad_norm": 3.21743862806986, + "learning_rate": 2.7786883983048294e-07, + "loss": 0.1277, + "step": 19696 + }, + { + "epoch": 2.781276475571872, + "grad_norm": 3.109280286698981, + "learning_rate": 2.775120821177457e-07, + "loss": 0.1634, + "step": 19697 + }, + { + "epoch": 2.781417678621858, + "grad_norm": 3.9849288504100047, + "learning_rate": 2.771555503514289e-07, + "loss": 0.1688, + "step": 19698 + }, + { + "epoch": 2.781558881671844, + "grad_norm": 3.246484610287638, + "learning_rate": 2.7679924453981823e-07, + "loss": 0.1692, + "step": 19699 + }, + { + "epoch": 2.78170008472183, + "grad_norm": 2.7868735532814637, + "learning_rate": 2.764431646911947e-07, + "loss": 0.1352, + "step": 19700 + }, + { + "epoch": 2.7818412877718157, + "grad_norm": 2.595928869104256, + "learning_rate": 2.7608731081383065e-07, + "loss": 0.1055, + "step": 19701 + }, + { + "epoch": 2.7819824908218016, + "grad_norm": 2.177425698604991, + "learning_rate": 2.757316829159995e-07, + "loss": 0.0971, + "step": 19702 + }, + { + "epoch": 2.7821236938717875, + "grad_norm": 3.110636862022969, + "learning_rate": 2.7537628100596457e-07, + "loss": 0.1289, + "step": 19703 + }, + { + "epoch": 2.7822648969217734, + "grad_norm": 3.7928507531082705, + "learning_rate": 2.750211050919849e-07, + "loss": 0.1869, + "step": 19704 + }, + { + "epoch": 2.7824060999717593, + "grad_norm": 4.183989018962651, + "learning_rate": 2.746661551823149e-07, + "loss": 0.1557, + "step": 19705 + }, + { + "epoch": 2.782547303021745, + "grad_norm": 3.6501848601041575, + "learning_rate": 2.7431143128520243e-07, + "loss": 0.1414, + "step": 19706 + }, + { + "epoch": 2.782688506071731, + "grad_norm": 3.3117378285897447, + "learning_rate": 2.739569334088932e-07, + "loss": 0.1587, + "step": 19707 + }, + { + "epoch": 2.782829709121717, + "grad_norm": 3.2764709001460144, + "learning_rate": 2.7360266156162274e-07, + "loss": 0.1224, + "step": 19708 + }, + { + "epoch": 2.782970912171703, + "grad_norm": 3.590277352324747, + "learning_rate": 2.7324861575162897e-07, + "loss": 0.1779, + "step": 19709 + }, + { + "epoch": 2.7831121152216887, + "grad_norm": 3.433827682210781, + "learning_rate": 2.728947959871353e-07, + "loss": 0.1516, + "step": 19710 + }, + { + "epoch": 2.7832533182716745, + "grad_norm": 3.0805470291293213, + "learning_rate": 2.7254120227636514e-07, + "loss": 0.136, + "step": 19711 + }, + { + "epoch": 2.7833945213216604, + "grad_norm": 3.3961867613309766, + "learning_rate": 2.721878346275364e-07, + "loss": 0.1549, + "step": 19712 + }, + { + "epoch": 2.7835357243716463, + "grad_norm": 2.94879791081446, + "learning_rate": 2.7183469304886136e-07, + "loss": 0.1225, + "step": 19713 + }, + { + "epoch": 2.783676927421632, + "grad_norm": 3.0960782423771604, + "learning_rate": 2.714817775485468e-07, + "loss": 0.131, + "step": 19714 + }, + { + "epoch": 2.783818130471618, + "grad_norm": 3.4196282586828515, + "learning_rate": 2.71129088134795e-07, + "loss": 0.1757, + "step": 19715 + }, + { + "epoch": 2.783959333521604, + "grad_norm": 2.7597716761271087, + "learning_rate": 2.707766248158006e-07, + "loss": 0.1525, + "step": 19716 + }, + { + "epoch": 2.78410053657159, + "grad_norm": 2.81462405283787, + "learning_rate": 2.704243875997581e-07, + "loss": 0.1214, + "step": 19717 + }, + { + "epoch": 2.7842417396215757, + "grad_norm": 3.4395900142089437, + "learning_rate": 2.7007237649484763e-07, + "loss": 0.1562, + "step": 19718 + }, + { + "epoch": 2.7843829426715616, + "grad_norm": 4.7799866702662275, + "learning_rate": 2.697205915092549e-07, + "loss": 0.1751, + "step": 19719 + }, + { + "epoch": 2.7845241457215475, + "grad_norm": 2.285274510292052, + "learning_rate": 2.693690326511533e-07, + "loss": 0.0999, + "step": 19720 + }, + { + "epoch": 2.7846653487715334, + "grad_norm": 2.339440104717664, + "learning_rate": 2.6901769992871305e-07, + "loss": 0.1109, + "step": 19721 + }, + { + "epoch": 2.7848065518215193, + "grad_norm": 3.276646308719734, + "learning_rate": 2.686665933500987e-07, + "loss": 0.1353, + "step": 19722 + }, + { + "epoch": 2.784947754871505, + "grad_norm": 3.0191103745595247, + "learning_rate": 2.683157129234704e-07, + "loss": 0.1402, + "step": 19723 + }, + { + "epoch": 2.785088957921491, + "grad_norm": 3.590451349502683, + "learning_rate": 2.6796505865698263e-07, + "loss": 0.139, + "step": 19724 + }, + { + "epoch": 2.785230160971477, + "grad_norm": 3.3609377698126974, + "learning_rate": 2.6761463055878347e-07, + "loss": 0.1708, + "step": 19725 + }, + { + "epoch": 2.785371364021463, + "grad_norm": 3.595471138314666, + "learning_rate": 2.672644286370163e-07, + "loss": 0.1761, + "step": 19726 + }, + { + "epoch": 2.7855125670714487, + "grad_norm": 3.1227629959426206, + "learning_rate": 2.669144528998213e-07, + "loss": 0.1436, + "step": 19727 + }, + { + "epoch": 2.7856537701214346, + "grad_norm": 3.386613925536485, + "learning_rate": 2.665647033553309e-07, + "loss": 0.1642, + "step": 19728 + }, + { + "epoch": 2.7857949731714204, + "grad_norm": 2.527652899813678, + "learning_rate": 2.662151800116741e-07, + "loss": 0.1288, + "step": 19729 + }, + { + "epoch": 2.7859361762214063, + "grad_norm": 3.8872211458799413, + "learning_rate": 2.6586588287697114e-07, + "loss": 0.1523, + "step": 19730 + }, + { + "epoch": 2.786077379271392, + "grad_norm": 2.9889262537859183, + "learning_rate": 2.655168119593421e-07, + "loss": 0.1405, + "step": 19731 + }, + { + "epoch": 2.786218582321378, + "grad_norm": 2.5078425801655095, + "learning_rate": 2.651679672668983e-07, + "loss": 0.1147, + "step": 19732 + }, + { + "epoch": 2.786359785371364, + "grad_norm": 2.5971809792033573, + "learning_rate": 2.6481934880774663e-07, + "loss": 0.115, + "step": 19733 + }, + { + "epoch": 2.78650098842135, + "grad_norm": 2.985195835689216, + "learning_rate": 2.6447095658999054e-07, + "loss": 0.1462, + "step": 19734 + }, + { + "epoch": 2.7866421914713357, + "grad_norm": 3.212421410801023, + "learning_rate": 2.641227906217225e-07, + "loss": 0.1547, + "step": 19735 + }, + { + "epoch": 2.7867833945213216, + "grad_norm": 4.748936257900525, + "learning_rate": 2.6377485091103825e-07, + "loss": 0.1967, + "step": 19736 + }, + { + "epoch": 2.7869245975713075, + "grad_norm": 3.8002290377331396, + "learning_rate": 2.6342713746602023e-07, + "loss": 0.1831, + "step": 19737 + }, + { + "epoch": 2.7870658006212934, + "grad_norm": 2.360553086865986, + "learning_rate": 2.630796502947519e-07, + "loss": 0.0976, + "step": 19738 + }, + { + "epoch": 2.7872070036712793, + "grad_norm": 3.6785745156118037, + "learning_rate": 2.6273238940530686e-07, + "loss": 0.1779, + "step": 19739 + }, + { + "epoch": 2.787348206721265, + "grad_norm": 3.7272884658778564, + "learning_rate": 2.6238535480575533e-07, + "loss": 0.1956, + "step": 19740 + }, + { + "epoch": 2.787489409771251, + "grad_norm": 3.4420236919899003, + "learning_rate": 2.6203854650416307e-07, + "loss": 0.1869, + "step": 19741 + }, + { + "epoch": 2.787630612821237, + "grad_norm": 3.0041869355223816, + "learning_rate": 2.616919645085902e-07, + "loss": 0.144, + "step": 19742 + }, + { + "epoch": 2.787771815871223, + "grad_norm": 3.2824009915361936, + "learning_rate": 2.613456088270894e-07, + "loss": 0.1551, + "step": 19743 + }, + { + "epoch": 2.7879130189212087, + "grad_norm": 3.48185830588085, + "learning_rate": 2.609994794677118e-07, + "loss": 0.1795, + "step": 19744 + }, + { + "epoch": 2.7880542219711946, + "grad_norm": 3.52228696731064, + "learning_rate": 2.6065357643849985e-07, + "loss": 0.1428, + "step": 19745 + }, + { + "epoch": 2.7881954250211805, + "grad_norm": 4.370579440122452, + "learning_rate": 2.6030789974749285e-07, + "loss": 0.1796, + "step": 19746 + }, + { + "epoch": 2.7883366280711663, + "grad_norm": 2.926163528297969, + "learning_rate": 2.599624494027231e-07, + "loss": 0.1345, + "step": 19747 + }, + { + "epoch": 2.788477831121152, + "grad_norm": 3.5434999724346676, + "learning_rate": 2.596172254122209e-07, + "loss": 0.1573, + "step": 19748 + }, + { + "epoch": 2.788619034171138, + "grad_norm": 2.9469587234222234, + "learning_rate": 2.592722277840065e-07, + "loss": 0.1059, + "step": 19749 + }, + { + "epoch": 2.788760237221124, + "grad_norm": 2.884543768726291, + "learning_rate": 2.589274565261002e-07, + "loss": 0.1253, + "step": 19750 + }, + { + "epoch": 2.78890144027111, + "grad_norm": 3.6432340478225904, + "learning_rate": 2.585829116465133e-07, + "loss": 0.1821, + "step": 19751 + }, + { + "epoch": 2.7890426433210957, + "grad_norm": 3.905786455544547, + "learning_rate": 2.582385931532505e-07, + "loss": 0.1271, + "step": 19752 + }, + { + "epoch": 2.7891838463710816, + "grad_norm": 3.3668998324564345, + "learning_rate": 2.578945010543177e-07, + "loss": 0.1415, + "step": 19753 + }, + { + "epoch": 2.7893250494210675, + "grad_norm": 3.6171165129076623, + "learning_rate": 2.575506353577084e-07, + "loss": 0.1754, + "step": 19754 + }, + { + "epoch": 2.7894662524710534, + "grad_norm": 3.558362181852943, + "learning_rate": 2.5720699607141517e-07, + "loss": 0.1525, + "step": 19755 + }, + { + "epoch": 2.7896074555210393, + "grad_norm": 2.6752172064437993, + "learning_rate": 2.5686358320342387e-07, + "loss": 0.1303, + "step": 19756 + }, + { + "epoch": 2.789748658571025, + "grad_norm": 3.221259491270782, + "learning_rate": 2.565203967617147e-07, + "loss": 0.1756, + "step": 19757 + }, + { + "epoch": 2.789889861621011, + "grad_norm": 2.8220728264288333, + "learning_rate": 2.5617743675426354e-07, + "loss": 0.1119, + "step": 19758 + }, + { + "epoch": 2.790031064670997, + "grad_norm": 3.078113985719411, + "learning_rate": 2.558347031890418e-07, + "loss": 0.1381, + "step": 19759 + }, + { + "epoch": 2.790172267720983, + "grad_norm": 2.861955372763654, + "learning_rate": 2.55492196074012e-07, + "loss": 0.1415, + "step": 19760 + }, + { + "epoch": 2.7903134707709687, + "grad_norm": 3.557964171112699, + "learning_rate": 2.5514991541713664e-07, + "loss": 0.1746, + "step": 19761 + }, + { + "epoch": 2.7904546738209546, + "grad_norm": 3.5988090245121094, + "learning_rate": 2.5480786122636713e-07, + "loss": 0.1626, + "step": 19762 + }, + { + "epoch": 2.7905958768709405, + "grad_norm": 3.096196558917275, + "learning_rate": 2.5446603350965606e-07, + "loss": 0.1145, + "step": 19763 + }, + { + "epoch": 2.7907370799209263, + "grad_norm": 3.218223223297159, + "learning_rate": 2.5412443227494365e-07, + "loss": 0.1576, + "step": 19764 + }, + { + "epoch": 2.7908782829709122, + "grad_norm": 3.195011441759945, + "learning_rate": 2.537830575301714e-07, + "loss": 0.1454, + "step": 19765 + }, + { + "epoch": 2.791019486020898, + "grad_norm": 3.2710454837699676, + "learning_rate": 2.534419092832718e-07, + "loss": 0.1309, + "step": 19766 + }, + { + "epoch": 2.791160689070884, + "grad_norm": 4.089959851962814, + "learning_rate": 2.531009875421731e-07, + "loss": 0.1627, + "step": 19767 + }, + { + "epoch": 2.79130189212087, + "grad_norm": 2.48870334664276, + "learning_rate": 2.527602923147998e-07, + "loss": 0.1089, + "step": 19768 + }, + { + "epoch": 2.7914430951708558, + "grad_norm": 3.868846461620955, + "learning_rate": 2.524198236090658e-07, + "loss": 0.1848, + "step": 19769 + }, + { + "epoch": 2.7915842982208416, + "grad_norm": 2.941945756781059, + "learning_rate": 2.520795814328847e-07, + "loss": 0.1099, + "step": 19770 + }, + { + "epoch": 2.7917255012708275, + "grad_norm": 3.0437126894051496, + "learning_rate": 2.517395657941657e-07, + "loss": 0.1423, + "step": 19771 + }, + { + "epoch": 2.7918667043208134, + "grad_norm": 3.416545823309591, + "learning_rate": 2.513997767008092e-07, + "loss": 0.1611, + "step": 19772 + }, + { + "epoch": 2.7920079073707993, + "grad_norm": 3.4982600948230176, + "learning_rate": 2.51060214160711e-07, + "loss": 0.1635, + "step": 19773 + }, + { + "epoch": 2.792149110420785, + "grad_norm": 3.4204796345548076, + "learning_rate": 2.507208781817638e-07, + "loss": 0.158, + "step": 19774 + }, + { + "epoch": 2.792290313470771, + "grad_norm": 2.7485241680568326, + "learning_rate": 2.503817687718535e-07, + "loss": 0.0979, + "step": 19775 + }, + { + "epoch": 2.792431516520757, + "grad_norm": 3.274028409615973, + "learning_rate": 2.500428859388593e-07, + "loss": 0.1522, + "step": 19776 + }, + { + "epoch": 2.792572719570743, + "grad_norm": 2.910238675797789, + "learning_rate": 2.4970422969065823e-07, + "loss": 0.1356, + "step": 19777 + }, + { + "epoch": 2.7927139226207287, + "grad_norm": 3.7489227791941104, + "learning_rate": 2.4936580003512066e-07, + "loss": 0.1631, + "step": 19778 + }, + { + "epoch": 2.7928551256707146, + "grad_norm": 2.624590953545149, + "learning_rate": 2.4902759698011036e-07, + "loss": 0.0993, + "step": 19779 + }, + { + "epoch": 2.7929963287207005, + "grad_norm": 3.196320439324553, + "learning_rate": 2.4868962053348764e-07, + "loss": 0.133, + "step": 19780 + }, + { + "epoch": 2.7931375317706864, + "grad_norm": 2.543056808495935, + "learning_rate": 2.483518707031063e-07, + "loss": 0.1097, + "step": 19781 + }, + { + "epoch": 2.7932787348206722, + "grad_norm": 3.2965514813937213, + "learning_rate": 2.4801434749681553e-07, + "loss": 0.1644, + "step": 19782 + }, + { + "epoch": 2.793419937870658, + "grad_norm": 2.6367521778865495, + "learning_rate": 2.476770509224613e-07, + "loss": 0.1171, + "step": 19783 + }, + { + "epoch": 2.793561140920644, + "grad_norm": 2.8136171927157743, + "learning_rate": 2.473399809878807e-07, + "loss": 0.0926, + "step": 19784 + }, + { + "epoch": 2.79370234397063, + "grad_norm": 2.587668848106781, + "learning_rate": 2.4700313770090745e-07, + "loss": 0.0886, + "step": 19785 + }, + { + "epoch": 2.7938435470206158, + "grad_norm": 3.080842706029899, + "learning_rate": 2.466665210693686e-07, + "loss": 0.1358, + "step": 19786 + }, + { + "epoch": 2.7939847500706017, + "grad_norm": 3.0970160914555698, + "learning_rate": 2.463301311010857e-07, + "loss": 0.1641, + "step": 19787 + }, + { + "epoch": 2.7941259531205875, + "grad_norm": 4.084603500357253, + "learning_rate": 2.459939678038803e-07, + "loss": 0.1877, + "step": 19788 + }, + { + "epoch": 2.7942671561705734, + "grad_norm": 3.0875846681330144, + "learning_rate": 2.4565803118556273e-07, + "loss": 0.1621, + "step": 19789 + }, + { + "epoch": 2.7944083592205593, + "grad_norm": 2.904387360432018, + "learning_rate": 2.453223212539391e-07, + "loss": 0.137, + "step": 19790 + }, + { + "epoch": 2.794549562270545, + "grad_norm": 2.890734556719952, + "learning_rate": 2.44986838016813e-07, + "loss": 0.1404, + "step": 19791 + }, + { + "epoch": 2.794690765320531, + "grad_norm": 2.9278152837346214, + "learning_rate": 2.446515814819794e-07, + "loss": 0.144, + "step": 19792 + }, + { + "epoch": 2.794831968370517, + "grad_norm": 2.575229335020528, + "learning_rate": 2.443165516572299e-07, + "loss": 0.1222, + "step": 19793 + }, + { + "epoch": 2.794973171420503, + "grad_norm": 3.0127385635915798, + "learning_rate": 2.4398174855035037e-07, + "loss": 0.1459, + "step": 19794 + }, + { + "epoch": 2.7951143744704887, + "grad_norm": 2.996121451302781, + "learning_rate": 2.4364717216912246e-07, + "loss": 0.1458, + "step": 19795 + }, + { + "epoch": 2.7952555775204746, + "grad_norm": 3.6520492088059693, + "learning_rate": 2.4331282252132103e-07, + "loss": 0.1478, + "step": 19796 + }, + { + "epoch": 2.7953967805704605, + "grad_norm": 2.779089676498598, + "learning_rate": 2.4297869961471544e-07, + "loss": 0.1359, + "step": 19797 + }, + { + "epoch": 2.7955379836204464, + "grad_norm": 2.804376087681676, + "learning_rate": 2.4264480345707053e-07, + "loss": 0.132, + "step": 19798 + }, + { + "epoch": 2.795679186670432, + "grad_norm": 2.8383130752549524, + "learning_rate": 2.4231113405614684e-07, + "loss": 0.112, + "step": 19799 + }, + { + "epoch": 2.7958203897204177, + "grad_norm": 2.908452757002246, + "learning_rate": 2.419776914196981e-07, + "loss": 0.1513, + "step": 19800 + }, + { + "epoch": 2.7959615927704036, + "grad_norm": 3.2273178864815106, + "learning_rate": 2.4164447555547475e-07, + "loss": 0.1378, + "step": 19801 + }, + { + "epoch": 2.7961027958203895, + "grad_norm": 3.9839313726308414, + "learning_rate": 2.413114864712196e-07, + "loss": 0.1882, + "step": 19802 + }, + { + "epoch": 2.7962439988703753, + "grad_norm": 3.291626911587527, + "learning_rate": 2.4097872417467085e-07, + "loss": 0.1711, + "step": 19803 + }, + { + "epoch": 2.796385201920361, + "grad_norm": 2.578288245895344, + "learning_rate": 2.4064618867356003e-07, + "loss": 0.1164, + "step": 19804 + }, + { + "epoch": 2.796526404970347, + "grad_norm": 3.3662448965881757, + "learning_rate": 2.4031387997561885e-07, + "loss": 0.176, + "step": 19805 + }, + { + "epoch": 2.796667608020333, + "grad_norm": 3.515644057236048, + "learning_rate": 2.399817980885677e-07, + "loss": 0.1685, + "step": 19806 + }, + { + "epoch": 2.796808811070319, + "grad_norm": 3.405302845096384, + "learning_rate": 2.396499430201249e-07, + "loss": 0.1598, + "step": 19807 + }, + { + "epoch": 2.7969500141203048, + "grad_norm": 2.8651385750390284, + "learning_rate": 2.3931831477800207e-07, + "loss": 0.0948, + "step": 19808 + }, + { + "epoch": 2.7970912171702906, + "grad_norm": 3.66639107502556, + "learning_rate": 2.389869133699063e-07, + "loss": 0.1405, + "step": 19809 + }, + { + "epoch": 2.7972324202202765, + "grad_norm": 2.759372017256367, + "learning_rate": 2.3865573880353933e-07, + "loss": 0.1373, + "step": 19810 + }, + { + "epoch": 2.7973736232702624, + "grad_norm": 4.238204390451638, + "learning_rate": 2.3832479108659712e-07, + "loss": 0.2025, + "step": 19811 + }, + { + "epoch": 2.7975148263202483, + "grad_norm": 3.0515705610389485, + "learning_rate": 2.3799407022677022e-07, + "loss": 0.1355, + "step": 19812 + }, + { + "epoch": 2.797656029370234, + "grad_norm": 3.253519447937637, + "learning_rate": 2.3766357623174697e-07, + "loss": 0.1332, + "step": 19813 + }, + { + "epoch": 2.79779723242022, + "grad_norm": 2.7405457744602217, + "learning_rate": 2.373333091092056e-07, + "loss": 0.1387, + "step": 19814 + }, + { + "epoch": 2.797938435470206, + "grad_norm": 2.4886015473770624, + "learning_rate": 2.3700326886682113e-07, + "loss": 0.0992, + "step": 19815 + }, + { + "epoch": 2.798079638520192, + "grad_norm": 3.2068818422431193, + "learning_rate": 2.3667345551226406e-07, + "loss": 0.1491, + "step": 19816 + }, + { + "epoch": 2.7982208415701777, + "grad_norm": 3.1164620004700785, + "learning_rate": 2.3634386905320051e-07, + "loss": 0.1107, + "step": 19817 + }, + { + "epoch": 2.7983620446201636, + "grad_norm": 3.421131878222402, + "learning_rate": 2.3601450949728876e-07, + "loss": 0.1988, + "step": 19818 + }, + { + "epoch": 2.7985032476701495, + "grad_norm": 3.0871150932237823, + "learning_rate": 2.3568537685218386e-07, + "loss": 0.1577, + "step": 19819 + }, + { + "epoch": 2.7986444507201353, + "grad_norm": 2.9761272613025387, + "learning_rate": 2.3535647112553295e-07, + "loss": 0.1227, + "step": 19820 + }, + { + "epoch": 2.7987856537701212, + "grad_norm": 4.20404782126191, + "learning_rate": 2.3502779232497996e-07, + "loss": 0.1745, + "step": 19821 + }, + { + "epoch": 2.798926856820107, + "grad_norm": 2.863490475552275, + "learning_rate": 2.3469934045816435e-07, + "loss": 0.1136, + "step": 19822 + }, + { + "epoch": 2.799068059870093, + "grad_norm": 3.8291560660389274, + "learning_rate": 2.3437111553271884e-07, + "loss": 0.1959, + "step": 19823 + }, + { + "epoch": 2.799209262920079, + "grad_norm": 2.616143584175229, + "learning_rate": 2.3404311755627184e-07, + "loss": 0.1214, + "step": 19824 + }, + { + "epoch": 2.7993504659700648, + "grad_norm": 3.599342567482938, + "learning_rate": 2.33715346536445e-07, + "loss": 0.1568, + "step": 19825 + }, + { + "epoch": 2.7994916690200506, + "grad_norm": 2.3097396507714647, + "learning_rate": 2.3338780248085557e-07, + "loss": 0.086, + "step": 19826 + }, + { + "epoch": 2.7996328720700365, + "grad_norm": 3.3857436165788224, + "learning_rate": 2.3306048539711523e-07, + "loss": 0.1563, + "step": 19827 + }, + { + "epoch": 2.7997740751200224, + "grad_norm": 3.0497383476757154, + "learning_rate": 2.3273339529283123e-07, + "loss": 0.1321, + "step": 19828 + }, + { + "epoch": 2.7999152781700083, + "grad_norm": 2.549700587600999, + "learning_rate": 2.3240653217560528e-07, + "loss": 0.1072, + "step": 19829 + }, + { + "epoch": 2.800056481219994, + "grad_norm": 3.202581099256297, + "learning_rate": 2.320798960530335e-07, + "loss": 0.133, + "step": 19830 + }, + { + "epoch": 2.80019768426998, + "grad_norm": 3.3826781173382128, + "learning_rate": 2.317534869327065e-07, + "loss": 0.1503, + "step": 19831 + }, + { + "epoch": 2.800338887319966, + "grad_norm": 2.8956930173423774, + "learning_rate": 2.314273048222093e-07, + "loss": 0.104, + "step": 19832 + }, + { + "epoch": 2.800480090369952, + "grad_norm": 2.9903381989421773, + "learning_rate": 2.3110134972912257e-07, + "loss": 0.1129, + "step": 19833 + }, + { + "epoch": 2.8006212934199377, + "grad_norm": 2.882903487631556, + "learning_rate": 2.307756216610224e-07, + "loss": 0.1179, + "step": 19834 + }, + { + "epoch": 2.8007624964699236, + "grad_norm": 2.8401168358658, + "learning_rate": 2.3045012062547723e-07, + "loss": 0.1051, + "step": 19835 + }, + { + "epoch": 2.8009036995199095, + "grad_norm": 3.371857572473701, + "learning_rate": 2.301248466300543e-07, + "loss": 0.156, + "step": 19836 + }, + { + "epoch": 2.8010449025698954, + "grad_norm": 3.096511181361602, + "learning_rate": 2.297997996823087e-07, + "loss": 0.1321, + "step": 19837 + }, + { + "epoch": 2.8011861056198812, + "grad_norm": 2.812072138075477, + "learning_rate": 2.294749797897955e-07, + "loss": 0.1288, + "step": 19838 + }, + { + "epoch": 2.801327308669867, + "grad_norm": 3.84347055311232, + "learning_rate": 2.2915038696006532e-07, + "loss": 0.1884, + "step": 19839 + }, + { + "epoch": 2.801468511719853, + "grad_norm": 2.8362311181094486, + "learning_rate": 2.288260212006599e-07, + "loss": 0.1132, + "step": 19840 + }, + { + "epoch": 2.801609714769839, + "grad_norm": 3.2495479260667572, + "learning_rate": 2.2850188251911877e-07, + "loss": 0.1335, + "step": 19841 + }, + { + "epoch": 2.8017509178198248, + "grad_norm": 3.120511188023168, + "learning_rate": 2.2817797092297256e-07, + "loss": 0.1088, + "step": 19842 + }, + { + "epoch": 2.8018921208698107, + "grad_norm": 3.9133807311803337, + "learning_rate": 2.2785428641975194e-07, + "loss": 0.1324, + "step": 19843 + }, + { + "epoch": 2.8020333239197965, + "grad_norm": 2.779194699909916, + "learning_rate": 2.2753082901697644e-07, + "loss": 0.1204, + "step": 19844 + }, + { + "epoch": 2.8021745269697824, + "grad_norm": 2.485940268657665, + "learning_rate": 2.2720759872216446e-07, + "loss": 0.1177, + "step": 19845 + }, + { + "epoch": 2.8023157300197683, + "grad_norm": 2.9430383261182302, + "learning_rate": 2.2688459554282673e-07, + "loss": 0.1229, + "step": 19846 + }, + { + "epoch": 2.802456933069754, + "grad_norm": 3.1661517946768813, + "learning_rate": 2.265618194864705e-07, + "loss": 0.1504, + "step": 19847 + }, + { + "epoch": 2.80259813611974, + "grad_norm": 3.3488258924551726, + "learning_rate": 2.2623927056059647e-07, + "loss": 0.191, + "step": 19848 + }, + { + "epoch": 2.802739339169726, + "grad_norm": 2.8219201459371326, + "learning_rate": 2.25916948772702e-07, + "loss": 0.1262, + "step": 19849 + }, + { + "epoch": 2.802880542219712, + "grad_norm": 2.7883782414304505, + "learning_rate": 2.2559485413027438e-07, + "loss": 0.1446, + "step": 19850 + }, + { + "epoch": 2.8030217452696977, + "grad_norm": 3.1969355966536135, + "learning_rate": 2.2527298664080323e-07, + "loss": 0.1575, + "step": 19851 + }, + { + "epoch": 2.8031629483196836, + "grad_norm": 2.9628528182886025, + "learning_rate": 2.2495134631176585e-07, + "loss": 0.1231, + "step": 19852 + }, + { + "epoch": 2.8033041513696695, + "grad_norm": 2.990317245860659, + "learning_rate": 2.2462993315063853e-07, + "loss": 0.1472, + "step": 19853 + }, + { + "epoch": 2.8034453544196554, + "grad_norm": 3.5269220350115624, + "learning_rate": 2.243087471648886e-07, + "loss": 0.1595, + "step": 19854 + }, + { + "epoch": 2.8035865574696412, + "grad_norm": 3.646376824355197, + "learning_rate": 2.239877883619812e-07, + "loss": 0.1349, + "step": 19855 + }, + { + "epoch": 2.803727760519627, + "grad_norm": 3.567234596145686, + "learning_rate": 2.2366705674937596e-07, + "loss": 0.1713, + "step": 19856 + }, + { + "epoch": 2.803868963569613, + "grad_norm": 3.2875378059391758, + "learning_rate": 2.2334655233452683e-07, + "loss": 0.1633, + "step": 19857 + }, + { + "epoch": 2.804010166619599, + "grad_norm": 3.33718009344948, + "learning_rate": 2.230262751248813e-07, + "loss": 0.1494, + "step": 19858 + }, + { + "epoch": 2.804151369669585, + "grad_norm": 3.2532603283583263, + "learning_rate": 2.2270622512788332e-07, + "loss": 0.1687, + "step": 19859 + }, + { + "epoch": 2.8042925727195707, + "grad_norm": 3.4247493075007176, + "learning_rate": 2.2238640235097032e-07, + "loss": 0.1756, + "step": 19860 + }, + { + "epoch": 2.8044337757695565, + "grad_norm": 3.053248062521965, + "learning_rate": 2.220668068015741e-07, + "loss": 0.1334, + "step": 19861 + }, + { + "epoch": 2.8045749788195424, + "grad_norm": 2.7526404970463307, + "learning_rate": 2.217474384871221e-07, + "loss": 0.1331, + "step": 19862 + }, + { + "epoch": 2.8047161818695283, + "grad_norm": 3.6330934016895546, + "learning_rate": 2.2142829741503723e-07, + "loss": 0.2129, + "step": 19863 + }, + { + "epoch": 2.804857384919514, + "grad_norm": 3.1718699210032577, + "learning_rate": 2.2110938359273583e-07, + "loss": 0.1675, + "step": 19864 + }, + { + "epoch": 2.8049985879695, + "grad_norm": 2.914452023842297, + "learning_rate": 2.2079069702762968e-07, + "loss": 0.1333, + "step": 19865 + }, + { + "epoch": 2.805139791019486, + "grad_norm": 3.6591772853598226, + "learning_rate": 2.20472237727124e-07, + "loss": 0.1993, + "step": 19866 + }, + { + "epoch": 2.805280994069472, + "grad_norm": 3.3520677607277265, + "learning_rate": 2.2015400569861845e-07, + "loss": 0.1826, + "step": 19867 + }, + { + "epoch": 2.8054221971194577, + "grad_norm": 3.1360796626234215, + "learning_rate": 2.1983600094951153e-07, + "loss": 0.1073, + "step": 19868 + }, + { + "epoch": 2.8055634001694436, + "grad_norm": 2.602562423349869, + "learning_rate": 2.1951822348719287e-07, + "loss": 0.1328, + "step": 19869 + }, + { + "epoch": 2.8057046032194295, + "grad_norm": 3.0119026707439684, + "learning_rate": 2.192006733190466e-07, + "loss": 0.1112, + "step": 19870 + }, + { + "epoch": 2.8058458062694154, + "grad_norm": 3.6665006011981482, + "learning_rate": 2.1888335045245235e-07, + "loss": 0.1696, + "step": 19871 + }, + { + "epoch": 2.8059870093194013, + "grad_norm": 3.0730992035101186, + "learning_rate": 2.1856625489478532e-07, + "loss": 0.1409, + "step": 19872 + }, + { + "epoch": 2.806128212369387, + "grad_norm": 2.7664630164233595, + "learning_rate": 2.18249386653413e-07, + "loss": 0.1125, + "step": 19873 + }, + { + "epoch": 2.806269415419373, + "grad_norm": 3.1220354743612138, + "learning_rate": 2.1793274573570166e-07, + "loss": 0.1158, + "step": 19874 + }, + { + "epoch": 2.806410618469359, + "grad_norm": 4.032470883426098, + "learning_rate": 2.1761633214900767e-07, + "loss": 0.1751, + "step": 19875 + }, + { + "epoch": 2.806551821519345, + "grad_norm": 3.6935241113745283, + "learning_rate": 2.1730014590068625e-07, + "loss": 0.1598, + "step": 19876 + }, + { + "epoch": 2.8066930245693307, + "grad_norm": 3.342671238861575, + "learning_rate": 2.1698418699808488e-07, + "loss": 0.1451, + "step": 19877 + }, + { + "epoch": 2.8068342276193166, + "grad_norm": 3.5874192380057206, + "learning_rate": 2.1666845544854542e-07, + "loss": 0.1937, + "step": 19878 + }, + { + "epoch": 2.8069754306693024, + "grad_norm": 2.895591784977058, + "learning_rate": 2.1635295125940647e-07, + "loss": 0.1683, + "step": 19879 + }, + { + "epoch": 2.8071166337192883, + "grad_norm": 2.871996802385165, + "learning_rate": 2.1603767443799994e-07, + "loss": 0.1247, + "step": 19880 + }, + { + "epoch": 2.807257836769274, + "grad_norm": 2.8136607588017113, + "learning_rate": 2.157226249916522e-07, + "loss": 0.1244, + "step": 19881 + }, + { + "epoch": 2.80739903981926, + "grad_norm": 3.4856417171993717, + "learning_rate": 2.1540780292768516e-07, + "loss": 0.1424, + "step": 19882 + }, + { + "epoch": 2.807540242869246, + "grad_norm": 2.8603449844994, + "learning_rate": 2.1509320825341407e-07, + "loss": 0.1029, + "step": 19883 + }, + { + "epoch": 2.807681445919232, + "grad_norm": 3.23413525380374, + "learning_rate": 2.1477884097615308e-07, + "loss": 0.1595, + "step": 19884 + }, + { + "epoch": 2.8078226489692177, + "grad_norm": 3.154878024422754, + "learning_rate": 2.1446470110320306e-07, + "loss": 0.1178, + "step": 19885 + }, + { + "epoch": 2.8079638520192036, + "grad_norm": 2.981853921166106, + "learning_rate": 2.1415078864187034e-07, + "loss": 0.1112, + "step": 19886 + }, + { + "epoch": 2.8081050550691895, + "grad_norm": 3.6848725420251958, + "learning_rate": 2.1383710359944576e-07, + "loss": 0.1467, + "step": 19887 + }, + { + "epoch": 2.8082462581191754, + "grad_norm": 3.451982914028179, + "learning_rate": 2.135236459832213e-07, + "loss": 0.1407, + "step": 19888 + }, + { + "epoch": 2.8083874611691613, + "grad_norm": 2.908639996240426, + "learning_rate": 2.1321041580047997e-07, + "loss": 0.1331, + "step": 19889 + }, + { + "epoch": 2.808528664219147, + "grad_norm": 3.651446783025837, + "learning_rate": 2.1289741305850154e-07, + "loss": 0.1517, + "step": 19890 + }, + { + "epoch": 2.808669867269133, + "grad_norm": 3.874647278281282, + "learning_rate": 2.125846377645613e-07, + "loss": 0.1526, + "step": 19891 + }, + { + "epoch": 2.808811070319119, + "grad_norm": 3.1391740039980895, + "learning_rate": 2.1227208992592675e-07, + "loss": 0.1464, + "step": 19892 + }, + { + "epoch": 2.808952273369105, + "grad_norm": 2.563789612976445, + "learning_rate": 2.119597695498621e-07, + "loss": 0.1239, + "step": 19893 + }, + { + "epoch": 2.8090934764190907, + "grad_norm": 3.059663121316726, + "learning_rate": 2.1164767664362485e-07, + "loss": 0.1217, + "step": 19894 + }, + { + "epoch": 2.8092346794690766, + "grad_norm": 4.673663293850825, + "learning_rate": 2.1133581121446923e-07, + "loss": 0.2062, + "step": 19895 + }, + { + "epoch": 2.8093758825190625, + "grad_norm": 2.9714692210771, + "learning_rate": 2.1102417326964165e-07, + "loss": 0.1206, + "step": 19896 + }, + { + "epoch": 2.8095170855690483, + "grad_norm": 2.7546180952924124, + "learning_rate": 2.107127628163852e-07, + "loss": 0.1096, + "step": 19897 + }, + { + "epoch": 2.809658288619034, + "grad_norm": 3.4120931958782146, + "learning_rate": 2.104015798619352e-07, + "loss": 0.141, + "step": 19898 + }, + { + "epoch": 2.80979949166902, + "grad_norm": 2.599002414194393, + "learning_rate": 2.100906244135259e-07, + "loss": 0.1183, + "step": 19899 + }, + { + "epoch": 2.809940694719006, + "grad_norm": 2.656151464727653, + "learning_rate": 2.097798964783826e-07, + "loss": 0.1308, + "step": 19900 + }, + { + "epoch": 2.810081897768992, + "grad_norm": 3.3497727102697366, + "learning_rate": 2.0946939606372508e-07, + "loss": 0.1481, + "step": 19901 + }, + { + "epoch": 2.8102231008189777, + "grad_norm": 2.9843185994000065, + "learning_rate": 2.091591231767709e-07, + "loss": 0.1192, + "step": 19902 + }, + { + "epoch": 2.8103643038689636, + "grad_norm": 3.5992234484189716, + "learning_rate": 2.0884907782473206e-07, + "loss": 0.1402, + "step": 19903 + }, + { + "epoch": 2.8105055069189495, + "grad_norm": 2.7362378711712316, + "learning_rate": 2.085392600148106e-07, + "loss": 0.1113, + "step": 19904 + }, + { + "epoch": 2.8106467099689354, + "grad_norm": 2.173603154661741, + "learning_rate": 2.0822966975420856e-07, + "loss": 0.082, + "step": 19905 + }, + { + "epoch": 2.8107879130189213, + "grad_norm": 3.162703552957746, + "learning_rate": 2.0792030705012013e-07, + "loss": 0.1067, + "step": 19906 + }, + { + "epoch": 2.810929116068907, + "grad_norm": 2.67596595555186, + "learning_rate": 2.07611171909734e-07, + "loss": 0.1139, + "step": 19907 + }, + { + "epoch": 2.811070319118893, + "grad_norm": 4.15354068342097, + "learning_rate": 2.0730226434023671e-07, + "loss": 0.1649, + "step": 19908 + }, + { + "epoch": 2.811211522168879, + "grad_norm": 3.2557454396503926, + "learning_rate": 2.0699358434880468e-07, + "loss": 0.146, + "step": 19909 + }, + { + "epoch": 2.811352725218865, + "grad_norm": 3.750295435886425, + "learning_rate": 2.066851319426133e-07, + "loss": 0.1213, + "step": 19910 + }, + { + "epoch": 2.8114939282688507, + "grad_norm": 3.1293551892849027, + "learning_rate": 2.063769071288302e-07, + "loss": 0.1326, + "step": 19911 + }, + { + "epoch": 2.8116351313188366, + "grad_norm": 3.3683994362502583, + "learning_rate": 2.0606890991461737e-07, + "loss": 0.1281, + "step": 19912 + }, + { + "epoch": 2.8117763343688225, + "grad_norm": 3.0332567748548036, + "learning_rate": 2.0576114030713355e-07, + "loss": 0.1489, + "step": 19913 + }, + { + "epoch": 2.8119175374188083, + "grad_norm": 3.187496520032008, + "learning_rate": 2.0545359831353195e-07, + "loss": 0.1371, + "step": 19914 + }, + { + "epoch": 2.8120587404687942, + "grad_norm": 3.43973389191249, + "learning_rate": 2.051462839409579e-07, + "loss": 0.1276, + "step": 19915 + }, + { + "epoch": 2.81219994351878, + "grad_norm": 2.95050164583715, + "learning_rate": 2.0483919719655466e-07, + "loss": 0.1272, + "step": 19916 + }, + { + "epoch": 2.812341146568766, + "grad_norm": 3.4233778234758985, + "learning_rate": 2.0453233808745753e-07, + "loss": 0.1609, + "step": 19917 + }, + { + "epoch": 2.812482349618752, + "grad_norm": 3.7242376893870577, + "learning_rate": 2.0422570662079866e-07, + "loss": 0.1159, + "step": 19918 + }, + { + "epoch": 2.8126235526687378, + "grad_norm": 3.1273162896012274, + "learning_rate": 2.0391930280370342e-07, + "loss": 0.1471, + "step": 19919 + }, + { + "epoch": 2.8127647557187236, + "grad_norm": 4.569217867324224, + "learning_rate": 2.0361312664329502e-07, + "loss": 0.195, + "step": 19920 + }, + { + "epoch": 2.8129059587687095, + "grad_norm": 3.297864843268361, + "learning_rate": 2.0330717814668556e-07, + "loss": 0.1237, + "step": 19921 + }, + { + "epoch": 2.8130471618186954, + "grad_norm": 3.4029588593975717, + "learning_rate": 2.0300145732098596e-07, + "loss": 0.1468, + "step": 19922 + }, + { + "epoch": 2.8131883648686813, + "grad_norm": 2.474800549439491, + "learning_rate": 2.0269596417330173e-07, + "loss": 0.1351, + "step": 19923 + }, + { + "epoch": 2.813329567918667, + "grad_norm": 4.469617574695108, + "learning_rate": 2.0239069871073157e-07, + "loss": 0.1597, + "step": 19924 + }, + { + "epoch": 2.813470770968653, + "grad_norm": 2.455344212099276, + "learning_rate": 2.0208566094037096e-07, + "loss": 0.0922, + "step": 19925 + }, + { + "epoch": 2.813611974018639, + "grad_norm": 3.4281699016310156, + "learning_rate": 2.0178085086930865e-07, + "loss": 0.1514, + "step": 19926 + }, + { + "epoch": 2.813753177068625, + "grad_norm": 2.9901803542713243, + "learning_rate": 2.0147626850462786e-07, + "loss": 0.1408, + "step": 19927 + }, + { + "epoch": 2.8138943801186107, + "grad_norm": 3.714182344893192, + "learning_rate": 2.0117191385340629e-07, + "loss": 0.1835, + "step": 19928 + }, + { + "epoch": 2.8140355831685966, + "grad_norm": 3.2533105363564605, + "learning_rate": 2.0086778692271824e-07, + "loss": 0.1854, + "step": 19929 + }, + { + "epoch": 2.8141767862185825, + "grad_norm": 2.9722172423195987, + "learning_rate": 2.005638877196303e-07, + "loss": 0.1303, + "step": 19930 + }, + { + "epoch": 2.8143179892685684, + "grad_norm": 3.009214846768815, + "learning_rate": 2.0026021625120574e-07, + "loss": 0.1419, + "step": 19931 + }, + { + "epoch": 2.8144591923185542, + "grad_norm": 3.047166019452896, + "learning_rate": 1.999567725245022e-07, + "loss": 0.1346, + "step": 19932 + }, + { + "epoch": 2.81460039536854, + "grad_norm": 2.599856736831499, + "learning_rate": 1.9965355654656958e-07, + "loss": 0.1123, + "step": 19933 + }, + { + "epoch": 2.814741598418526, + "grad_norm": 3.2746826505589475, + "learning_rate": 1.9935056832445676e-07, + "loss": 0.1641, + "step": 19934 + }, + { + "epoch": 2.814882801468512, + "grad_norm": 2.4700331236673487, + "learning_rate": 1.9904780786520473e-07, + "loss": 0.109, + "step": 19935 + }, + { + "epoch": 2.8150240045184978, + "grad_norm": 3.5553211471332715, + "learning_rate": 1.9874527517584784e-07, + "loss": 0.1588, + "step": 19936 + }, + { + "epoch": 2.8151652075684837, + "grad_norm": 3.1693383097547185, + "learning_rate": 1.984429702634194e-07, + "loss": 0.1302, + "step": 19937 + }, + { + "epoch": 2.8153064106184695, + "grad_norm": 3.4500170097762735, + "learning_rate": 1.9814089313494157e-07, + "loss": 0.1448, + "step": 19938 + }, + { + "epoch": 2.8154476136684554, + "grad_norm": 3.1163600923269374, + "learning_rate": 1.9783904379743758e-07, + "loss": 0.1551, + "step": 19939 + }, + { + "epoch": 2.8155888167184413, + "grad_norm": 3.8876502651382094, + "learning_rate": 1.975374222579207e-07, + "loss": 0.1477, + "step": 19940 + }, + { + "epoch": 2.815730019768427, + "grad_norm": 2.153131549769038, + "learning_rate": 1.9723602852339985e-07, + "loss": 0.0877, + "step": 19941 + }, + { + "epoch": 2.815871222818413, + "grad_norm": 3.0825470440176255, + "learning_rate": 1.9693486260088047e-07, + "loss": 0.1713, + "step": 19942 + }, + { + "epoch": 2.816012425868399, + "grad_norm": 2.7147099591208232, + "learning_rate": 1.9663392449736142e-07, + "loss": 0.1145, + "step": 19943 + }, + { + "epoch": 2.816153628918385, + "grad_norm": 3.3409853228136113, + "learning_rate": 1.9633321421983708e-07, + "loss": 0.1201, + "step": 19944 + }, + { + "epoch": 2.8162948319683707, + "grad_norm": 2.9415384879205098, + "learning_rate": 1.9603273177529415e-07, + "loss": 0.1027, + "step": 19945 + }, + { + "epoch": 2.8164360350183566, + "grad_norm": 3.106972323325981, + "learning_rate": 1.957324771707181e-07, + "loss": 0.1043, + "step": 19946 + }, + { + "epoch": 2.8165772380683425, + "grad_norm": 2.435429001964734, + "learning_rate": 1.9543245041308224e-07, + "loss": 0.1094, + "step": 19947 + }, + { + "epoch": 2.8167184411183284, + "grad_norm": 3.38095144185477, + "learning_rate": 1.9513265150936433e-07, + "loss": 0.1414, + "step": 19948 + }, + { + "epoch": 2.8168596441683142, + "grad_norm": 3.108336136514969, + "learning_rate": 1.948330804665277e-07, + "loss": 0.1648, + "step": 19949 + }, + { + "epoch": 2.8170008472183, + "grad_norm": 2.797628039061124, + "learning_rate": 1.945337372915368e-07, + "loss": 0.0963, + "step": 19950 + }, + { + "epoch": 2.817142050268286, + "grad_norm": 2.940999376394885, + "learning_rate": 1.9423462199134713e-07, + "loss": 0.1402, + "step": 19951 + }, + { + "epoch": 2.817283253318272, + "grad_norm": 2.9326721584824234, + "learning_rate": 1.9393573457290983e-07, + "loss": 0.1026, + "step": 19952 + }, + { + "epoch": 2.817424456368258, + "grad_norm": 3.2435673757005734, + "learning_rate": 1.9363707504317042e-07, + "loss": 0.135, + "step": 19953 + }, + { + "epoch": 2.8175656594182437, + "grad_norm": 2.627565942906979, + "learning_rate": 1.9333864340907116e-07, + "loss": 0.0947, + "step": 19954 + }, + { + "epoch": 2.8177068624682295, + "grad_norm": 2.495772579634133, + "learning_rate": 1.930404396775465e-07, + "loss": 0.1069, + "step": 19955 + }, + { + "epoch": 2.8178480655182154, + "grad_norm": 3.420042760600069, + "learning_rate": 1.9274246385552753e-07, + "loss": 0.1625, + "step": 19956 + }, + { + "epoch": 2.8179892685682013, + "grad_norm": 3.4156790617669177, + "learning_rate": 1.9244471594993652e-07, + "loss": 0.1384, + "step": 19957 + }, + { + "epoch": 2.818130471618187, + "grad_norm": 3.0881298321118016, + "learning_rate": 1.921471959676957e-07, + "loss": 0.1344, + "step": 19958 + }, + { + "epoch": 2.818271674668173, + "grad_norm": 2.7476251885555927, + "learning_rate": 1.9184990391571846e-07, + "loss": 0.1129, + "step": 19959 + }, + { + "epoch": 2.818412877718159, + "grad_norm": 3.066981159504285, + "learning_rate": 1.9155283980091366e-07, + "loss": 0.1121, + "step": 19960 + }, + { + "epoch": 2.818554080768145, + "grad_norm": 3.341191280030943, + "learning_rate": 1.9125600363018472e-07, + "loss": 0.1534, + "step": 19961 + }, + { + "epoch": 2.8186952838181307, + "grad_norm": 3.3361115946119204, + "learning_rate": 1.909593954104294e-07, + "loss": 0.1296, + "step": 19962 + }, + { + "epoch": 2.8188364868681166, + "grad_norm": 3.3250030270386564, + "learning_rate": 1.9066301514854334e-07, + "loss": 0.1436, + "step": 19963 + }, + { + "epoch": 2.8189776899181025, + "grad_norm": 2.8798322727909103, + "learning_rate": 1.9036686285141105e-07, + "loss": 0.1525, + "step": 19964 + }, + { + "epoch": 2.8191188929680884, + "grad_norm": 3.742796963550915, + "learning_rate": 1.9007093852591696e-07, + "loss": 0.1595, + "step": 19965 + }, + { + "epoch": 2.8192600960180743, + "grad_norm": 3.1610697184557837, + "learning_rate": 1.8977524217893782e-07, + "loss": 0.14, + "step": 19966 + }, + { + "epoch": 2.81940129906806, + "grad_norm": 3.6196421448622282, + "learning_rate": 1.8947977381734484e-07, + "loss": 0.1919, + "step": 19967 + }, + { + "epoch": 2.819542502118046, + "grad_norm": 3.5052864846912186, + "learning_rate": 1.891845334480058e-07, + "loss": 0.165, + "step": 19968 + }, + { + "epoch": 2.819683705168032, + "grad_norm": 2.801407731596878, + "learning_rate": 1.8888952107778081e-07, + "loss": 0.1134, + "step": 19969 + }, + { + "epoch": 2.8198249082180173, + "grad_norm": 2.429398685236878, + "learning_rate": 1.8859473671352546e-07, + "loss": 0.0971, + "step": 19970 + }, + { + "epoch": 2.8199661112680032, + "grad_norm": 3.1523193342893365, + "learning_rate": 1.8830018036209208e-07, + "loss": 0.1262, + "step": 19971 + }, + { + "epoch": 2.820107314317989, + "grad_norm": 3.3911042946535446, + "learning_rate": 1.8800585203032517e-07, + "loss": 0.1421, + "step": 19972 + }, + { + "epoch": 2.820248517367975, + "grad_norm": 2.844415942423072, + "learning_rate": 1.8771175172506484e-07, + "loss": 0.0994, + "step": 19973 + }, + { + "epoch": 2.820389720417961, + "grad_norm": 3.57009276235482, + "learning_rate": 1.874178794531456e-07, + "loss": 0.163, + "step": 19974 + }, + { + "epoch": 2.8205309234679468, + "grad_norm": 3.1666234009411003, + "learning_rate": 1.8712423522139756e-07, + "loss": 0.1636, + "step": 19975 + }, + { + "epoch": 2.8206721265179326, + "grad_norm": 2.8244028866461752, + "learning_rate": 1.86830819036643e-07, + "loss": 0.1118, + "step": 19976 + }, + { + "epoch": 2.8208133295679185, + "grad_norm": 3.1805826012913188, + "learning_rate": 1.865376309057032e-07, + "loss": 0.1148, + "step": 19977 + }, + { + "epoch": 2.8209545326179044, + "grad_norm": 4.116302741670571, + "learning_rate": 1.8624467083539154e-07, + "loss": 0.1609, + "step": 19978 + }, + { + "epoch": 2.8210957356678903, + "grad_norm": 2.7243995910849854, + "learning_rate": 1.8595193883251484e-07, + "loss": 0.0978, + "step": 19979 + }, + { + "epoch": 2.821236938717876, + "grad_norm": 2.2818341253199663, + "learning_rate": 1.8565943490387761e-07, + "loss": 0.0967, + "step": 19980 + }, + { + "epoch": 2.821378141767862, + "grad_norm": 2.912330235195069, + "learning_rate": 1.8536715905627445e-07, + "loss": 0.1362, + "step": 19981 + }, + { + "epoch": 2.821519344817848, + "grad_norm": 3.3738034268073913, + "learning_rate": 1.85075111296501e-07, + "loss": 0.1532, + "step": 19982 + }, + { + "epoch": 2.821660547867834, + "grad_norm": 3.6794137805430815, + "learning_rate": 1.84783291631343e-07, + "loss": 0.1804, + "step": 19983 + }, + { + "epoch": 2.8218017509178197, + "grad_norm": 2.8305574850888546, + "learning_rate": 1.8449170006758278e-07, + "loss": 0.1522, + "step": 19984 + }, + { + "epoch": 2.8219429539678056, + "grad_norm": 2.973856765596425, + "learning_rate": 1.842003366119971e-07, + "loss": 0.1533, + "step": 19985 + }, + { + "epoch": 2.8220841570177915, + "grad_norm": 3.3986121684985022, + "learning_rate": 1.8390920127135613e-07, + "loss": 0.1453, + "step": 19986 + }, + { + "epoch": 2.8222253600677774, + "grad_norm": 3.1375206305441776, + "learning_rate": 1.836182940524256e-07, + "loss": 0.1569, + "step": 19987 + }, + { + "epoch": 2.8223665631177632, + "grad_norm": 3.3080607177037273, + "learning_rate": 1.833276149619667e-07, + "loss": 0.1574, + "step": 19988 + }, + { + "epoch": 2.822507766167749, + "grad_norm": 3.8773492877765143, + "learning_rate": 1.830371640067341e-07, + "loss": 0.1422, + "step": 19989 + }, + { + "epoch": 2.822648969217735, + "grad_norm": 2.719084425884646, + "learning_rate": 1.8274694119347901e-07, + "loss": 0.1568, + "step": 19990 + }, + { + "epoch": 2.822790172267721, + "grad_norm": 3.87600917519555, + "learning_rate": 1.8245694652894496e-07, + "loss": 0.1579, + "step": 19991 + }, + { + "epoch": 2.8229313753177068, + "grad_norm": 3.020654148882816, + "learning_rate": 1.8216718001987098e-07, + "loss": 0.1094, + "step": 19992 + }, + { + "epoch": 2.8230725783676927, + "grad_norm": 4.898352693979814, + "learning_rate": 1.8187764167299171e-07, + "loss": 0.2184, + "step": 19993 + }, + { + "epoch": 2.8232137814176785, + "grad_norm": 2.7835101836987013, + "learning_rate": 1.815883314950373e-07, + "loss": 0.1335, + "step": 19994 + }, + { + "epoch": 2.8233549844676644, + "grad_norm": 3.1076394258538493, + "learning_rate": 1.8129924949272904e-07, + "loss": 0.1289, + "step": 19995 + }, + { + "epoch": 2.8234961875176503, + "grad_norm": 3.6946907431862357, + "learning_rate": 1.81010395672786e-07, + "loss": 0.1639, + "step": 19996 + }, + { + "epoch": 2.823637390567636, + "grad_norm": 3.151729676732851, + "learning_rate": 1.807217700419206e-07, + "loss": 0.1111, + "step": 19997 + }, + { + "epoch": 2.823778593617622, + "grad_norm": 3.318976105346471, + "learning_rate": 1.804333726068408e-07, + "loss": 0.1618, + "step": 19998 + }, + { + "epoch": 2.823919796667608, + "grad_norm": 4.254973572717238, + "learning_rate": 1.801452033742479e-07, + "loss": 0.175, + "step": 19999 + }, + { + "epoch": 2.824060999717594, + "grad_norm": 2.4012838561269487, + "learning_rate": 1.79857262350841e-07, + "loss": 0.1078, + "step": 20000 + }, + { + "epoch": 2.8242022027675797, + "grad_norm": 2.7468931906044376, + "learning_rate": 1.7956954954330918e-07, + "loss": 0.1231, + "step": 20001 + }, + { + "epoch": 2.8243434058175656, + "grad_norm": 4.187355013022518, + "learning_rate": 1.7928206495834043e-07, + "loss": 0.1617, + "step": 20002 + }, + { + "epoch": 2.8244846088675515, + "grad_norm": 4.9777430289691065, + "learning_rate": 1.78994808602615e-07, + "loss": 0.2173, + "step": 20003 + }, + { + "epoch": 2.8246258119175374, + "grad_norm": 2.8064552756946832, + "learning_rate": 1.787077804828097e-07, + "loss": 0.1279, + "step": 20004 + }, + { + "epoch": 2.8247670149675232, + "grad_norm": 2.5618895442132725, + "learning_rate": 1.7842098060559366e-07, + "loss": 0.1146, + "step": 20005 + }, + { + "epoch": 2.824908218017509, + "grad_norm": 2.7035873128791357, + "learning_rate": 1.7813440897763158e-07, + "loss": 0.1128, + "step": 20006 + }, + { + "epoch": 2.825049421067495, + "grad_norm": 3.4576972423535772, + "learning_rate": 1.7784806560558477e-07, + "loss": 0.1384, + "step": 20007 + }, + { + "epoch": 2.825190624117481, + "grad_norm": 3.547624333031972, + "learning_rate": 1.7756195049610682e-07, + "loss": 0.1632, + "step": 20008 + }, + { + "epoch": 2.825331827167467, + "grad_norm": 3.537132018896759, + "learning_rate": 1.7727606365584792e-07, + "loss": 0.1718, + "step": 20009 + }, + { + "epoch": 2.8254730302174527, + "grad_norm": 2.7726777317094378, + "learning_rate": 1.769904050914495e-07, + "loss": 0.1572, + "step": 20010 + }, + { + "epoch": 2.8256142332674385, + "grad_norm": 3.0074108247981486, + "learning_rate": 1.7670497480955286e-07, + "loss": 0.1166, + "step": 20011 + }, + { + "epoch": 2.8257554363174244, + "grad_norm": 3.134770383679413, + "learning_rate": 1.7641977281679046e-07, + "loss": 0.1585, + "step": 20012 + }, + { + "epoch": 2.8258966393674103, + "grad_norm": 3.0944634560173467, + "learning_rate": 1.7613479911979036e-07, + "loss": 0.1718, + "step": 20013 + }, + { + "epoch": 2.826037842417396, + "grad_norm": 3.38886925330426, + "learning_rate": 1.7585005372517504e-07, + "loss": 0.1661, + "step": 20014 + }, + { + "epoch": 2.826179045467382, + "grad_norm": 2.7393362901219307, + "learning_rate": 1.7556553663956034e-07, + "loss": 0.1138, + "step": 20015 + }, + { + "epoch": 2.826320248517368, + "grad_norm": 3.0202584126381806, + "learning_rate": 1.7528124786956092e-07, + "loss": 0.1217, + "step": 20016 + }, + { + "epoch": 2.826461451567354, + "grad_norm": 3.290678634249369, + "learning_rate": 1.7499718742178152e-07, + "loss": 0.1553, + "step": 20017 + }, + { + "epoch": 2.8266026546173397, + "grad_norm": 2.9239943075614496, + "learning_rate": 1.7471335530282574e-07, + "loss": 0.1517, + "step": 20018 + }, + { + "epoch": 2.8267438576673256, + "grad_norm": 3.1155126303046683, + "learning_rate": 1.744297515192872e-07, + "loss": 0.1471, + "step": 20019 + }, + { + "epoch": 2.8268850607173115, + "grad_norm": 3.0451486889715937, + "learning_rate": 1.741463760777584e-07, + "loss": 0.1162, + "step": 20020 + }, + { + "epoch": 2.8270262637672974, + "grad_norm": 2.9090605696067655, + "learning_rate": 1.7386322898482412e-07, + "loss": 0.1347, + "step": 20021 + }, + { + "epoch": 2.8271674668172833, + "grad_norm": 2.925972617336945, + "learning_rate": 1.7358031024706456e-07, + "loss": 0.1568, + "step": 20022 + }, + { + "epoch": 2.827308669867269, + "grad_norm": 3.3069164735181493, + "learning_rate": 1.7329761987105564e-07, + "loss": 0.1311, + "step": 20023 + }, + { + "epoch": 2.827449872917255, + "grad_norm": 2.282752963461291, + "learning_rate": 1.7301515786336541e-07, + "loss": 0.1048, + "step": 20024 + }, + { + "epoch": 2.827591075967241, + "grad_norm": 3.166255281648736, + "learning_rate": 1.7273292423055975e-07, + "loss": 0.1068, + "step": 20025 + }, + { + "epoch": 2.827732279017227, + "grad_norm": 2.833925617736957, + "learning_rate": 1.7245091897919564e-07, + "loss": 0.1081, + "step": 20026 + }, + { + "epoch": 2.8278734820672127, + "grad_norm": 2.8406202269014633, + "learning_rate": 1.7216914211582892e-07, + "loss": 0.1246, + "step": 20027 + }, + { + "epoch": 2.8280146851171986, + "grad_norm": 3.606967636895364, + "learning_rate": 1.7188759364700658e-07, + "loss": 0.1975, + "step": 20028 + }, + { + "epoch": 2.8281558881671844, + "grad_norm": 3.4952839360828665, + "learning_rate": 1.716062735792723e-07, + "loss": 0.161, + "step": 20029 + }, + { + "epoch": 2.8282970912171703, + "grad_norm": 2.887793045218106, + "learning_rate": 1.7132518191916413e-07, + "loss": 0.1161, + "step": 20030 + }, + { + "epoch": 2.828438294267156, + "grad_norm": 3.6815798641138, + "learning_rate": 1.710443186732147e-07, + "loss": 0.1627, + "step": 20031 + }, + { + "epoch": 2.828579497317142, + "grad_norm": 3.7854032044275914, + "learning_rate": 1.7076368384794872e-07, + "loss": 0.1444, + "step": 20032 + }, + { + "epoch": 2.828720700367128, + "grad_norm": 2.801745923695371, + "learning_rate": 1.704832774498899e-07, + "loss": 0.1046, + "step": 20033 + }, + { + "epoch": 2.828861903417114, + "grad_norm": 3.4479850668546557, + "learning_rate": 1.7020309948555525e-07, + "loss": 0.1676, + "step": 20034 + }, + { + "epoch": 2.8290031064670997, + "grad_norm": 3.2734328039527525, + "learning_rate": 1.699231499614562e-07, + "loss": 0.1284, + "step": 20035 + }, + { + "epoch": 2.8291443095170856, + "grad_norm": 2.889018498899647, + "learning_rate": 1.6964342888409646e-07, + "loss": 0.1101, + "step": 20036 + }, + { + "epoch": 2.8292855125670715, + "grad_norm": 3.3830369885861074, + "learning_rate": 1.693639362599786e-07, + "loss": 0.1656, + "step": 20037 + }, + { + "epoch": 2.8294267156170574, + "grad_norm": 3.743580663291065, + "learning_rate": 1.6908467209559853e-07, + "loss": 0.1742, + "step": 20038 + }, + { + "epoch": 2.8295679186670433, + "grad_norm": 3.87866383737866, + "learning_rate": 1.688056363974433e-07, + "loss": 0.1683, + "step": 20039 + }, + { + "epoch": 2.829709121717029, + "grad_norm": 3.413257315776105, + "learning_rate": 1.685268291719999e-07, + "loss": 0.1366, + "step": 20040 + }, + { + "epoch": 2.829850324767015, + "grad_norm": 2.9809593811841593, + "learning_rate": 1.6824825042574766e-07, + "loss": 0.137, + "step": 20041 + }, + { + "epoch": 2.829991527817001, + "grad_norm": 3.54127210834927, + "learning_rate": 1.6796990016515914e-07, + "loss": 0.1694, + "step": 20042 + }, + { + "epoch": 2.830132730866987, + "grad_norm": 3.587748049388663, + "learning_rate": 1.6769177839670468e-07, + "loss": 0.1747, + "step": 20043 + }, + { + "epoch": 2.8302739339169727, + "grad_norm": 2.6410395721894093, + "learning_rate": 1.6741388512684586e-07, + "loss": 0.1292, + "step": 20044 + }, + { + "epoch": 2.8304151369669586, + "grad_norm": 3.388886587797723, + "learning_rate": 1.6713622036204303e-07, + "loss": 0.1334, + "step": 20045 + }, + { + "epoch": 2.8305563400169444, + "grad_norm": 3.0715184052644053, + "learning_rate": 1.6685878410874768e-07, + "loss": 0.1665, + "step": 20046 + }, + { + "epoch": 2.8306975430669303, + "grad_norm": 2.985054510823789, + "learning_rate": 1.665815763734091e-07, + "loss": 0.1337, + "step": 20047 + }, + { + "epoch": 2.830838746116916, + "grad_norm": 3.143342979931068, + "learning_rate": 1.663045971624666e-07, + "loss": 0.1423, + "step": 20048 + }, + { + "epoch": 2.830979949166902, + "grad_norm": 3.151113221804687, + "learning_rate": 1.6602784648235838e-07, + "loss": 0.1419, + "step": 20049 + }, + { + "epoch": 2.831121152216888, + "grad_norm": 4.8016177881321545, + "learning_rate": 1.657513243395159e-07, + "loss": 0.1859, + "step": 20050 + }, + { + "epoch": 2.831262355266874, + "grad_norm": 2.963714438418722, + "learning_rate": 1.6547503074036518e-07, + "loss": 0.1384, + "step": 20051 + }, + { + "epoch": 2.8314035583168597, + "grad_norm": 3.2996236762080313, + "learning_rate": 1.6519896569132886e-07, + "loss": 0.1296, + "step": 20052 + }, + { + "epoch": 2.8315447613668456, + "grad_norm": 3.136524718997218, + "learning_rate": 1.649231291988196e-07, + "loss": 0.1396, + "step": 20053 + }, + { + "epoch": 2.8316859644168315, + "grad_norm": 2.4764576082112204, + "learning_rate": 1.646475212692511e-07, + "loss": 0.1118, + "step": 20054 + }, + { + "epoch": 2.8318271674668174, + "grad_norm": 3.940081522566291, + "learning_rate": 1.6437214190902606e-07, + "loss": 0.1604, + "step": 20055 + }, + { + "epoch": 2.8319683705168033, + "grad_norm": 3.8983725695851583, + "learning_rate": 1.640969911245438e-07, + "loss": 0.139, + "step": 20056 + }, + { + "epoch": 2.832109573566789, + "grad_norm": 3.03172787183828, + "learning_rate": 1.6382206892220032e-07, + "loss": 0.1107, + "step": 20057 + }, + { + "epoch": 2.832250776616775, + "grad_norm": 2.921795100658245, + "learning_rate": 1.6354737530838494e-07, + "loss": 0.1655, + "step": 20058 + }, + { + "epoch": 2.832391979666761, + "grad_norm": 3.2768851485848827, + "learning_rate": 1.6327291028947923e-07, + "loss": 0.147, + "step": 20059 + }, + { + "epoch": 2.832533182716747, + "grad_norm": 3.9173352919643625, + "learning_rate": 1.6299867387186363e-07, + "loss": 0.2269, + "step": 20060 + }, + { + "epoch": 2.8326743857667327, + "grad_norm": 3.68988948154806, + "learning_rate": 1.6272466606190972e-07, + "loss": 0.1611, + "step": 20061 + }, + { + "epoch": 2.8328155888167186, + "grad_norm": 3.081646350689254, + "learning_rate": 1.6245088686598686e-07, + "loss": 0.1443, + "step": 20062 + }, + { + "epoch": 2.8329567918667045, + "grad_norm": 2.893537884250687, + "learning_rate": 1.621773362904566e-07, + "loss": 0.1126, + "step": 20063 + }, + { + "epoch": 2.8330979949166903, + "grad_norm": 4.112954248522791, + "learning_rate": 1.6190401434167725e-07, + "loss": 0.1995, + "step": 20064 + }, + { + "epoch": 2.8332391979666762, + "grad_norm": 3.698312193170609, + "learning_rate": 1.616309210259992e-07, + "loss": 0.1626, + "step": 20065 + }, + { + "epoch": 2.833380401016662, + "grad_norm": 3.7188467830075616, + "learning_rate": 1.6135805634976966e-07, + "loss": 0.1791, + "step": 20066 + }, + { + "epoch": 2.833521604066648, + "grad_norm": 2.5095339745503993, + "learning_rate": 1.6108542031932904e-07, + "loss": 0.117, + "step": 20067 + }, + { + "epoch": 2.833662807116634, + "grad_norm": 3.6649368234155304, + "learning_rate": 1.608130129410157e-07, + "loss": 0.1849, + "step": 20068 + }, + { + "epoch": 2.8338040101666198, + "grad_norm": 3.444949806182833, + "learning_rate": 1.6054083422115786e-07, + "loss": 0.1672, + "step": 20069 + }, + { + "epoch": 2.8339452132166056, + "grad_norm": 2.9269732816384515, + "learning_rate": 1.6026888416608267e-07, + "loss": 0.1324, + "step": 20070 + }, + { + "epoch": 2.8340864162665915, + "grad_norm": 2.399734641112467, + "learning_rate": 1.599971627821084e-07, + "loss": 0.0983, + "step": 20071 + }, + { + "epoch": 2.834227619316577, + "grad_norm": 2.9320858757455768, + "learning_rate": 1.5972567007555008e-07, + "loss": 0.1373, + "step": 20072 + }, + { + "epoch": 2.834368822366563, + "grad_norm": 3.7376639511626313, + "learning_rate": 1.5945440605271812e-07, + "loss": 0.151, + "step": 20073 + }, + { + "epoch": 2.8345100254165487, + "grad_norm": 2.6430765057450243, + "learning_rate": 1.591833707199153e-07, + "loss": 0.1222, + "step": 20074 + }, + { + "epoch": 2.8346512284665346, + "grad_norm": 4.339031596277187, + "learning_rate": 1.5891256408344214e-07, + "loss": 0.1599, + "step": 20075 + }, + { + "epoch": 2.8347924315165205, + "grad_norm": 3.8358527825448867, + "learning_rate": 1.5864198614959025e-07, + "loss": 0.1349, + "step": 20076 + }, + { + "epoch": 2.8349336345665064, + "grad_norm": 3.839986358422419, + "learning_rate": 1.5837163692464797e-07, + "loss": 0.182, + "step": 20077 + }, + { + "epoch": 2.8350748376164923, + "grad_norm": 3.763195504779256, + "learning_rate": 1.5810151641489912e-07, + "loss": 0.1805, + "step": 20078 + }, + { + "epoch": 2.835216040666478, + "grad_norm": 3.165343900013053, + "learning_rate": 1.5783162462661983e-07, + "loss": 0.1283, + "step": 20079 + }, + { + "epoch": 2.835357243716464, + "grad_norm": 2.9614233286449902, + "learning_rate": 1.5756196156608393e-07, + "loss": 0.1282, + "step": 20080 + }, + { + "epoch": 2.83549844676645, + "grad_norm": 3.7761481557618604, + "learning_rate": 1.572925272395587e-07, + "loss": 0.1711, + "step": 20081 + }, + { + "epoch": 2.835639649816436, + "grad_norm": 3.537461371354076, + "learning_rate": 1.5702332165330348e-07, + "loss": 0.1632, + "step": 20082 + }, + { + "epoch": 2.8357808528664217, + "grad_norm": 3.2157354586250206, + "learning_rate": 1.5675434481357444e-07, + "loss": 0.1399, + "step": 20083 + }, + { + "epoch": 2.8359220559164076, + "grad_norm": 3.532780852614847, + "learning_rate": 1.5648559672662322e-07, + "loss": 0.1321, + "step": 20084 + }, + { + "epoch": 2.8360632589663934, + "grad_norm": 2.763795242784741, + "learning_rate": 1.5621707739869707e-07, + "loss": 0.1291, + "step": 20085 + }, + { + "epoch": 2.8362044620163793, + "grad_norm": 3.3270571781773435, + "learning_rate": 1.559487868360343e-07, + "loss": 0.1564, + "step": 20086 + }, + { + "epoch": 2.836345665066365, + "grad_norm": 2.621539801785005, + "learning_rate": 1.5568072504486997e-07, + "loss": 0.1167, + "step": 20087 + }, + { + "epoch": 2.836486868116351, + "grad_norm": 3.592711960328074, + "learning_rate": 1.554128920314346e-07, + "loss": 0.1982, + "step": 20088 + }, + { + "epoch": 2.836628071166337, + "grad_norm": 2.5159818649070713, + "learning_rate": 1.5514528780195215e-07, + "loss": 0.1074, + "step": 20089 + }, + { + "epoch": 2.836769274216323, + "grad_norm": 3.72750913575453, + "learning_rate": 1.5487791236264095e-07, + "loss": 0.1663, + "step": 20090 + }, + { + "epoch": 2.8369104772663087, + "grad_norm": 3.0813563181449397, + "learning_rate": 1.54610765719716e-07, + "loss": 0.0976, + "step": 20091 + }, + { + "epoch": 2.8370516803162946, + "grad_norm": 3.090326759927255, + "learning_rate": 1.543438478793846e-07, + "loss": 0.1442, + "step": 20092 + }, + { + "epoch": 2.8371928833662805, + "grad_norm": 3.229843290110156, + "learning_rate": 1.5407715884785068e-07, + "loss": 0.1545, + "step": 20093 + }, + { + "epoch": 2.8373340864162664, + "grad_norm": 3.1787425761536783, + "learning_rate": 1.5381069863131037e-07, + "loss": 0.114, + "step": 20094 + }, + { + "epoch": 2.8374752894662523, + "grad_norm": 2.7349647252627105, + "learning_rate": 1.535444672359576e-07, + "loss": 0.1191, + "step": 20095 + }, + { + "epoch": 2.837616492516238, + "grad_norm": 2.812462470016027, + "learning_rate": 1.5327846466797857e-07, + "loss": 0.1312, + "step": 20096 + }, + { + "epoch": 2.837757695566224, + "grad_norm": 2.981400767301244, + "learning_rate": 1.5301269093355607e-07, + "loss": 0.1467, + "step": 20097 + }, + { + "epoch": 2.83789889861621, + "grad_norm": 3.9790337085341196, + "learning_rate": 1.5274714603886742e-07, + "loss": 0.1675, + "step": 20098 + }, + { + "epoch": 2.838040101666196, + "grad_norm": 2.958917806545585, + "learning_rate": 1.52481829990081e-07, + "loss": 0.1401, + "step": 20099 + }, + { + "epoch": 2.8381813047161817, + "grad_norm": 4.018486294393013, + "learning_rate": 1.5221674279336408e-07, + "loss": 0.1633, + "step": 20100 + }, + { + "epoch": 2.8383225077661676, + "grad_norm": 3.2114286581721836, + "learning_rate": 1.519518844548773e-07, + "loss": 0.1448, + "step": 20101 + }, + { + "epoch": 2.8384637108161535, + "grad_norm": 3.280430199138361, + "learning_rate": 1.5168725498077574e-07, + "loss": 0.1589, + "step": 20102 + }, + { + "epoch": 2.8386049138661393, + "grad_norm": 2.7075444569272755, + "learning_rate": 1.5142285437720894e-07, + "loss": 0.1041, + "step": 20103 + }, + { + "epoch": 2.838746116916125, + "grad_norm": 3.5067255298297497, + "learning_rate": 1.5115868265032195e-07, + "loss": 0.1323, + "step": 20104 + }, + { + "epoch": 2.838887319966111, + "grad_norm": 3.0493362467362655, + "learning_rate": 1.5089473980625324e-07, + "loss": 0.1514, + "step": 20105 + }, + { + "epoch": 2.839028523016097, + "grad_norm": 3.9128557189699724, + "learning_rate": 1.5063102585113786e-07, + "loss": 0.1321, + "step": 20106 + }, + { + "epoch": 2.839169726066083, + "grad_norm": 3.317676923487786, + "learning_rate": 1.5036754079110427e-07, + "loss": 0.1428, + "step": 20107 + }, + { + "epoch": 2.8393109291160687, + "grad_norm": 3.0864751618088793, + "learning_rate": 1.5010428463227423e-07, + "loss": 0.1292, + "step": 20108 + }, + { + "epoch": 2.8394521321660546, + "grad_norm": 3.310862928978597, + "learning_rate": 1.4984125738076728e-07, + "loss": 0.1682, + "step": 20109 + }, + { + "epoch": 2.8395933352160405, + "grad_norm": 3.363593724724202, + "learning_rate": 1.495784590426963e-07, + "loss": 0.1412, + "step": 20110 + }, + { + "epoch": 2.8397345382660264, + "grad_norm": 2.7305328622456506, + "learning_rate": 1.4931588962416755e-07, + "loss": 0.1552, + "step": 20111 + }, + { + "epoch": 2.8398757413160123, + "grad_norm": 3.03102161813855, + "learning_rate": 1.4905354913128279e-07, + "loss": 0.1399, + "step": 20112 + }, + { + "epoch": 2.840016944365998, + "grad_norm": 2.8206466452122676, + "learning_rate": 1.4879143757013824e-07, + "loss": 0.1205, + "step": 20113 + }, + { + "epoch": 2.840158147415984, + "grad_norm": 3.647103150902528, + "learning_rate": 1.4852955494682798e-07, + "loss": 0.163, + "step": 20114 + }, + { + "epoch": 2.84029935046597, + "grad_norm": 3.6350826105702976, + "learning_rate": 1.4826790126743596e-07, + "loss": 0.1607, + "step": 20115 + }, + { + "epoch": 2.840440553515956, + "grad_norm": 3.5436747863870584, + "learning_rate": 1.4800647653804289e-07, + "loss": 0.1732, + "step": 20116 + }, + { + "epoch": 2.8405817565659417, + "grad_norm": 2.7397490192075455, + "learning_rate": 1.4774528076472505e-07, + "loss": 0.1137, + "step": 20117 + }, + { + "epoch": 2.8407229596159276, + "grad_norm": 3.753796034590914, + "learning_rate": 1.4748431395355088e-07, + "loss": 0.1667, + "step": 20118 + }, + { + "epoch": 2.8408641626659135, + "grad_norm": 3.3303348939922186, + "learning_rate": 1.472235761105878e-07, + "loss": 0.134, + "step": 20119 + }, + { + "epoch": 2.8410053657158993, + "grad_norm": 2.3780096269986166, + "learning_rate": 1.4696306724189312e-07, + "loss": 0.0985, + "step": 20120 + }, + { + "epoch": 2.8411465687658852, + "grad_norm": 2.3977516338728813, + "learning_rate": 1.4670278735352094e-07, + "loss": 0.1167, + "step": 20121 + }, + { + "epoch": 2.841287771815871, + "grad_norm": 4.527111388218047, + "learning_rate": 1.4644273645152196e-07, + "loss": 0.1765, + "step": 20122 + }, + { + "epoch": 2.841428974865857, + "grad_norm": 3.0990269541292896, + "learning_rate": 1.461829145419369e-07, + "loss": 0.1068, + "step": 20123 + }, + { + "epoch": 2.841570177915843, + "grad_norm": 4.874338941260348, + "learning_rate": 1.4592332163080648e-07, + "loss": 0.2147, + "step": 20124 + }, + { + "epoch": 2.8417113809658288, + "grad_norm": 3.000027031098533, + "learning_rate": 1.4566395772416254e-07, + "loss": 0.1103, + "step": 20125 + }, + { + "epoch": 2.8418525840158146, + "grad_norm": 2.713669946050697, + "learning_rate": 1.4540482282803136e-07, + "loss": 0.1092, + "step": 20126 + }, + { + "epoch": 2.8419937870658005, + "grad_norm": 3.6726079300647614, + "learning_rate": 1.4514591694843704e-07, + "loss": 0.1686, + "step": 20127 + }, + { + "epoch": 2.8421349901157864, + "grad_norm": 4.032347769911445, + "learning_rate": 1.4488724009139588e-07, + "loss": 0.142, + "step": 20128 + }, + { + "epoch": 2.8422761931657723, + "grad_norm": 3.0752797018108944, + "learning_rate": 1.4462879226291858e-07, + "loss": 0.1015, + "step": 20129 + }, + { + "epoch": 2.842417396215758, + "grad_norm": 3.4231297220189942, + "learning_rate": 1.4437057346901152e-07, + "loss": 0.1379, + "step": 20130 + }, + { + "epoch": 2.842558599265744, + "grad_norm": 3.423606595782078, + "learning_rate": 1.441125837156765e-07, + "loss": 0.136, + "step": 20131 + }, + { + "epoch": 2.84269980231573, + "grad_norm": 3.3077086863856366, + "learning_rate": 1.4385482300890873e-07, + "loss": 0.1497, + "step": 20132 + }, + { + "epoch": 2.842841005365716, + "grad_norm": 3.6895902842055133, + "learning_rate": 1.4359729135469903e-07, + "loss": 0.1407, + "step": 20133 + }, + { + "epoch": 2.8429822084157017, + "grad_norm": 2.838225346864948, + "learning_rate": 1.4333998875903032e-07, + "loss": 0.1455, + "step": 20134 + }, + { + "epoch": 2.8431234114656876, + "grad_norm": 3.6696856089573764, + "learning_rate": 1.4308291522788344e-07, + "loss": 0.1483, + "step": 20135 + }, + { + "epoch": 2.8432646145156735, + "grad_norm": 2.734964589692828, + "learning_rate": 1.4282607076723355e-07, + "loss": 0.1458, + "step": 20136 + }, + { + "epoch": 2.8434058175656594, + "grad_norm": 2.8629674673292045, + "learning_rate": 1.4256945538304812e-07, + "loss": 0.106, + "step": 20137 + }, + { + "epoch": 2.8435470206156452, + "grad_norm": 4.3442153684868074, + "learning_rate": 1.423130690812924e-07, + "loss": 0.1838, + "step": 20138 + }, + { + "epoch": 2.843688223665631, + "grad_norm": 2.4886833012373994, + "learning_rate": 1.420569118679227e-07, + "loss": 0.1162, + "step": 20139 + }, + { + "epoch": 2.843829426715617, + "grad_norm": 2.8280182103262352, + "learning_rate": 1.4180098374889429e-07, + "loss": 0.1198, + "step": 20140 + }, + { + "epoch": 2.843970629765603, + "grad_norm": 3.8174318334458572, + "learning_rate": 1.415452847301524e-07, + "loss": 0.1639, + "step": 20141 + }, + { + "epoch": 2.8441118328155888, + "grad_norm": 3.071892094348412, + "learning_rate": 1.4128981481764115e-07, + "loss": 0.1229, + "step": 20142 + }, + { + "epoch": 2.8442530358655747, + "grad_norm": 3.3662266297291317, + "learning_rate": 1.4103457401729692e-07, + "loss": 0.1576, + "step": 20143 + }, + { + "epoch": 2.8443942389155605, + "grad_norm": 2.8388120291062653, + "learning_rate": 1.4077956233505163e-07, + "loss": 0.1206, + "step": 20144 + }, + { + "epoch": 2.8445354419655464, + "grad_norm": 2.746761977345328, + "learning_rate": 1.4052477977683167e-07, + "loss": 0.0964, + "step": 20145 + }, + { + "epoch": 2.8446766450155323, + "grad_norm": 3.124899117991557, + "learning_rate": 1.402702263485567e-07, + "loss": 0.1289, + "step": 20146 + }, + { + "epoch": 2.844817848065518, + "grad_norm": 3.377381553154912, + "learning_rate": 1.4001590205614425e-07, + "loss": 0.1662, + "step": 20147 + }, + { + "epoch": 2.844959051115504, + "grad_norm": 3.3031563959642365, + "learning_rate": 1.3976180690550402e-07, + "loss": 0.1618, + "step": 20148 + }, + { + "epoch": 2.84510025416549, + "grad_norm": 3.0717957375908664, + "learning_rate": 1.3950794090254127e-07, + "loss": 0.1384, + "step": 20149 + }, + { + "epoch": 2.845241457215476, + "grad_norm": 3.079643076805089, + "learning_rate": 1.3925430405315577e-07, + "loss": 0.117, + "step": 20150 + }, + { + "epoch": 2.8453826602654617, + "grad_norm": 3.326591431499201, + "learning_rate": 1.3900089636324164e-07, + "loss": 0.1394, + "step": 20151 + }, + { + "epoch": 2.8455238633154476, + "grad_norm": 2.786912680528717, + "learning_rate": 1.3874771783868758e-07, + "loss": 0.1245, + "step": 20152 + }, + { + "epoch": 2.8456650663654335, + "grad_norm": 3.2327597455210344, + "learning_rate": 1.3849476848537656e-07, + "loss": 0.1612, + "step": 20153 + }, + { + "epoch": 2.8458062694154194, + "grad_norm": 3.3951017114134716, + "learning_rate": 1.3824204830918952e-07, + "loss": 0.1257, + "step": 20154 + }, + { + "epoch": 2.8459474724654052, + "grad_norm": 3.245018447411824, + "learning_rate": 1.379895573159995e-07, + "loss": 0.1518, + "step": 20155 + }, + { + "epoch": 2.846088675515391, + "grad_norm": 2.9032374758391457, + "learning_rate": 1.3773729551167182e-07, + "loss": 0.1157, + "step": 20156 + }, + { + "epoch": 2.846229878565377, + "grad_norm": 3.4670277528852944, + "learning_rate": 1.3748526290207065e-07, + "loss": 0.1606, + "step": 20157 + }, + { + "epoch": 2.846371081615363, + "grad_norm": 2.7438000763654578, + "learning_rate": 1.3723345949305245e-07, + "loss": 0.1259, + "step": 20158 + }, + { + "epoch": 2.846512284665349, + "grad_norm": 3.9056112957646794, + "learning_rate": 1.3698188529046918e-07, + "loss": 0.1961, + "step": 20159 + }, + { + "epoch": 2.8466534877153347, + "grad_norm": 2.4238680708012197, + "learning_rate": 1.367305403001673e-07, + "loss": 0.1313, + "step": 20160 + }, + { + "epoch": 2.8467946907653205, + "grad_norm": 3.2078907987506597, + "learning_rate": 1.3647942452798768e-07, + "loss": 0.1697, + "step": 20161 + }, + { + "epoch": 2.8469358938153064, + "grad_norm": 3.1243146667734956, + "learning_rate": 1.3622853797976786e-07, + "loss": 0.159, + "step": 20162 + }, + { + "epoch": 2.8470770968652923, + "grad_norm": 4.304785377709412, + "learning_rate": 1.3597788066133544e-07, + "loss": 0.1669, + "step": 20163 + }, + { + "epoch": 2.847218299915278, + "grad_norm": 2.869146979679962, + "learning_rate": 1.3572745257851792e-07, + "loss": 0.1501, + "step": 20164 + }, + { + "epoch": 2.847359502965264, + "grad_norm": 2.817750338207062, + "learning_rate": 1.3547725373713406e-07, + "loss": 0.1177, + "step": 20165 + }, + { + "epoch": 2.84750070601525, + "grad_norm": 3.8896156607061676, + "learning_rate": 1.3522728414299911e-07, + "loss": 0.1566, + "step": 20166 + }, + { + "epoch": 2.847641909065236, + "grad_norm": 3.165767335327929, + "learning_rate": 1.3497754380192184e-07, + "loss": 0.1385, + "step": 20167 + }, + { + "epoch": 2.8477831121152217, + "grad_norm": 2.8028314860963857, + "learning_rate": 1.3472803271970536e-07, + "loss": 0.12, + "step": 20168 + }, + { + "epoch": 2.8479243151652076, + "grad_norm": 3.023315439173604, + "learning_rate": 1.3447875090214945e-07, + "loss": 0.1202, + "step": 20169 + }, + { + "epoch": 2.8480655182151935, + "grad_norm": 4.245166720673949, + "learning_rate": 1.342296983550462e-07, + "loss": 0.1999, + "step": 20170 + }, + { + "epoch": 2.8482067212651794, + "grad_norm": 3.719732320661145, + "learning_rate": 1.3398087508418423e-07, + "loss": 0.1719, + "step": 20171 + }, + { + "epoch": 2.8483479243151653, + "grad_norm": 3.0366549031903585, + "learning_rate": 1.3373228109534675e-07, + "loss": 0.1516, + "step": 20172 + }, + { + "epoch": 2.848489127365151, + "grad_norm": 2.9598040609442133, + "learning_rate": 1.3348391639430913e-07, + "loss": 0.1387, + "step": 20173 + }, + { + "epoch": 2.848630330415137, + "grad_norm": 3.3432790963044, + "learning_rate": 1.3323578098684565e-07, + "loss": 0.1794, + "step": 20174 + }, + { + "epoch": 2.848771533465123, + "grad_norm": 2.8473246315021847, + "learning_rate": 1.3298787487872055e-07, + "loss": 0.131, + "step": 20175 + }, + { + "epoch": 2.848912736515109, + "grad_norm": 2.8006278444409687, + "learning_rate": 1.3274019807569593e-07, + "loss": 0.1342, + "step": 20176 + }, + { + "epoch": 2.8490539395650947, + "grad_norm": 2.9161502837531312, + "learning_rate": 1.324927505835283e-07, + "loss": 0.1047, + "step": 20177 + }, + { + "epoch": 2.8491951426150806, + "grad_norm": 3.9115411181793425, + "learning_rate": 1.3224553240796633e-07, + "loss": 0.1523, + "step": 20178 + }, + { + "epoch": 2.8493363456650664, + "grad_norm": 3.315597478713627, + "learning_rate": 1.3199854355475772e-07, + "loss": 0.131, + "step": 20179 + }, + { + "epoch": 2.8494775487150523, + "grad_norm": 2.9859261734114013, + "learning_rate": 1.3175178402964116e-07, + "loss": 0.1389, + "step": 20180 + }, + { + "epoch": 2.849618751765038, + "grad_norm": 2.498227420514449, + "learning_rate": 1.315052538383521e-07, + "loss": 0.1223, + "step": 20181 + }, + { + "epoch": 2.849759954815024, + "grad_norm": 3.2924253969230177, + "learning_rate": 1.3125895298661705e-07, + "loss": 0.1363, + "step": 20182 + }, + { + "epoch": 2.84990115786501, + "grad_norm": 3.1582084859666804, + "learning_rate": 1.3101288148016477e-07, + "loss": 0.1182, + "step": 20183 + }, + { + "epoch": 2.850042360914996, + "grad_norm": 2.6674987014747655, + "learning_rate": 1.3076703932470958e-07, + "loss": 0.1024, + "step": 20184 + }, + { + "epoch": 2.8501835639649817, + "grad_norm": 3.147236032883525, + "learning_rate": 1.305214265259658e-07, + "loss": 0.1568, + "step": 20185 + }, + { + "epoch": 2.8503247670149676, + "grad_norm": 2.9751718813931585, + "learning_rate": 1.3027604308964215e-07, + "loss": 0.1262, + "step": 20186 + }, + { + "epoch": 2.8504659700649535, + "grad_norm": 3.009931234788582, + "learning_rate": 1.3003088902143968e-07, + "loss": 0.1539, + "step": 20187 + }, + { + "epoch": 2.8506071731149394, + "grad_norm": 3.6321521492500697, + "learning_rate": 1.2978596432705826e-07, + "loss": 0.1659, + "step": 20188 + }, + { + "epoch": 2.8507483761649253, + "grad_norm": 3.47105747892272, + "learning_rate": 1.2954126901218778e-07, + "loss": 0.1584, + "step": 20189 + }, + { + "epoch": 2.850889579214911, + "grad_norm": 3.6253566323212314, + "learning_rate": 1.292968030825159e-07, + "loss": 0.1516, + "step": 20190 + }, + { + "epoch": 2.851030782264897, + "grad_norm": 3.9028523062322122, + "learning_rate": 1.2905256654372366e-07, + "loss": 0.159, + "step": 20191 + }, + { + "epoch": 2.851171985314883, + "grad_norm": 2.9961940391185706, + "learning_rate": 1.288085594014865e-07, + "loss": 0.1276, + "step": 20192 + }, + { + "epoch": 2.851313188364869, + "grad_norm": 2.804953586749761, + "learning_rate": 1.2856478166147546e-07, + "loss": 0.1207, + "step": 20193 + }, + { + "epoch": 2.8514543914148547, + "grad_norm": 3.110657247834433, + "learning_rate": 1.2832123332935598e-07, + "loss": 0.1703, + "step": 20194 + }, + { + "epoch": 2.8515955944648406, + "grad_norm": 3.4270481919123816, + "learning_rate": 1.2807791441078797e-07, + "loss": 0.1119, + "step": 20195 + }, + { + "epoch": 2.8517367975148264, + "grad_norm": 3.8028381211433344, + "learning_rate": 1.2783482491142474e-07, + "loss": 0.175, + "step": 20196 + }, + { + "epoch": 2.8518780005648123, + "grad_norm": 3.3663687910991857, + "learning_rate": 1.275919648369184e-07, + "loss": 0.1424, + "step": 20197 + }, + { + "epoch": 2.852019203614798, + "grad_norm": 3.0482814044569673, + "learning_rate": 1.2734933419290996e-07, + "loss": 0.1159, + "step": 20198 + }, + { + "epoch": 2.852160406664784, + "grad_norm": 3.199999772225247, + "learning_rate": 1.271069329850383e-07, + "loss": 0.1424, + "step": 20199 + }, + { + "epoch": 2.85230160971477, + "grad_norm": 2.9038813618153534, + "learning_rate": 1.2686476121894e-07, + "loss": 0.1463, + "step": 20200 + }, + { + "epoch": 2.852442812764756, + "grad_norm": 3.213831133875631, + "learning_rate": 1.2662281890024052e-07, + "loss": 0.1595, + "step": 20201 + }, + { + "epoch": 2.8525840158147417, + "grad_norm": 3.5554672878632134, + "learning_rate": 1.263811060345621e-07, + "loss": 0.1497, + "step": 20202 + }, + { + "epoch": 2.8527252188647276, + "grad_norm": 3.202487860714442, + "learning_rate": 1.2613962262752245e-07, + "loss": 0.1349, + "step": 20203 + }, + { + "epoch": 2.8528664219147135, + "grad_norm": 2.9988151105247955, + "learning_rate": 1.2589836868473259e-07, + "loss": 0.1537, + "step": 20204 + }, + { + "epoch": 2.8530076249646994, + "grad_norm": 3.226572951277175, + "learning_rate": 1.2565734421180252e-07, + "loss": 0.1194, + "step": 20205 + }, + { + "epoch": 2.8531488280146853, + "grad_norm": 3.4608649347038303, + "learning_rate": 1.2541654921432998e-07, + "loss": 0.1806, + "step": 20206 + }, + { + "epoch": 2.853290031064671, + "grad_norm": 2.7955632245415893, + "learning_rate": 1.2517598369791383e-07, + "loss": 0.1096, + "step": 20207 + }, + { + "epoch": 2.853431234114657, + "grad_norm": 2.9430711085108534, + "learning_rate": 1.2493564766814292e-07, + "loss": 0.1267, + "step": 20208 + }, + { + "epoch": 2.853572437164643, + "grad_norm": 3.2857711936755782, + "learning_rate": 1.2469554113060168e-07, + "loss": 0.1363, + "step": 20209 + }, + { + "epoch": 2.853713640214629, + "grad_norm": 2.0794461117695247, + "learning_rate": 1.244556640908712e-07, + "loss": 0.1097, + "step": 20210 + }, + { + "epoch": 2.8538548432646147, + "grad_norm": 3.2961909285149145, + "learning_rate": 1.2421601655452696e-07, + "loss": 0.1455, + "step": 20211 + }, + { + "epoch": 2.8539960463146006, + "grad_norm": 3.4543509165727944, + "learning_rate": 1.2397659852713684e-07, + "loss": 0.1768, + "step": 20212 + }, + { + "epoch": 2.8541372493645865, + "grad_norm": 2.9422164522253063, + "learning_rate": 1.237374100142663e-07, + "loss": 0.1266, + "step": 20213 + }, + { + "epoch": 2.8542784524145723, + "grad_norm": 3.0011170395580313, + "learning_rate": 1.2349845102147317e-07, + "loss": 0.1278, + "step": 20214 + }, + { + "epoch": 2.8544196554645582, + "grad_norm": 3.6748656197522154, + "learning_rate": 1.2325972155430966e-07, + "loss": 0.1911, + "step": 20215 + }, + { + "epoch": 2.854560858514544, + "grad_norm": 3.360604717328857, + "learning_rate": 1.2302122161832464e-07, + "loss": 0.1259, + "step": 20216 + }, + { + "epoch": 2.85470206156453, + "grad_norm": 3.8520345006788537, + "learning_rate": 1.2278295121906258e-07, + "loss": 0.1557, + "step": 20217 + }, + { + "epoch": 2.854843264614516, + "grad_norm": 4.663319534893335, + "learning_rate": 1.2254491036205797e-07, + "loss": 0.2138, + "step": 20218 + }, + { + "epoch": 2.8549844676645018, + "grad_norm": 3.358483363236862, + "learning_rate": 1.223070990528441e-07, + "loss": 0.1515, + "step": 20219 + }, + { + "epoch": 2.8551256707144876, + "grad_norm": 3.100920222331805, + "learning_rate": 1.220695172969477e-07, + "loss": 0.1435, + "step": 20220 + }, + { + "epoch": 2.8552668737644735, + "grad_norm": 2.6237381811926217, + "learning_rate": 1.2183216509988881e-07, + "loss": 0.1208, + "step": 20221 + }, + { + "epoch": 2.8554080768144594, + "grad_norm": 2.5264425721971877, + "learning_rate": 1.2159504246718522e-07, + "loss": 0.1448, + "step": 20222 + }, + { + "epoch": 2.8555492798644453, + "grad_norm": 3.108330700554917, + "learning_rate": 1.2135814940434587e-07, + "loss": 0.1597, + "step": 20223 + }, + { + "epoch": 2.855690482914431, + "grad_norm": 2.657930701123252, + "learning_rate": 1.2112148591687743e-07, + "loss": 0.1021, + "step": 20224 + }, + { + "epoch": 2.855831685964417, + "grad_norm": 3.358311261788388, + "learning_rate": 1.2088505201028e-07, + "loss": 0.1249, + "step": 20225 + }, + { + "epoch": 2.855972889014403, + "grad_norm": 2.7365177423227736, + "learning_rate": 1.2064884769004692e-07, + "loss": 0.1286, + "step": 20226 + }, + { + "epoch": 2.856114092064389, + "grad_norm": 2.9635294166077983, + "learning_rate": 1.2041287296166715e-07, + "loss": 0.1281, + "step": 20227 + }, + { + "epoch": 2.8562552951143747, + "grad_norm": 3.535685731643499, + "learning_rate": 1.201771278306263e-07, + "loss": 0.1729, + "step": 20228 + }, + { + "epoch": 2.8563964981643606, + "grad_norm": 2.676209698843738, + "learning_rate": 1.199416123024022e-07, + "loss": 0.1203, + "step": 20229 + }, + { + "epoch": 2.8565377012143465, + "grad_norm": 3.3646955713783813, + "learning_rate": 1.1970632638246827e-07, + "loss": 0.1295, + "step": 20230 + }, + { + "epoch": 2.8566789042643324, + "grad_norm": 2.222027329839311, + "learning_rate": 1.1947127007629234e-07, + "loss": 0.1316, + "step": 20231 + }, + { + "epoch": 2.8568201073143182, + "grad_norm": 4.139333990712217, + "learning_rate": 1.192364433893378e-07, + "loss": 0.1693, + "step": 20232 + }, + { + "epoch": 2.856961310364304, + "grad_norm": 2.70787758242927, + "learning_rate": 1.1900184632705924e-07, + "loss": 0.1062, + "step": 20233 + }, + { + "epoch": 2.85710251341429, + "grad_norm": 3.112493340863802, + "learning_rate": 1.1876747889491225e-07, + "loss": 0.1395, + "step": 20234 + }, + { + "epoch": 2.857243716464276, + "grad_norm": 2.928612164154651, + "learning_rate": 1.1853334109834136e-07, + "loss": 0.1358, + "step": 20235 + }, + { + "epoch": 2.8573849195142618, + "grad_norm": 4.0597829954849285, + "learning_rate": 1.1829943294278778e-07, + "loss": 0.1927, + "step": 20236 + }, + { + "epoch": 2.8575261225642477, + "grad_norm": 3.5381288806643103, + "learning_rate": 1.1806575443368717e-07, + "loss": 0.1632, + "step": 20237 + }, + { + "epoch": 2.8576673256142335, + "grad_norm": 3.3525270218664422, + "learning_rate": 1.1783230557647075e-07, + "loss": 0.1538, + "step": 20238 + }, + { + "epoch": 2.8578085286642194, + "grad_norm": 3.1018747781509677, + "learning_rate": 1.1759908637656525e-07, + "loss": 0.15, + "step": 20239 + }, + { + "epoch": 2.8579497317142053, + "grad_norm": 3.097626813499742, + "learning_rate": 1.1736609683938749e-07, + "loss": 0.1406, + "step": 20240 + }, + { + "epoch": 2.858090934764191, + "grad_norm": 3.0374837526393765, + "learning_rate": 1.1713333697035423e-07, + "loss": 0.1426, + "step": 20241 + }, + { + "epoch": 2.8582321378141766, + "grad_norm": 2.8533941503425857, + "learning_rate": 1.1690080677487558e-07, + "loss": 0.1299, + "step": 20242 + }, + { + "epoch": 2.8583733408641625, + "grad_norm": 3.3721739414634597, + "learning_rate": 1.166685062583528e-07, + "loss": 0.1428, + "step": 20243 + }, + { + "epoch": 2.8585145439141484, + "grad_norm": 3.170959493702561, + "learning_rate": 1.1643643542618488e-07, + "loss": 0.1439, + "step": 20244 + }, + { + "epoch": 2.8586557469641343, + "grad_norm": 3.290092304069314, + "learning_rate": 1.162045942837664e-07, + "loss": 0.149, + "step": 20245 + }, + { + "epoch": 2.85879695001412, + "grad_norm": 3.076316537817162, + "learning_rate": 1.1597298283648529e-07, + "loss": 0.1392, + "step": 20246 + }, + { + "epoch": 2.858938153064106, + "grad_norm": 3.4543080958398593, + "learning_rate": 1.1574160108972277e-07, + "loss": 0.1488, + "step": 20247 + }, + { + "epoch": 2.859079356114092, + "grad_norm": 3.382143629716755, + "learning_rate": 1.1551044904885678e-07, + "loss": 0.1558, + "step": 20248 + }, + { + "epoch": 2.859220559164078, + "grad_norm": 2.6916194512197658, + "learning_rate": 1.1527952671925968e-07, + "loss": 0.1231, + "step": 20249 + }, + { + "epoch": 2.8593617622140637, + "grad_norm": 3.557852763005533, + "learning_rate": 1.1504883410629608e-07, + "loss": 0.2, + "step": 20250 + }, + { + "epoch": 2.8595029652640496, + "grad_norm": 3.58397476953013, + "learning_rate": 1.1481837121533057e-07, + "loss": 0.165, + "step": 20251 + }, + { + "epoch": 2.8596441683140355, + "grad_norm": 3.528473036923732, + "learning_rate": 1.1458813805171665e-07, + "loss": 0.1376, + "step": 20252 + }, + { + "epoch": 2.8597853713640213, + "grad_norm": 3.3401827028229825, + "learning_rate": 1.1435813462080447e-07, + "loss": 0.1313, + "step": 20253 + }, + { + "epoch": 2.859926574414007, + "grad_norm": 3.4990557030392444, + "learning_rate": 1.1412836092793977e-07, + "loss": 0.1587, + "step": 20254 + }, + { + "epoch": 2.860067777463993, + "grad_norm": 3.335944031885215, + "learning_rate": 1.1389881697846383e-07, + "loss": 0.1448, + "step": 20255 + }, + { + "epoch": 2.860208980513979, + "grad_norm": 3.0957759909076374, + "learning_rate": 1.1366950277770794e-07, + "loss": 0.1686, + "step": 20256 + }, + { + "epoch": 2.860350183563965, + "grad_norm": 3.13192842169984, + "learning_rate": 1.134404183310045e-07, + "loss": 0.1247, + "step": 20257 + }, + { + "epoch": 2.8604913866139507, + "grad_norm": 3.7203794059442554, + "learning_rate": 1.1321156364367591e-07, + "loss": 0.1532, + "step": 20258 + }, + { + "epoch": 2.8606325896639366, + "grad_norm": 3.3916187909154534, + "learning_rate": 1.1298293872104127e-07, + "loss": 0.1823, + "step": 20259 + }, + { + "epoch": 2.8607737927139225, + "grad_norm": 3.708151458907937, + "learning_rate": 1.1275454356841298e-07, + "loss": 0.1544, + "step": 20260 + }, + { + "epoch": 2.8609149957639084, + "grad_norm": 2.8761214059586546, + "learning_rate": 1.1252637819109902e-07, + "loss": 0.1456, + "step": 20261 + }, + { + "epoch": 2.8610561988138943, + "grad_norm": 3.390287125110571, + "learning_rate": 1.1229844259440182e-07, + "loss": 0.1685, + "step": 20262 + }, + { + "epoch": 2.86119740186388, + "grad_norm": 2.745855007597313, + "learning_rate": 1.1207073678361824e-07, + "loss": 0.1327, + "step": 20263 + }, + { + "epoch": 2.861338604913866, + "grad_norm": 3.700445458841732, + "learning_rate": 1.1184326076404073e-07, + "loss": 0.1352, + "step": 20264 + }, + { + "epoch": 2.861479807963852, + "grad_norm": 3.5351679927253854, + "learning_rate": 1.1161601454095616e-07, + "loss": 0.1451, + "step": 20265 + }, + { + "epoch": 2.861621011013838, + "grad_norm": 3.2152162492024, + "learning_rate": 1.1138899811964477e-07, + "loss": 0.1478, + "step": 20266 + }, + { + "epoch": 2.8617622140638237, + "grad_norm": 3.227587442220049, + "learning_rate": 1.1116221150538231e-07, + "loss": 0.1698, + "step": 20267 + }, + { + "epoch": 2.8619034171138096, + "grad_norm": 2.6038289376774437, + "learning_rate": 1.1093565470343904e-07, + "loss": 0.1268, + "step": 20268 + }, + { + "epoch": 2.8620446201637955, + "grad_norm": 3.092086905454962, + "learning_rate": 1.1070932771908072e-07, + "loss": 0.1581, + "step": 20269 + }, + { + "epoch": 2.8621858232137813, + "grad_norm": 3.4474877337555023, + "learning_rate": 1.1048323055756649e-07, + "loss": 0.169, + "step": 20270 + }, + { + "epoch": 2.8623270262637672, + "grad_norm": 3.2764986723978495, + "learning_rate": 1.1025736322415104e-07, + "loss": 0.1492, + "step": 20271 + }, + { + "epoch": 2.862468229313753, + "grad_norm": 3.9917769584964686, + "learning_rate": 1.1003172572408349e-07, + "loss": 0.1882, + "step": 20272 + }, + { + "epoch": 2.862609432363739, + "grad_norm": 2.756172086057813, + "learning_rate": 1.0980631806260745e-07, + "loss": 0.1152, + "step": 20273 + }, + { + "epoch": 2.862750635413725, + "grad_norm": 4.15303445230611, + "learning_rate": 1.0958114024496202e-07, + "loss": 0.1908, + "step": 20274 + }, + { + "epoch": 2.8628918384637108, + "grad_norm": 2.9467043341917423, + "learning_rate": 1.0935619227637862e-07, + "loss": 0.1296, + "step": 20275 + }, + { + "epoch": 2.8630330415136966, + "grad_norm": 2.5521190761981662, + "learning_rate": 1.0913147416208636e-07, + "loss": 0.1041, + "step": 20276 + }, + { + "epoch": 2.8631742445636825, + "grad_norm": 3.134719108570739, + "learning_rate": 1.0890698590730775e-07, + "loss": 0.141, + "step": 20277 + }, + { + "epoch": 2.8633154476136684, + "grad_norm": 2.7630641858991485, + "learning_rate": 1.086827275172575e-07, + "loss": 0.1339, + "step": 20278 + }, + { + "epoch": 2.8634566506636543, + "grad_norm": 3.2105525262759813, + "learning_rate": 1.0845869899715034e-07, + "loss": 0.1232, + "step": 20279 + }, + { + "epoch": 2.86359785371364, + "grad_norm": 3.347207684214817, + "learning_rate": 1.0823490035218986e-07, + "loss": 0.1366, + "step": 20280 + }, + { + "epoch": 2.863739056763626, + "grad_norm": 2.9857748540318236, + "learning_rate": 1.080113315875797e-07, + "loss": 0.1332, + "step": 20281 + }, + { + "epoch": 2.863880259813612, + "grad_norm": 2.64748303684728, + "learning_rate": 1.0778799270851348e-07, + "loss": 0.1423, + "step": 20282 + }, + { + "epoch": 2.864021462863598, + "grad_norm": 2.9302208991713585, + "learning_rate": 1.0756488372018259e-07, + "loss": 0.1397, + "step": 20283 + }, + { + "epoch": 2.8641626659135837, + "grad_norm": 3.873961399405908, + "learning_rate": 1.0734200462777178e-07, + "loss": 0.1566, + "step": 20284 + }, + { + "epoch": 2.8643038689635696, + "grad_norm": 3.3850476326755836, + "learning_rate": 1.0711935543646023e-07, + "loss": 0.1463, + "step": 20285 + }, + { + "epoch": 2.8644450720135555, + "grad_norm": 3.2661165064688777, + "learning_rate": 1.068969361514216e-07, + "loss": 0.1315, + "step": 20286 + }, + { + "epoch": 2.8645862750635414, + "grad_norm": 2.8713120224885587, + "learning_rate": 1.0667474677782619e-07, + "loss": 0.1137, + "step": 20287 + }, + { + "epoch": 2.8647274781135272, + "grad_norm": 3.3640251115340605, + "learning_rate": 1.0645278732083763e-07, + "loss": 0.1581, + "step": 20288 + }, + { + "epoch": 2.864868681163513, + "grad_norm": 3.4299562674675093, + "learning_rate": 1.0623105778561294e-07, + "loss": 0.1597, + "step": 20289 + }, + { + "epoch": 2.865009884213499, + "grad_norm": 3.302965321669273, + "learning_rate": 1.0600955817730573e-07, + "loss": 0.1913, + "step": 20290 + }, + { + "epoch": 2.865151087263485, + "grad_norm": 3.7882142983029987, + "learning_rate": 1.0578828850106415e-07, + "loss": 0.1759, + "step": 20291 + }, + { + "epoch": 2.8652922903134708, + "grad_norm": 3.21523273646911, + "learning_rate": 1.055672487620285e-07, + "loss": 0.1372, + "step": 20292 + }, + { + "epoch": 2.8654334933634567, + "grad_norm": 3.4771356174526007, + "learning_rate": 1.0534643896533913e-07, + "loss": 0.1591, + "step": 20293 + }, + { + "epoch": 2.8655746964134425, + "grad_norm": 3.2477589673789464, + "learning_rate": 1.0512585911612416e-07, + "loss": 0.1106, + "step": 20294 + }, + { + "epoch": 2.8657158994634284, + "grad_norm": 3.384088406022248, + "learning_rate": 1.0490550921950948e-07, + "loss": 0.1446, + "step": 20295 + }, + { + "epoch": 2.8658571025134143, + "grad_norm": 2.8581201297252155, + "learning_rate": 1.0468538928061878e-07, + "loss": 0.1095, + "step": 20296 + }, + { + "epoch": 2.8659983055634, + "grad_norm": 3.741280559444293, + "learning_rate": 1.0446549930456684e-07, + "loss": 0.1809, + "step": 20297 + }, + { + "epoch": 2.866139508613386, + "grad_norm": 3.8307731895143617, + "learning_rate": 1.0424583929646181e-07, + "loss": 0.144, + "step": 20298 + }, + { + "epoch": 2.866280711663372, + "grad_norm": 3.616675116920907, + "learning_rate": 1.0402640926141072e-07, + "loss": 0.1385, + "step": 20299 + }, + { + "epoch": 2.866421914713358, + "grad_norm": 3.2909846244420256, + "learning_rate": 1.038072092045117e-07, + "loss": 0.1548, + "step": 20300 + }, + { + "epoch": 2.8665631177633437, + "grad_norm": 3.658145280150118, + "learning_rate": 1.0358823913085958e-07, + "loss": 0.1446, + "step": 20301 + }, + { + "epoch": 2.8667043208133296, + "grad_norm": 3.5854314129827225, + "learning_rate": 1.033694990455425e-07, + "loss": 0.1718, + "step": 20302 + }, + { + "epoch": 2.8668455238633155, + "grad_norm": 3.4134320576728308, + "learning_rate": 1.0315098895364417e-07, + "loss": 0.1735, + "step": 20303 + }, + { + "epoch": 2.8669867269133014, + "grad_norm": 3.399469177816294, + "learning_rate": 1.0293270886024276e-07, + "loss": 0.1357, + "step": 20304 + }, + { + "epoch": 2.8671279299632872, + "grad_norm": 3.6440333183144067, + "learning_rate": 1.0271465877041198e-07, + "loss": 0.1808, + "step": 20305 + }, + { + "epoch": 2.867269133013273, + "grad_norm": 2.7916391187280203, + "learning_rate": 1.0249683868921667e-07, + "loss": 0.1366, + "step": 20306 + }, + { + "epoch": 2.867410336063259, + "grad_norm": 3.6069155505781105, + "learning_rate": 1.0227924862172057e-07, + "loss": 0.1568, + "step": 20307 + }, + { + "epoch": 2.867551539113245, + "grad_norm": 3.013128405519665, + "learning_rate": 1.0206188857298182e-07, + "loss": 0.1306, + "step": 20308 + }, + { + "epoch": 2.867692742163231, + "grad_norm": 3.9342917486679077, + "learning_rate": 1.0184475854804865e-07, + "loss": 0.1844, + "step": 20309 + }, + { + "epoch": 2.8678339452132167, + "grad_norm": 3.136156327270639, + "learning_rate": 1.0162785855197032e-07, + "loss": 0.1324, + "step": 20310 + }, + { + "epoch": 2.8679751482632025, + "grad_norm": 3.970606890095133, + "learning_rate": 1.0141118858978393e-07, + "loss": 0.2008, + "step": 20311 + }, + { + "epoch": 2.8681163513131884, + "grad_norm": 3.3025868895949766, + "learning_rate": 1.0119474866652767e-07, + "loss": 0.1686, + "step": 20312 + }, + { + "epoch": 2.8682575543631743, + "grad_norm": 3.2152804769213397, + "learning_rate": 1.0097853878722975e-07, + "loss": 0.1717, + "step": 20313 + }, + { + "epoch": 2.86839875741316, + "grad_norm": 3.297453634720773, + "learning_rate": 1.0076255895691611e-07, + "loss": 0.1234, + "step": 20314 + }, + { + "epoch": 2.868539960463146, + "grad_norm": 2.83714541619477, + "learning_rate": 1.00546809180605e-07, + "loss": 0.1431, + "step": 20315 + }, + { + "epoch": 2.868681163513132, + "grad_norm": 3.276328357241232, + "learning_rate": 1.0033128946331128e-07, + "loss": 0.1871, + "step": 20316 + }, + { + "epoch": 2.868822366563118, + "grad_norm": 2.531575023822282, + "learning_rate": 1.0011599981004317e-07, + "loss": 0.0922, + "step": 20317 + }, + { + "epoch": 2.8689635696131037, + "grad_norm": 3.4027875098884532, + "learning_rate": 9.990094022580332e-08, + "loss": 0.1374, + "step": 20318 + }, + { + "epoch": 2.8691047726630896, + "grad_norm": 3.1780673761123146, + "learning_rate": 9.968611071558998e-08, + "loss": 0.1328, + "step": 20319 + }, + { + "epoch": 2.8692459757130755, + "grad_norm": 3.838171258226563, + "learning_rate": 9.947151128439692e-08, + "loss": 0.142, + "step": 20320 + }, + { + "epoch": 2.8693871787630614, + "grad_norm": 2.529865573428903, + "learning_rate": 9.925714193720904e-08, + "loss": 0.0871, + "step": 20321 + }, + { + "epoch": 2.8695283818130473, + "grad_norm": 3.2166928719700802, + "learning_rate": 9.904300267901012e-08, + "loss": 0.1331, + "step": 20322 + }, + { + "epoch": 2.869669584863033, + "grad_norm": 3.0174225870271636, + "learning_rate": 9.88290935147751e-08, + "loss": 0.1396, + "step": 20323 + }, + { + "epoch": 2.869810787913019, + "grad_norm": 2.963454211902711, + "learning_rate": 9.861541444947554e-08, + "loss": 0.13, + "step": 20324 + }, + { + "epoch": 2.869951990963005, + "grad_norm": 3.32357811434459, + "learning_rate": 9.840196548807857e-08, + "loss": 0.1167, + "step": 20325 + }, + { + "epoch": 2.870093194012991, + "grad_norm": 3.33142322928062, + "learning_rate": 9.818874663554356e-08, + "loss": 0.1332, + "step": 20326 + }, + { + "epoch": 2.8702343970629767, + "grad_norm": 3.459463599717467, + "learning_rate": 9.797575789682657e-08, + "loss": 0.155, + "step": 20327 + }, + { + "epoch": 2.8703756001129626, + "grad_norm": 2.4783131173873416, + "learning_rate": 9.776299927687694e-08, + "loss": 0.1197, + "step": 20328 + }, + { + "epoch": 2.8705168031629484, + "grad_norm": 3.364299098674183, + "learning_rate": 9.755047078063629e-08, + "loss": 0.1589, + "step": 20329 + }, + { + "epoch": 2.8706580062129343, + "grad_norm": 2.5742003912414413, + "learning_rate": 9.733817241304844e-08, + "loss": 0.1232, + "step": 20330 + }, + { + "epoch": 2.87079920926292, + "grad_norm": 3.2050237092980023, + "learning_rate": 9.712610417904389e-08, + "loss": 0.1353, + "step": 20331 + }, + { + "epoch": 2.870940412312906, + "grad_norm": 3.5066045863791144, + "learning_rate": 9.691426608355203e-08, + "loss": 0.1591, + "step": 20332 + }, + { + "epoch": 2.871081615362892, + "grad_norm": 3.456602117917055, + "learning_rate": 9.67026581314956e-08, + "loss": 0.1417, + "step": 20333 + }, + { + "epoch": 2.871222818412878, + "grad_norm": 3.4035098836102122, + "learning_rate": 9.649128032779287e-08, + "loss": 0.1575, + "step": 20334 + }, + { + "epoch": 2.8713640214628637, + "grad_norm": 3.4944405134582874, + "learning_rate": 9.628013267735658e-08, + "loss": 0.1504, + "step": 20335 + }, + { + "epoch": 2.8715052245128496, + "grad_norm": 2.7666618155300946, + "learning_rate": 9.606921518509172e-08, + "loss": 0.1162, + "step": 20336 + }, + { + "epoch": 2.8716464275628355, + "grad_norm": 2.7690167900919023, + "learning_rate": 9.5858527855901e-08, + "loss": 0.1044, + "step": 20337 + }, + { + "epoch": 2.8717876306128214, + "grad_norm": 3.237584405385272, + "learning_rate": 9.564807069468163e-08, + "loss": 0.1334, + "step": 20338 + }, + { + "epoch": 2.8719288336628073, + "grad_norm": 3.550702046761701, + "learning_rate": 9.543784370632414e-08, + "loss": 0.1129, + "step": 20339 + }, + { + "epoch": 2.872070036712793, + "grad_norm": 3.2501926234483745, + "learning_rate": 9.52278468957124e-08, + "loss": 0.1574, + "step": 20340 + }, + { + "epoch": 2.872211239762779, + "grad_norm": 3.641556255961306, + "learning_rate": 9.501808026772808e-08, + "loss": 0.1746, + "step": 20341 + }, + { + "epoch": 2.872352442812765, + "grad_norm": 3.4230548729522288, + "learning_rate": 9.480854382724613e-08, + "loss": 0.1659, + "step": 20342 + }, + { + "epoch": 2.872493645862751, + "grad_norm": 3.6955646338424137, + "learning_rate": 9.459923757913603e-08, + "loss": 0.1877, + "step": 20343 + }, + { + "epoch": 2.8726348489127362, + "grad_norm": 3.26625329909185, + "learning_rate": 9.439016152826275e-08, + "loss": 0.1396, + "step": 20344 + }, + { + "epoch": 2.872776051962722, + "grad_norm": 4.23319242421209, + "learning_rate": 9.418131567948352e-08, + "loss": 0.1876, + "step": 20345 + }, + { + "epoch": 2.872917255012708, + "grad_norm": 3.242569105256127, + "learning_rate": 9.397270003765224e-08, + "loss": 0.1494, + "step": 20346 + }, + { + "epoch": 2.873058458062694, + "grad_norm": 4.096917325179497, + "learning_rate": 9.376431460761725e-08, + "loss": 0.216, + "step": 20347 + }, + { + "epoch": 2.8731996611126798, + "grad_norm": 3.1564857961057653, + "learning_rate": 9.355615939422135e-08, + "loss": 0.1263, + "step": 20348 + }, + { + "epoch": 2.8733408641626657, + "grad_norm": 4.059766000741964, + "learning_rate": 9.334823440230289e-08, + "loss": 0.1758, + "step": 20349 + }, + { + "epoch": 2.8734820672126515, + "grad_norm": 3.5973189736570474, + "learning_rate": 9.314053963669245e-08, + "loss": 0.1472, + "step": 20350 + }, + { + "epoch": 2.8736232702626374, + "grad_norm": 2.9132391805538536, + "learning_rate": 9.293307510221727e-08, + "loss": 0.1093, + "step": 20351 + }, + { + "epoch": 2.8737644733126233, + "grad_norm": 2.5320790848846984, + "learning_rate": 9.272584080370018e-08, + "loss": 0.1163, + "step": 20352 + }, + { + "epoch": 2.873905676362609, + "grad_norm": 3.0877767563428407, + "learning_rate": 9.251883674595396e-08, + "loss": 0.1359, + "step": 20353 + }, + { + "epoch": 2.874046879412595, + "grad_norm": 3.332206041194482, + "learning_rate": 9.231206293379257e-08, + "loss": 0.1482, + "step": 20354 + }, + { + "epoch": 2.874188082462581, + "grad_norm": 3.4669665810571444, + "learning_rate": 9.210551937201995e-08, + "loss": 0.1177, + "step": 20355 + }, + { + "epoch": 2.874329285512567, + "grad_norm": 3.106761253166471, + "learning_rate": 9.189920606543556e-08, + "loss": 0.1564, + "step": 20356 + }, + { + "epoch": 2.8744704885625527, + "grad_norm": 4.157905161075009, + "learning_rate": 9.16931230188356e-08, + "loss": 0.2154, + "step": 20357 + }, + { + "epoch": 2.8746116916125386, + "grad_norm": 2.7239832959328583, + "learning_rate": 9.148727023700731e-08, + "loss": 0.1365, + "step": 20358 + }, + { + "epoch": 2.8747528946625245, + "grad_norm": 2.513485938518859, + "learning_rate": 9.12816477247358e-08, + "loss": 0.1254, + "step": 20359 + }, + { + "epoch": 2.8748940977125104, + "grad_norm": 2.9932638451657483, + "learning_rate": 9.107625548679944e-08, + "loss": 0.1122, + "step": 20360 + }, + { + "epoch": 2.8750353007624962, + "grad_norm": 2.8616897601905236, + "learning_rate": 9.087109352797329e-08, + "loss": 0.1179, + "step": 20361 + }, + { + "epoch": 2.875176503812482, + "grad_norm": 3.8571391579898586, + "learning_rate": 9.066616185302246e-08, + "loss": 0.1952, + "step": 20362 + }, + { + "epoch": 2.875317706862468, + "grad_norm": 3.680556762866874, + "learning_rate": 9.046146046670979e-08, + "loss": 0.1741, + "step": 20363 + }, + { + "epoch": 2.875458909912454, + "grad_norm": 3.525354760319563, + "learning_rate": 9.025698937379368e-08, + "loss": 0.1542, + "step": 20364 + }, + { + "epoch": 2.87560011296244, + "grad_norm": 4.01415818337065, + "learning_rate": 9.005274857902479e-08, + "loss": 0.1551, + "step": 20365 + }, + { + "epoch": 2.8757413160124257, + "grad_norm": 3.837528581370704, + "learning_rate": 8.984873808715155e-08, + "loss": 0.1694, + "step": 20366 + }, + { + "epoch": 2.8758825190624115, + "grad_norm": 3.5817699900635573, + "learning_rate": 8.96449579029135e-08, + "loss": 0.1496, + "step": 20367 + }, + { + "epoch": 2.8760237221123974, + "grad_norm": 3.3972088894365773, + "learning_rate": 8.944140803104573e-08, + "loss": 0.1422, + "step": 20368 + }, + { + "epoch": 2.8761649251623833, + "grad_norm": 3.259956126192769, + "learning_rate": 8.923808847628002e-08, + "loss": 0.136, + "step": 20369 + }, + { + "epoch": 2.876306128212369, + "grad_norm": 3.231640709198301, + "learning_rate": 8.903499924334147e-08, + "loss": 0.1316, + "step": 20370 + }, + { + "epoch": 2.876447331262355, + "grad_norm": 3.152697892357744, + "learning_rate": 8.883214033694964e-08, + "loss": 0.1402, + "step": 20371 + }, + { + "epoch": 2.876588534312341, + "grad_norm": 3.2566760114057156, + "learning_rate": 8.862951176181744e-08, + "loss": 0.1229, + "step": 20372 + }, + { + "epoch": 2.876729737362327, + "grad_norm": 2.669408121753116, + "learning_rate": 8.842711352265554e-08, + "loss": 0.1084, + "step": 20373 + }, + { + "epoch": 2.8768709404123127, + "grad_norm": 3.1378763379041774, + "learning_rate": 8.822494562416684e-08, + "loss": 0.1214, + "step": 20374 + }, + { + "epoch": 2.8770121434622986, + "grad_norm": 2.713762352623823, + "learning_rate": 8.80230080710498e-08, + "loss": 0.1313, + "step": 20375 + }, + { + "epoch": 2.8771533465122845, + "grad_norm": 3.5840606190416717, + "learning_rate": 8.782130086799734e-08, + "loss": 0.1936, + "step": 20376 + }, + { + "epoch": 2.8772945495622704, + "grad_norm": 2.835259407698947, + "learning_rate": 8.761982401969793e-08, + "loss": 0.1267, + "step": 20377 + }, + { + "epoch": 2.8774357526122563, + "grad_norm": 3.863618391732048, + "learning_rate": 8.741857753083228e-08, + "loss": 0.1983, + "step": 20378 + }, + { + "epoch": 2.877576955662242, + "grad_norm": 3.50569147256025, + "learning_rate": 8.721756140607885e-08, + "loss": 0.1579, + "step": 20379 + }, + { + "epoch": 2.877718158712228, + "grad_norm": 3.859532790382486, + "learning_rate": 8.701677565010725e-08, + "loss": 0.168, + "step": 20380 + }, + { + "epoch": 2.877859361762214, + "grad_norm": 3.6965802488413875, + "learning_rate": 8.681622026758485e-08, + "loss": 0.1845, + "step": 20381 + }, + { + "epoch": 2.8780005648122, + "grad_norm": 2.4554546170724625, + "learning_rate": 8.661589526317238e-08, + "loss": 0.1057, + "step": 20382 + }, + { + "epoch": 2.8781417678621857, + "grad_norm": 3.2592594123177916, + "learning_rate": 8.641580064152499e-08, + "loss": 0.1567, + "step": 20383 + }, + { + "epoch": 2.8782829709121716, + "grad_norm": 3.2083885887708887, + "learning_rate": 8.621593640729343e-08, + "loss": 0.1433, + "step": 20384 + }, + { + "epoch": 2.8784241739621574, + "grad_norm": 3.2624157058203296, + "learning_rate": 8.601630256512173e-08, + "loss": 0.1804, + "step": 20385 + }, + { + "epoch": 2.8785653770121433, + "grad_norm": 2.8779474363969806, + "learning_rate": 8.581689911965063e-08, + "loss": 0.1672, + "step": 20386 + }, + { + "epoch": 2.878706580062129, + "grad_norm": 3.104528268880012, + "learning_rate": 8.5617726075512e-08, + "loss": 0.1257, + "step": 20387 + }, + { + "epoch": 2.878847783112115, + "grad_norm": 3.257602854277619, + "learning_rate": 8.541878343733656e-08, + "loss": 0.1401, + "step": 20388 + }, + { + "epoch": 2.878988986162101, + "grad_norm": 2.4494926906812355, + "learning_rate": 8.522007120974617e-08, + "loss": 0.1076, + "step": 20389 + }, + { + "epoch": 2.879130189212087, + "grad_norm": 3.2663072041803245, + "learning_rate": 8.502158939736049e-08, + "loss": 0.1223, + "step": 20390 + }, + { + "epoch": 2.8792713922620727, + "grad_norm": 3.1817376048543276, + "learning_rate": 8.482333800479026e-08, + "loss": 0.1455, + "step": 20391 + }, + { + "epoch": 2.8794125953120586, + "grad_norm": 3.097772185608281, + "learning_rate": 8.46253170366429e-08, + "loss": 0.134, + "step": 20392 + }, + { + "epoch": 2.8795537983620445, + "grad_norm": 3.1292624237295996, + "learning_rate": 8.442752649752139e-08, + "loss": 0.1228, + "step": 20393 + }, + { + "epoch": 2.8796950014120304, + "grad_norm": 2.5528454392126716, + "learning_rate": 8.422996639202318e-08, + "loss": 0.1205, + "step": 20394 + }, + { + "epoch": 2.8798362044620163, + "grad_norm": 3.362113758666691, + "learning_rate": 8.403263672473793e-08, + "loss": 0.1484, + "step": 20395 + }, + { + "epoch": 2.879977407512002, + "grad_norm": 3.768971394779968, + "learning_rate": 8.383553750025198e-08, + "loss": 0.1479, + "step": 20396 + }, + { + "epoch": 2.880118610561988, + "grad_norm": 2.7205561925490893, + "learning_rate": 8.363866872314497e-08, + "loss": 0.1122, + "step": 20397 + }, + { + "epoch": 2.880259813611974, + "grad_norm": 3.3011194874606993, + "learning_rate": 8.344203039799214e-08, + "loss": 0.1551, + "step": 20398 + }, + { + "epoch": 2.88040101666196, + "grad_norm": 3.0203151332107416, + "learning_rate": 8.32456225293643e-08, + "loss": 0.1566, + "step": 20399 + }, + { + "epoch": 2.8805422197119457, + "grad_norm": 3.079257760107595, + "learning_rate": 8.304944512182666e-08, + "loss": 0.1291, + "step": 20400 + }, + { + "epoch": 2.8806834227619316, + "grad_norm": 2.71234672167962, + "learning_rate": 8.28534981799356e-08, + "loss": 0.1089, + "step": 20401 + }, + { + "epoch": 2.8808246258119174, + "grad_norm": 3.0497759462456657, + "learning_rate": 8.265778170824746e-08, + "loss": 0.135, + "step": 20402 + }, + { + "epoch": 2.8809658288619033, + "grad_norm": 3.672926602840041, + "learning_rate": 8.24622957113086e-08, + "loss": 0.1964, + "step": 20403 + }, + { + "epoch": 2.881107031911889, + "grad_norm": 3.480010937347849, + "learning_rate": 8.226704019366427e-08, + "loss": 0.1646, + "step": 20404 + }, + { + "epoch": 2.881248234961875, + "grad_norm": 3.5029111691103036, + "learning_rate": 8.207201515984975e-08, + "loss": 0.1526, + "step": 20405 + }, + { + "epoch": 2.881389438011861, + "grad_norm": 2.8028669015685903, + "learning_rate": 8.187722061439806e-08, + "loss": 0.1341, + "step": 20406 + }, + { + "epoch": 2.881530641061847, + "grad_norm": 2.911891182187555, + "learning_rate": 8.168265656183783e-08, + "loss": 0.1536, + "step": 20407 + }, + { + "epoch": 2.8816718441118327, + "grad_norm": 2.689927866483319, + "learning_rate": 8.148832300668763e-08, + "loss": 0.1645, + "step": 20408 + }, + { + "epoch": 2.8818130471618186, + "grad_norm": 2.496662417374966, + "learning_rate": 8.129421995346609e-08, + "loss": 0.1086, + "step": 20409 + }, + { + "epoch": 2.8819542502118045, + "grad_norm": 3.435291686232006, + "learning_rate": 8.110034740668293e-08, + "loss": 0.1445, + "step": 20410 + }, + { + "epoch": 2.8820954532617904, + "grad_norm": 3.1480099953221212, + "learning_rate": 8.090670537084455e-08, + "loss": 0.1388, + "step": 20411 + }, + { + "epoch": 2.8822366563117763, + "grad_norm": 3.3802958207009235, + "learning_rate": 8.071329385045068e-08, + "loss": 0.1186, + "step": 20412 + }, + { + "epoch": 2.882377859361762, + "grad_norm": 3.0299613977686866, + "learning_rate": 8.052011284999661e-08, + "loss": 0.1635, + "step": 20413 + }, + { + "epoch": 2.882519062411748, + "grad_norm": 2.797734570472381, + "learning_rate": 8.032716237396987e-08, + "loss": 0.1287, + "step": 20414 + }, + { + "epoch": 2.882660265461734, + "grad_norm": 3.0674494840440403, + "learning_rate": 8.013444242685686e-08, + "loss": 0.1274, + "step": 20415 + }, + { + "epoch": 2.88280146851172, + "grad_norm": 3.570408554229105, + "learning_rate": 7.9941953013134e-08, + "loss": 0.1341, + "step": 20416 + }, + { + "epoch": 2.8829426715617057, + "grad_norm": 2.6160332332851692, + "learning_rate": 7.974969413727773e-08, + "loss": 0.1012, + "step": 20417 + }, + { + "epoch": 2.8830838746116916, + "grad_norm": 3.1600160253748526, + "learning_rate": 7.955766580375334e-08, + "loss": 0.1219, + "step": 20418 + }, + { + "epoch": 2.8832250776616775, + "grad_norm": 3.5516690128159762, + "learning_rate": 7.936586801702507e-08, + "loss": 0.1553, + "step": 20419 + }, + { + "epoch": 2.8833662807116633, + "grad_norm": 3.201764644214429, + "learning_rate": 7.91743007815493e-08, + "loss": 0.1403, + "step": 20420 + }, + { + "epoch": 2.8835074837616492, + "grad_norm": 3.4835164755404313, + "learning_rate": 7.898296410177808e-08, + "loss": 0.1588, + "step": 20421 + }, + { + "epoch": 2.883648686811635, + "grad_norm": 2.669971832620358, + "learning_rate": 7.879185798215894e-08, + "loss": 0.1186, + "step": 20422 + }, + { + "epoch": 2.883789889861621, + "grad_norm": 3.226234347399841, + "learning_rate": 7.860098242713165e-08, + "loss": 0.1292, + "step": 20423 + }, + { + "epoch": 2.883931092911607, + "grad_norm": 3.6306433597131176, + "learning_rate": 7.841033744113268e-08, + "loss": 0.179, + "step": 20424 + }, + { + "epoch": 2.8840722959615928, + "grad_norm": 3.288603361273725, + "learning_rate": 7.821992302859405e-08, + "loss": 0.1428, + "step": 20425 + }, + { + "epoch": 2.8842134990115786, + "grad_norm": 3.4295717963947254, + "learning_rate": 7.802973919393775e-08, + "loss": 0.1485, + "step": 20426 + }, + { + "epoch": 2.8843547020615645, + "grad_norm": 2.8880597218069433, + "learning_rate": 7.783978594158581e-08, + "loss": 0.149, + "step": 20427 + }, + { + "epoch": 2.8844959051115504, + "grad_norm": 3.473708487842749, + "learning_rate": 7.765006327595248e-08, + "loss": 0.1715, + "step": 20428 + }, + { + "epoch": 2.8846371081615363, + "grad_norm": 2.8780097829298894, + "learning_rate": 7.746057120144757e-08, + "loss": 0.1179, + "step": 20429 + }, + { + "epoch": 2.884778311211522, + "grad_norm": 3.3571081234988167, + "learning_rate": 7.727130972247199e-08, + "loss": 0.1499, + "step": 20430 + }, + { + "epoch": 2.884919514261508, + "grad_norm": 4.520239199347437, + "learning_rate": 7.708227884342667e-08, + "loss": 0.2158, + "step": 20431 + }, + { + "epoch": 2.885060717311494, + "grad_norm": 3.2602143715118816, + "learning_rate": 7.689347856870366e-08, + "loss": 0.1592, + "step": 20432 + }, + { + "epoch": 2.88520192036148, + "grad_norm": 2.792615806408365, + "learning_rate": 7.670490890269055e-08, + "loss": 0.1239, + "step": 20433 + }, + { + "epoch": 2.8853431234114657, + "grad_norm": 3.317346808137323, + "learning_rate": 7.651656984977051e-08, + "loss": 0.1701, + "step": 20434 + }, + { + "epoch": 2.8854843264614516, + "grad_norm": 3.3121621295514805, + "learning_rate": 7.632846141432004e-08, + "loss": 0.165, + "step": 20435 + }, + { + "epoch": 2.8856255295114375, + "grad_norm": 3.627267679816503, + "learning_rate": 7.614058360070897e-08, + "loss": 0.1699, + "step": 20436 + }, + { + "epoch": 2.8857667325614234, + "grad_norm": 2.541823423977845, + "learning_rate": 7.595293641330714e-08, + "loss": 0.1189, + "step": 20437 + }, + { + "epoch": 2.8859079356114092, + "grad_norm": 3.371866772937369, + "learning_rate": 7.576551985647107e-08, + "loss": 0.1627, + "step": 20438 + }, + { + "epoch": 2.886049138661395, + "grad_norm": 3.0140698138967434, + "learning_rate": 7.557833393455838e-08, + "loss": 0.146, + "step": 20439 + }, + { + "epoch": 2.886190341711381, + "grad_norm": 3.3115059466649606, + "learning_rate": 7.539137865192003e-08, + "loss": 0.1246, + "step": 20440 + }, + { + "epoch": 2.886331544761367, + "grad_norm": 2.5562396716782274, + "learning_rate": 7.520465401290033e-08, + "loss": 0.1155, + "step": 20441 + }, + { + "epoch": 2.8864727478113528, + "grad_norm": 3.4525700310216636, + "learning_rate": 7.501816002183803e-08, + "loss": 0.1483, + "step": 20442 + }, + { + "epoch": 2.8866139508613387, + "grad_norm": 3.246805829556253, + "learning_rate": 7.483189668306635e-08, + "loss": 0.1643, + "step": 20443 + }, + { + "epoch": 2.8867551539113245, + "grad_norm": 3.036268791761304, + "learning_rate": 7.464586400091623e-08, + "loss": 0.1211, + "step": 20444 + }, + { + "epoch": 2.8868963569613104, + "grad_norm": 2.8018187823484957, + "learning_rate": 7.446006197970867e-08, + "loss": 0.0977, + "step": 20445 + }, + { + "epoch": 2.8870375600112963, + "grad_norm": 3.6775682429886585, + "learning_rate": 7.427449062376468e-08, + "loss": 0.1711, + "step": 20446 + }, + { + "epoch": 2.887178763061282, + "grad_norm": 3.2024177711186086, + "learning_rate": 7.408914993739303e-08, + "loss": 0.1249, + "step": 20447 + }, + { + "epoch": 2.887319966111268, + "grad_norm": 3.2107032137662115, + "learning_rate": 7.390403992490358e-08, + "loss": 0.1408, + "step": 20448 + }, + { + "epoch": 2.887461169161254, + "grad_norm": 2.9117696689707167, + "learning_rate": 7.371916059059847e-08, + "loss": 0.126, + "step": 20449 + }, + { + "epoch": 2.88760237221124, + "grad_norm": 2.8832625732627757, + "learning_rate": 7.353451193877092e-08, + "loss": 0.15, + "step": 20450 + }, + { + "epoch": 2.8877435752612257, + "grad_norm": 3.393575700983024, + "learning_rate": 7.33500939737164e-08, + "loss": 0.2014, + "step": 20451 + }, + { + "epoch": 2.8878847783112116, + "grad_norm": 3.5786111034923875, + "learning_rate": 7.316590669971813e-08, + "loss": 0.1421, + "step": 20452 + }, + { + "epoch": 2.8880259813611975, + "grad_norm": 4.086293983587457, + "learning_rate": 7.298195012105713e-08, + "loss": 0.1734, + "step": 20453 + }, + { + "epoch": 2.8881671844111834, + "grad_norm": 3.2527230166278374, + "learning_rate": 7.279822424200889e-08, + "loss": 0.1261, + "step": 20454 + }, + { + "epoch": 2.8883083874611692, + "grad_norm": 3.074007960174228, + "learning_rate": 7.261472906684108e-08, + "loss": 0.1408, + "step": 20455 + }, + { + "epoch": 2.888449590511155, + "grad_norm": 4.259213897994005, + "learning_rate": 7.243146459982142e-08, + "loss": 0.1841, + "step": 20456 + }, + { + "epoch": 2.888590793561141, + "grad_norm": 2.9888962442678344, + "learning_rate": 7.224843084520649e-08, + "loss": 0.1443, + "step": 20457 + }, + { + "epoch": 2.888731996611127, + "grad_norm": 3.174048788913956, + "learning_rate": 7.206562780725068e-08, + "loss": 0.165, + "step": 20458 + }, + { + "epoch": 2.888873199661113, + "grad_norm": 3.349364519204902, + "learning_rate": 7.18830554902017e-08, + "loss": 0.1373, + "step": 20459 + }, + { + "epoch": 2.8890144027110987, + "grad_norm": 3.729323360519263, + "learning_rate": 7.17007138983028e-08, + "loss": 0.1566, + "step": 20460 + }, + { + "epoch": 2.8891556057610845, + "grad_norm": 3.0511628559730744, + "learning_rate": 7.151860303579283e-08, + "loss": 0.1425, + "step": 20461 + }, + { + "epoch": 2.8892968088110704, + "grad_norm": 4.625338837018194, + "learning_rate": 7.133672290690064e-08, + "loss": 0.159, + "step": 20462 + }, + { + "epoch": 2.8894380118610563, + "grad_norm": 3.0466701860429115, + "learning_rate": 7.115507351585727e-08, + "loss": 0.1335, + "step": 20463 + }, + { + "epoch": 2.889579214911042, + "grad_norm": 3.539604967695227, + "learning_rate": 7.097365486688158e-08, + "loss": 0.1634, + "step": 20464 + }, + { + "epoch": 2.889720417961028, + "grad_norm": 2.755663442756148, + "learning_rate": 7.079246696418906e-08, + "loss": 0.1742, + "step": 20465 + }, + { + "epoch": 2.889861621011014, + "grad_norm": 3.410707496183685, + "learning_rate": 7.061150981199194e-08, + "loss": 0.1562, + "step": 20466 + }, + { + "epoch": 2.890002824061, + "grad_norm": 2.9440052357002893, + "learning_rate": 7.043078341449572e-08, + "loss": 0.1179, + "step": 20467 + }, + { + "epoch": 2.8901440271109857, + "grad_norm": 3.5253782786270595, + "learning_rate": 7.025028777589926e-08, + "loss": 0.1769, + "step": 20468 + }, + { + "epoch": 2.8902852301609716, + "grad_norm": 4.939303099223028, + "learning_rate": 7.00700229003981e-08, + "loss": 0.2284, + "step": 20469 + }, + { + "epoch": 2.8904264332109575, + "grad_norm": 3.1588728121618685, + "learning_rate": 6.988998879218111e-08, + "loss": 0.1413, + "step": 20470 + }, + { + "epoch": 2.8905676362609434, + "grad_norm": 2.8564319047344098, + "learning_rate": 6.971018545543273e-08, + "loss": 0.1428, + "step": 20471 + }, + { + "epoch": 2.8907088393109293, + "grad_norm": 2.246405780771575, + "learning_rate": 6.953061289433072e-08, + "loss": 0.0955, + "step": 20472 + }, + { + "epoch": 2.890850042360915, + "grad_norm": 3.268887350689115, + "learning_rate": 6.93512711130484e-08, + "loss": 0.1682, + "step": 20473 + }, + { + "epoch": 2.890991245410901, + "grad_norm": 3.3161696418291466, + "learning_rate": 6.917216011575357e-08, + "loss": 0.1638, + "step": 20474 + }, + { + "epoch": 2.891132448460887, + "grad_norm": 4.039205924603781, + "learning_rate": 6.899327990660953e-08, + "loss": 0.1999, + "step": 20475 + }, + { + "epoch": 2.891273651510873, + "grad_norm": 3.2633857839496, + "learning_rate": 6.8814630489773e-08, + "loss": 0.1535, + "step": 20476 + }, + { + "epoch": 2.8914148545608587, + "grad_norm": 3.2603687812863074, + "learning_rate": 6.863621186939506e-08, + "loss": 0.1216, + "step": 20477 + }, + { + "epoch": 2.8915560576108446, + "grad_norm": 3.080963820471434, + "learning_rate": 6.845802404962243e-08, + "loss": 0.1097, + "step": 20478 + }, + { + "epoch": 2.8916972606608304, + "grad_norm": 3.238059986106422, + "learning_rate": 6.828006703459622e-08, + "loss": 0.1538, + "step": 20479 + }, + { + "epoch": 2.8918384637108163, + "grad_norm": 2.7824932750983304, + "learning_rate": 6.810234082845313e-08, + "loss": 0.1189, + "step": 20480 + }, + { + "epoch": 2.891979666760802, + "grad_norm": 2.767215899042566, + "learning_rate": 6.792484543532096e-08, + "loss": 0.1277, + "step": 20481 + }, + { + "epoch": 2.892120869810788, + "grad_norm": 3.0587162095894596, + "learning_rate": 6.774758085932642e-08, + "loss": 0.1715, + "step": 20482 + }, + { + "epoch": 2.892262072860774, + "grad_norm": 3.0856715704051294, + "learning_rate": 6.757054710458955e-08, + "loss": 0.1154, + "step": 20483 + }, + { + "epoch": 2.89240327591076, + "grad_norm": 3.7429884957504793, + "learning_rate": 6.73937441752226e-08, + "loss": 0.1335, + "step": 20484 + }, + { + "epoch": 2.8925444789607457, + "grad_norm": 2.9677825162101588, + "learning_rate": 6.721717207533563e-08, + "loss": 0.1112, + "step": 20485 + }, + { + "epoch": 2.8926856820107316, + "grad_norm": 2.6400047096170787, + "learning_rate": 6.704083080903201e-08, + "loss": 0.1257, + "step": 20486 + }, + { + "epoch": 2.8928268850607175, + "grad_norm": 2.7548952586345603, + "learning_rate": 6.68647203804107e-08, + "loss": 0.1141, + "step": 20487 + }, + { + "epoch": 2.8929680881107034, + "grad_norm": 3.5257397342755192, + "learning_rate": 6.668884079356287e-08, + "loss": 0.1751, + "step": 20488 + }, + { + "epoch": 2.8931092911606893, + "grad_norm": 3.4157519565920924, + "learning_rate": 6.651319205257633e-08, + "loss": 0.1406, + "step": 20489 + }, + { + "epoch": 2.893250494210675, + "grad_norm": 2.9373819735385043, + "learning_rate": 6.633777416153232e-08, + "loss": 0.1074, + "step": 20490 + }, + { + "epoch": 2.893391697260661, + "grad_norm": 2.6086504842958833, + "learning_rate": 6.616258712450973e-08, + "loss": 0.1406, + "step": 20491 + }, + { + "epoch": 2.893532900310647, + "grad_norm": 2.353247234724596, + "learning_rate": 6.59876309455787e-08, + "loss": 0.1016, + "step": 20492 + }, + { + "epoch": 2.893674103360633, + "grad_norm": 3.2037996857375735, + "learning_rate": 6.581290562880372e-08, + "loss": 0.1406, + "step": 20493 + }, + { + "epoch": 2.8938153064106187, + "grad_norm": 3.067531594511451, + "learning_rate": 6.56384111782482e-08, + "loss": 0.1637, + "step": 20494 + }, + { + "epoch": 2.8939565094606046, + "grad_norm": 2.9093622091328597, + "learning_rate": 6.546414759796448e-08, + "loss": 0.1209, + "step": 20495 + }, + { + "epoch": 2.8940977125105904, + "grad_norm": 3.3240384152724465, + "learning_rate": 6.529011489200377e-08, + "loss": 0.117, + "step": 20496 + }, + { + "epoch": 2.8942389155605763, + "grad_norm": 3.3032984332072552, + "learning_rate": 6.511631306441058e-08, + "loss": 0.1687, + "step": 20497 + }, + { + "epoch": 2.894380118610562, + "grad_norm": 3.1027684887947524, + "learning_rate": 6.494274211922392e-08, + "loss": 0.1489, + "step": 20498 + }, + { + "epoch": 2.894521321660548, + "grad_norm": 3.075119500500554, + "learning_rate": 6.476940206047722e-08, + "loss": 0.1738, + "step": 20499 + }, + { + "epoch": 2.894662524710534, + "grad_norm": 3.491713722629365, + "learning_rate": 6.459629289219838e-08, + "loss": 0.1576, + "step": 20500 + }, + { + "epoch": 2.89480372776052, + "grad_norm": 3.023544705036932, + "learning_rate": 6.442341461841084e-08, + "loss": 0.1374, + "step": 20501 + }, + { + "epoch": 2.8949449308105057, + "grad_norm": 2.728131576731426, + "learning_rate": 6.425076724313251e-08, + "loss": 0.0948, + "step": 20502 + }, + { + "epoch": 2.8950861338604916, + "grad_norm": 4.135358863271494, + "learning_rate": 6.407835077037572e-08, + "loss": 0.153, + "step": 20503 + }, + { + "epoch": 2.8952273369104775, + "grad_norm": 2.870204425534631, + "learning_rate": 6.390616520414617e-08, + "loss": 0.1301, + "step": 20504 + }, + { + "epoch": 2.8953685399604634, + "grad_norm": 2.708127117657482, + "learning_rate": 6.373421054844842e-08, + "loss": 0.105, + "step": 20505 + }, + { + "epoch": 2.8955097430104493, + "grad_norm": 3.4564497029643535, + "learning_rate": 6.356248680727484e-08, + "loss": 0.1468, + "step": 20506 + }, + { + "epoch": 2.895650946060435, + "grad_norm": 3.3600931621531567, + "learning_rate": 6.339099398461778e-08, + "loss": 0.1108, + "step": 20507 + }, + { + "epoch": 2.895792149110421, + "grad_norm": 3.135136934310426, + "learning_rate": 6.321973208446298e-08, + "loss": 0.1202, + "step": 20508 + }, + { + "epoch": 2.895933352160407, + "grad_norm": 2.8108124745935563, + "learning_rate": 6.304870111079053e-08, + "loss": 0.1137, + "step": 20509 + }, + { + "epoch": 2.896074555210393, + "grad_norm": 2.092923482290512, + "learning_rate": 6.287790106757396e-08, + "loss": 0.0885, + "step": 20510 + }, + { + "epoch": 2.8962157582603787, + "grad_norm": 3.0261933405427897, + "learning_rate": 6.270733195878454e-08, + "loss": 0.1096, + "step": 20511 + }, + { + "epoch": 2.8963569613103646, + "grad_norm": 2.8067009564149363, + "learning_rate": 6.253699378838462e-08, + "loss": 0.1278, + "step": 20512 + }, + { + "epoch": 2.8964981643603505, + "grad_norm": 3.555494746221602, + "learning_rate": 6.23668865603333e-08, + "loss": 0.151, + "step": 20513 + }, + { + "epoch": 2.896639367410336, + "grad_norm": 2.702676450591189, + "learning_rate": 6.219701027858405e-08, + "loss": 0.1288, + "step": 20514 + }, + { + "epoch": 2.896780570460322, + "grad_norm": 3.345827631928737, + "learning_rate": 6.202736494708484e-08, + "loss": 0.1359, + "step": 20515 + }, + { + "epoch": 2.8969217735103077, + "grad_norm": 2.8478107297888267, + "learning_rate": 6.185795056977695e-08, + "loss": 0.1091, + "step": 20516 + }, + { + "epoch": 2.8970629765602935, + "grad_norm": 2.8027055545669914, + "learning_rate": 6.168876715059835e-08, + "loss": 0.1194, + "step": 20517 + }, + { + "epoch": 2.8972041796102794, + "grad_norm": 2.5689122611176476, + "learning_rate": 6.151981469348034e-08, + "loss": 0.1111, + "step": 20518 + }, + { + "epoch": 2.8973453826602653, + "grad_norm": 3.0549548478960116, + "learning_rate": 6.135109320235089e-08, + "loss": 0.1278, + "step": 20519 + }, + { + "epoch": 2.897486585710251, + "grad_norm": 2.6359749845202782, + "learning_rate": 6.118260268112908e-08, + "loss": 0.1282, + "step": 20520 + }, + { + "epoch": 2.897627788760237, + "grad_norm": 4.077794127699537, + "learning_rate": 6.101434313373178e-08, + "loss": 0.1514, + "step": 20521 + }, + { + "epoch": 2.897768991810223, + "grad_norm": 3.524269086398971, + "learning_rate": 6.084631456406919e-08, + "loss": 0.1549, + "step": 20522 + }, + { + "epoch": 2.897910194860209, + "grad_norm": 2.458890944174128, + "learning_rate": 6.067851697604599e-08, + "loss": 0.0995, + "step": 20523 + }, + { + "epoch": 2.8980513979101947, + "grad_norm": 3.5448931519412525, + "learning_rate": 6.051095037356013e-08, + "loss": 0.1395, + "step": 20524 + }, + { + "epoch": 2.8981926009601806, + "grad_norm": 2.516123939259161, + "learning_rate": 6.034361476050854e-08, + "loss": 0.0996, + "step": 20525 + }, + { + "epoch": 2.8983338040101665, + "grad_norm": 3.6325705908781725, + "learning_rate": 6.017651014077807e-08, + "loss": 0.1291, + "step": 20526 + }, + { + "epoch": 2.8984750070601524, + "grad_norm": 2.785311381226458, + "learning_rate": 6.000963651825343e-08, + "loss": 0.103, + "step": 20527 + }, + { + "epoch": 2.8986162101101383, + "grad_norm": 3.014158136556519, + "learning_rate": 5.984299389681148e-08, + "loss": 0.1028, + "step": 20528 + }, + { + "epoch": 2.898757413160124, + "grad_norm": 4.469046889064139, + "learning_rate": 5.96765822803258e-08, + "loss": 0.1745, + "step": 20529 + }, + { + "epoch": 2.89889861621011, + "grad_norm": 4.261606787965814, + "learning_rate": 5.951040167266331e-08, + "loss": 0.2166, + "step": 20530 + }, + { + "epoch": 2.899039819260096, + "grad_norm": 2.8366535666458903, + "learning_rate": 5.9344452077686464e-08, + "loss": 0.1137, + "step": 20531 + }, + { + "epoch": 2.899181022310082, + "grad_norm": 2.9425002978559354, + "learning_rate": 5.9178733499251073e-08, + "loss": 0.1232, + "step": 20532 + }, + { + "epoch": 2.8993222253600677, + "grad_norm": 3.510458337353858, + "learning_rate": 5.9013245941209606e-08, + "loss": 0.1416, + "step": 20533 + }, + { + "epoch": 2.8994634284100536, + "grad_norm": 3.185630911202359, + "learning_rate": 5.884798940740566e-08, + "loss": 0.1271, + "step": 20534 + }, + { + "epoch": 2.8996046314600394, + "grad_norm": 4.253389590310115, + "learning_rate": 5.868296390168282e-08, + "loss": 0.2083, + "step": 20535 + }, + { + "epoch": 2.8997458345100253, + "grad_norm": 2.916721573396054, + "learning_rate": 5.8518169427873584e-08, + "loss": 0.1247, + "step": 20536 + }, + { + "epoch": 2.899887037560011, + "grad_norm": 3.230876002981545, + "learning_rate": 5.8353605989808216e-08, + "loss": 0.1226, + "step": 20537 + }, + { + "epoch": 2.900028240609997, + "grad_norm": 3.6281781335381953, + "learning_rate": 5.8189273591312546e-08, + "loss": 0.2213, + "step": 20538 + }, + { + "epoch": 2.900169443659983, + "grad_norm": 2.689572629520733, + "learning_rate": 5.802517223620463e-08, + "loss": 0.1362, + "step": 20539 + }, + { + "epoch": 2.900310646709969, + "grad_norm": 3.5437054851037924, + "learning_rate": 5.786130192829809e-08, + "loss": 0.124, + "step": 20540 + }, + { + "epoch": 2.9004518497599547, + "grad_norm": 3.1720214747014523, + "learning_rate": 5.769766267140098e-08, + "loss": 0.1214, + "step": 20541 + }, + { + "epoch": 2.9005930528099406, + "grad_norm": 3.722765653954463, + "learning_rate": 5.753425446931582e-08, + "loss": 0.1467, + "step": 20542 + }, + { + "epoch": 2.9007342558599265, + "grad_norm": 3.508121745691823, + "learning_rate": 5.7371077325841795e-08, + "loss": 0.154, + "step": 20543 + }, + { + "epoch": 2.9008754589099124, + "grad_norm": 3.211094480738291, + "learning_rate": 5.7208131244769206e-08, + "loss": 0.1527, + "step": 20544 + }, + { + "epoch": 2.9010166619598983, + "grad_norm": 3.3009171935040826, + "learning_rate": 5.704541622988613e-08, + "loss": 0.1512, + "step": 20545 + }, + { + "epoch": 2.901157865009884, + "grad_norm": 3.560123632873036, + "learning_rate": 5.688293228497399e-08, + "loss": 0.146, + "step": 20546 + }, + { + "epoch": 2.90129906805987, + "grad_norm": 3.3610064312673034, + "learning_rate": 5.6720679413807546e-08, + "loss": 0.1418, + "step": 20547 + }, + { + "epoch": 2.901440271109856, + "grad_norm": 2.6780967734068053, + "learning_rate": 5.655865762015822e-08, + "loss": 0.1374, + "step": 20548 + }, + { + "epoch": 2.901581474159842, + "grad_norm": 2.8386528058332856, + "learning_rate": 5.6396866907791893e-08, + "loss": 0.1518, + "step": 20549 + }, + { + "epoch": 2.9017226772098277, + "grad_norm": 3.5994912872353693, + "learning_rate": 5.623530728046889e-08, + "loss": 0.1468, + "step": 20550 + }, + { + "epoch": 2.9018638802598136, + "grad_norm": 3.373388226146045, + "learning_rate": 5.607397874194176e-08, + "loss": 0.1266, + "step": 20551 + }, + { + "epoch": 2.9020050833097994, + "grad_norm": 3.2965930174226448, + "learning_rate": 5.591288129596195e-08, + "loss": 0.1574, + "step": 20552 + }, + { + "epoch": 2.9021462863597853, + "grad_norm": 3.47342503135177, + "learning_rate": 5.575201494627092e-08, + "loss": 0.166, + "step": 20553 + }, + { + "epoch": 2.902287489409771, + "grad_norm": 3.718683682231933, + "learning_rate": 5.559137969660899e-08, + "loss": 0.1697, + "step": 20554 + }, + { + "epoch": 2.902428692459757, + "grad_norm": 3.0037856051481095, + "learning_rate": 5.543097555070875e-08, + "loss": 0.1389, + "step": 20555 + }, + { + "epoch": 2.902569895509743, + "grad_norm": 3.7801174401943785, + "learning_rate": 5.527080251229833e-08, + "loss": 0.1817, + "step": 20556 + }, + { + "epoch": 2.902711098559729, + "grad_norm": 3.805432416613619, + "learning_rate": 5.511086058510029e-08, + "loss": 0.1742, + "step": 20557 + }, + { + "epoch": 2.9028523016097147, + "grad_norm": 3.187896854003314, + "learning_rate": 5.495114977282945e-08, + "loss": 0.1447, + "step": 20558 + }, + { + "epoch": 2.9029935046597006, + "grad_norm": 2.0275498532588063, + "learning_rate": 5.47916700791995e-08, + "loss": 0.078, + "step": 20559 + }, + { + "epoch": 2.9031347077096865, + "grad_norm": 3.2328042519877855, + "learning_rate": 5.4632421507916366e-08, + "loss": 0.1228, + "step": 20560 + }, + { + "epoch": 2.9032759107596724, + "grad_norm": 3.760951903687996, + "learning_rate": 5.447340406268042e-08, + "loss": 0.1724, + "step": 20561 + }, + { + "epoch": 2.9034171138096583, + "grad_norm": 3.4254710868456293, + "learning_rate": 5.43146177471876e-08, + "loss": 0.1647, + "step": 20562 + }, + { + "epoch": 2.903558316859644, + "grad_norm": 2.913810170412307, + "learning_rate": 5.4156062565128284e-08, + "loss": 0.1206, + "step": 20563 + }, + { + "epoch": 2.90369951990963, + "grad_norm": 2.77004274528071, + "learning_rate": 5.3997738520186194e-08, + "loss": 0.1112, + "step": 20564 + }, + { + "epoch": 2.903840722959616, + "grad_norm": 3.633839262611724, + "learning_rate": 5.3839645616041716e-08, + "loss": 0.1905, + "step": 20565 + }, + { + "epoch": 2.903981926009602, + "grad_norm": 2.5640380386202066, + "learning_rate": 5.3681783856367466e-08, + "loss": 0.1067, + "step": 20566 + }, + { + "epoch": 2.9041231290595877, + "grad_norm": 2.6716682409252104, + "learning_rate": 5.3524153244833844e-08, + "loss": 0.1169, + "step": 20567 + }, + { + "epoch": 2.9042643321095736, + "grad_norm": 3.4311303208707002, + "learning_rate": 5.336675378510348e-08, + "loss": 0.1808, + "step": 20568 + }, + { + "epoch": 2.9044055351595595, + "grad_norm": 3.0770370668437153, + "learning_rate": 5.3209585480834546e-08, + "loss": 0.1479, + "step": 20569 + }, + { + "epoch": 2.9045467382095453, + "grad_norm": 2.82957882467653, + "learning_rate": 5.305264833567747e-08, + "loss": 0.1381, + "step": 20570 + }, + { + "epoch": 2.9046879412595312, + "grad_norm": 2.9235539149470755, + "learning_rate": 5.289594235328266e-08, + "loss": 0.1247, + "step": 20571 + }, + { + "epoch": 2.904829144309517, + "grad_norm": 3.069535426301139, + "learning_rate": 5.273946753728942e-08, + "loss": 0.1348, + "step": 20572 + }, + { + "epoch": 2.904970347359503, + "grad_norm": 2.5431649632960496, + "learning_rate": 5.2583223891335963e-08, + "loss": 0.1154, + "step": 20573 + }, + { + "epoch": 2.905111550409489, + "grad_norm": 2.792071088708111, + "learning_rate": 5.2427211419051605e-08, + "loss": 0.1085, + "step": 20574 + }, + { + "epoch": 2.9052527534594748, + "grad_norm": 3.111142221769998, + "learning_rate": 5.227143012406344e-08, + "loss": 0.132, + "step": 20575 + }, + { + "epoch": 2.9053939565094606, + "grad_norm": 3.012555053401586, + "learning_rate": 5.2115880009990796e-08, + "loss": 0.1299, + "step": 20576 + }, + { + "epoch": 2.9055351595594465, + "grad_norm": 3.585818064677974, + "learning_rate": 5.1960561080448556e-08, + "loss": 0.1811, + "step": 20577 + }, + { + "epoch": 2.9056763626094324, + "grad_norm": 3.6598738957239303, + "learning_rate": 5.1805473339047173e-08, + "loss": 0.1452, + "step": 20578 + }, + { + "epoch": 2.9058175656594183, + "grad_norm": 3.190465222885676, + "learning_rate": 5.165061678939043e-08, + "loss": 0.1712, + "step": 20579 + }, + { + "epoch": 2.905958768709404, + "grad_norm": 4.6852118542538275, + "learning_rate": 5.1495991435076555e-08, + "loss": 0.2239, + "step": 20580 + }, + { + "epoch": 2.90609997175939, + "grad_norm": 2.5084334614923836, + "learning_rate": 5.134159727970045e-08, + "loss": 0.077, + "step": 20581 + }, + { + "epoch": 2.906241174809376, + "grad_norm": 3.7488561526009287, + "learning_rate": 5.118743432684814e-08, + "loss": 0.1459, + "step": 20582 + }, + { + "epoch": 2.906382377859362, + "grad_norm": 3.0030546666322824, + "learning_rate": 5.1033502580103424e-08, + "loss": 0.1444, + "step": 20583 + }, + { + "epoch": 2.9065235809093477, + "grad_norm": 3.1552555327796172, + "learning_rate": 5.0879802043043434e-08, + "loss": 0.1241, + "step": 20584 + }, + { + "epoch": 2.9066647839593336, + "grad_norm": 3.012337417866576, + "learning_rate": 5.0726332719240875e-08, + "loss": 0.1334, + "step": 20585 + }, + { + "epoch": 2.9068059870093195, + "grad_norm": 2.9748743749248723, + "learning_rate": 5.0573094612260676e-08, + "loss": 0.1298, + "step": 20586 + }, + { + "epoch": 2.9069471900593054, + "grad_norm": 3.933147051744183, + "learning_rate": 5.0420087725664424e-08, + "loss": 0.1597, + "step": 20587 + }, + { + "epoch": 2.9070883931092912, + "grad_norm": 2.659026973823948, + "learning_rate": 5.0267312063009275e-08, + "loss": 0.1261, + "step": 20588 + }, + { + "epoch": 2.907229596159277, + "grad_norm": 3.8984229382983395, + "learning_rate": 5.011476762784462e-08, + "loss": 0.1396, + "step": 20589 + }, + { + "epoch": 2.907370799209263, + "grad_norm": 3.445395458874534, + "learning_rate": 4.99624544237165e-08, + "loss": 0.1649, + "step": 20590 + }, + { + "epoch": 2.907512002259249, + "grad_norm": 3.8573817686669156, + "learning_rate": 4.9810372454163204e-08, + "loss": 0.1699, + "step": 20591 + }, + { + "epoch": 2.9076532053092348, + "grad_norm": 3.1669987064714795, + "learning_rate": 4.9658521722719676e-08, + "loss": 0.1478, + "step": 20592 + }, + { + "epoch": 2.9077944083592207, + "grad_norm": 3.3638835504309457, + "learning_rate": 4.950690223291532e-08, + "loss": 0.1473, + "step": 20593 + }, + { + "epoch": 2.9079356114092065, + "grad_norm": 2.448279382540172, + "learning_rate": 4.9355513988272874e-08, + "loss": 0.119, + "step": 20594 + }, + { + "epoch": 2.9080768144591924, + "grad_norm": 3.065077446080416, + "learning_rate": 4.920435699231063e-08, + "loss": 0.1366, + "step": 20595 + }, + { + "epoch": 2.9082180175091783, + "grad_norm": 3.0248104246976135, + "learning_rate": 4.905343124854245e-08, + "loss": 0.1252, + "step": 20596 + }, + { + "epoch": 2.908359220559164, + "grad_norm": 3.112156540386536, + "learning_rate": 4.890273676047441e-08, + "loss": 0.1782, + "step": 20597 + }, + { + "epoch": 2.90850042360915, + "grad_norm": 3.4431935218503615, + "learning_rate": 4.8752273531609276e-08, + "loss": 0.1335, + "step": 20598 + }, + { + "epoch": 2.908641626659136, + "grad_norm": 2.4756058760101745, + "learning_rate": 4.860204156544423e-08, + "loss": 0.1018, + "step": 20599 + }, + { + "epoch": 2.908782829709122, + "grad_norm": 3.024293720603294, + "learning_rate": 4.845204086546984e-08, + "loss": 0.1285, + "step": 20600 + }, + { + "epoch": 2.9089240327591077, + "grad_norm": 3.3369630983548104, + "learning_rate": 4.8302271435172185e-08, + "loss": 0.1658, + "step": 20601 + }, + { + "epoch": 2.9090652358090936, + "grad_norm": 3.189440499259207, + "learning_rate": 4.815273327803183e-08, + "loss": 0.128, + "step": 20602 + }, + { + "epoch": 2.9092064388590795, + "grad_norm": 4.185070068262172, + "learning_rate": 4.800342639752376e-08, + "loss": 0.1894, + "step": 20603 + }, + { + "epoch": 2.9093476419090654, + "grad_norm": 2.794209858605379, + "learning_rate": 4.785435079711853e-08, + "loss": 0.1342, + "step": 20604 + }, + { + "epoch": 2.9094888449590512, + "grad_norm": 3.0896557774257465, + "learning_rate": 4.770550648028005e-08, + "loss": 0.1591, + "step": 20605 + }, + { + "epoch": 2.909630048009037, + "grad_norm": 3.068451601135974, + "learning_rate": 4.7556893450466656e-08, + "loss": 0.1225, + "step": 20606 + }, + { + "epoch": 2.909771251059023, + "grad_norm": 3.143305517524094, + "learning_rate": 4.7408511711134476e-08, + "loss": 0.1341, + "step": 20607 + }, + { + "epoch": 2.909912454109009, + "grad_norm": 2.872643059985098, + "learning_rate": 4.7260361265729635e-08, + "loss": 0.1242, + "step": 20608 + }, + { + "epoch": 2.9100536571589948, + "grad_norm": 3.3369244685605635, + "learning_rate": 4.7112442117696056e-08, + "loss": 0.1608, + "step": 20609 + }, + { + "epoch": 2.9101948602089807, + "grad_norm": 2.542858835176921, + "learning_rate": 4.696475427047098e-08, + "loss": 0.1075, + "step": 20610 + }, + { + "epoch": 2.9103360632589665, + "grad_norm": 3.074874785051404, + "learning_rate": 4.681729772748611e-08, + "loss": 0.116, + "step": 20611 + }, + { + "epoch": 2.9104772663089524, + "grad_norm": 2.568794795805062, + "learning_rate": 4.66700724921687e-08, + "loss": 0.1251, + "step": 20612 + }, + { + "epoch": 2.9106184693589383, + "grad_norm": 3.548891185444923, + "learning_rate": 4.6523078567941584e-08, + "loss": 0.1394, + "step": 20613 + }, + { + "epoch": 2.910759672408924, + "grad_norm": 3.4916704760749617, + "learning_rate": 4.6376315958218676e-08, + "loss": 0.179, + "step": 20614 + }, + { + "epoch": 2.91090087545891, + "grad_norm": 3.079411566346087, + "learning_rate": 4.622978466641059e-08, + "loss": 0.1381, + "step": 20615 + }, + { + "epoch": 2.911042078508896, + "grad_norm": 4.009749453657712, + "learning_rate": 4.608348469592461e-08, + "loss": 0.1848, + "step": 20616 + }, + { + "epoch": 2.9111832815588814, + "grad_norm": 2.5861744635033, + "learning_rate": 4.593741605016022e-08, + "loss": 0.1272, + "step": 20617 + }, + { + "epoch": 2.9113244846088673, + "grad_norm": 3.8874165149774735, + "learning_rate": 4.5791578732510276e-08, + "loss": 0.1574, + "step": 20618 + }, + { + "epoch": 2.911465687658853, + "grad_norm": 3.7686938576421705, + "learning_rate": 4.5645972746366504e-08, + "loss": 0.2157, + "step": 20619 + }, + { + "epoch": 2.911606890708839, + "grad_norm": 2.4213737039995946, + "learning_rate": 4.5500598095110645e-08, + "loss": 0.1254, + "step": 20620 + }, + { + "epoch": 2.911748093758825, + "grad_norm": 3.371569193850073, + "learning_rate": 4.535545478212111e-08, + "loss": 0.1302, + "step": 20621 + }, + { + "epoch": 2.911889296808811, + "grad_norm": 4.105715252389308, + "learning_rate": 4.5210542810771864e-08, + "loss": 0.177, + "step": 20622 + }, + { + "epoch": 2.9120304998587967, + "grad_norm": 3.7299333545776747, + "learning_rate": 4.5065862184431316e-08, + "loss": 0.1319, + "step": 20623 + }, + { + "epoch": 2.9121717029087826, + "grad_norm": 3.946706279814059, + "learning_rate": 4.492141290646124e-08, + "loss": 0.1511, + "step": 20624 + }, + { + "epoch": 2.9123129059587685, + "grad_norm": 3.470777075526393, + "learning_rate": 4.477719498021782e-08, + "loss": 0.1478, + "step": 20625 + }, + { + "epoch": 2.9124541090087543, + "grad_norm": 2.9124860624509914, + "learning_rate": 4.463320840905283e-08, + "loss": 0.1345, + "step": 20626 + }, + { + "epoch": 2.9125953120587402, + "grad_norm": 2.888242426750076, + "learning_rate": 4.448945319631248e-08, + "loss": 0.1295, + "step": 20627 + }, + { + "epoch": 2.912736515108726, + "grad_norm": 3.668216460436277, + "learning_rate": 4.4345929345337436e-08, + "loss": 0.1344, + "step": 20628 + }, + { + "epoch": 2.912877718158712, + "grad_norm": 3.0647255760013583, + "learning_rate": 4.420263685946391e-08, + "loss": 0.15, + "step": 20629 + }, + { + "epoch": 2.913018921208698, + "grad_norm": 3.4077466793394797, + "learning_rate": 4.405957574202147e-08, + "loss": 0.1246, + "step": 20630 + }, + { + "epoch": 2.9131601242586838, + "grad_norm": 3.1885507043893395, + "learning_rate": 4.391674599633522e-08, + "loss": 0.1644, + "step": 20631 + }, + { + "epoch": 2.9133013273086696, + "grad_norm": 3.65860796835861, + "learning_rate": 4.377414762572474e-08, + "loss": 0.1784, + "step": 20632 + }, + { + "epoch": 2.9134425303586555, + "grad_norm": 3.069461625502871, + "learning_rate": 4.363178063350182e-08, + "loss": 0.1368, + "step": 20633 + }, + { + "epoch": 2.9135837334086414, + "grad_norm": 2.7567457953426335, + "learning_rate": 4.348964502297714e-08, + "loss": 0.1208, + "step": 20634 + }, + { + "epoch": 2.9137249364586273, + "grad_norm": 2.7005781743177604, + "learning_rate": 4.334774079745363e-08, + "loss": 0.1408, + "step": 20635 + }, + { + "epoch": 2.913866139508613, + "grad_norm": 3.0146244281751713, + "learning_rate": 4.320606796022864e-08, + "loss": 0.1615, + "step": 20636 + }, + { + "epoch": 2.914007342558599, + "grad_norm": 3.4123430480684935, + "learning_rate": 4.306462651459398e-08, + "loss": 0.1468, + "step": 20637 + }, + { + "epoch": 2.914148545608585, + "grad_norm": 2.702399196490913, + "learning_rate": 4.292341646383813e-08, + "loss": 0.1259, + "step": 20638 + }, + { + "epoch": 2.914289748658571, + "grad_norm": 3.548458024606317, + "learning_rate": 4.278243781124069e-08, + "loss": 0.1612, + "step": 20639 + }, + { + "epoch": 2.9144309517085567, + "grad_norm": 3.1274009864496817, + "learning_rate": 4.264169056008016e-08, + "loss": 0.1338, + "step": 20640 + }, + { + "epoch": 2.9145721547585426, + "grad_norm": 3.8548837279296135, + "learning_rate": 4.250117471362614e-08, + "loss": 0.1303, + "step": 20641 + }, + { + "epoch": 2.9147133578085285, + "grad_norm": 3.6116792849151427, + "learning_rate": 4.23608902751449e-08, + "loss": 0.1382, + "step": 20642 + }, + { + "epoch": 2.9148545608585144, + "grad_norm": 2.9574634916319256, + "learning_rate": 4.222083724789605e-08, + "loss": 0.1178, + "step": 20643 + }, + { + "epoch": 2.9149957639085002, + "grad_norm": 2.909926077374777, + "learning_rate": 4.208101563513367e-08, + "loss": 0.1305, + "step": 20644 + }, + { + "epoch": 2.915136966958486, + "grad_norm": 3.3274497252526922, + "learning_rate": 4.1941425440108484e-08, + "loss": 0.1734, + "step": 20645 + }, + { + "epoch": 2.915278170008472, + "grad_norm": 3.2982580975748683, + "learning_rate": 4.1802066666064564e-08, + "loss": 0.1617, + "step": 20646 + }, + { + "epoch": 2.915419373058458, + "grad_norm": 2.7671825821532794, + "learning_rate": 4.166293931624044e-08, + "loss": 0.1181, + "step": 20647 + }, + { + "epoch": 2.9155605761084438, + "grad_norm": 2.5012952267238893, + "learning_rate": 4.152404339386795e-08, + "loss": 0.1176, + "step": 20648 + }, + { + "epoch": 2.9157017791584297, + "grad_norm": 2.995845114732307, + "learning_rate": 4.1385378902175644e-08, + "loss": 0.1525, + "step": 20649 + }, + { + "epoch": 2.9158429822084155, + "grad_norm": 3.1210798974708114, + "learning_rate": 4.1246945844387595e-08, + "loss": 0.1441, + "step": 20650 + }, + { + "epoch": 2.9159841852584014, + "grad_norm": 3.5658370907133, + "learning_rate": 4.110874422371902e-08, + "loss": 0.1688, + "step": 20651 + }, + { + "epoch": 2.9161253883083873, + "grad_norm": 3.014379952635346, + "learning_rate": 4.097077404338179e-08, + "loss": 0.138, + "step": 20652 + }, + { + "epoch": 2.916266591358373, + "grad_norm": 2.8224202474785347, + "learning_rate": 4.083303530658334e-08, + "loss": 0.1542, + "step": 20653 + }, + { + "epoch": 2.916407794408359, + "grad_norm": 4.142879778442981, + "learning_rate": 4.069552801652443e-08, + "loss": 0.1627, + "step": 20654 + }, + { + "epoch": 2.916548997458345, + "grad_norm": 3.2803040965345387, + "learning_rate": 4.0558252176399196e-08, + "loss": 0.1616, + "step": 20655 + }, + { + "epoch": 2.916690200508331, + "grad_norm": 3.0218104968711583, + "learning_rate": 4.042120778939951e-08, + "loss": 0.1472, + "step": 20656 + }, + { + "epoch": 2.9168314035583167, + "grad_norm": 2.811577288548532, + "learning_rate": 4.0284394858710605e-08, + "loss": 0.1463, + "step": 20657 + }, + { + "epoch": 2.9169726066083026, + "grad_norm": 4.012848255537006, + "learning_rate": 4.014781338751106e-08, + "loss": 0.1628, + "step": 20658 + }, + { + "epoch": 2.9171138096582885, + "grad_norm": 3.4371211572305826, + "learning_rate": 4.001146337897388e-08, + "loss": 0.1498, + "step": 20659 + }, + { + "epoch": 2.9172550127082744, + "grad_norm": 2.791681621900621, + "learning_rate": 3.987534483626987e-08, + "loss": 0.127, + "step": 20660 + }, + { + "epoch": 2.9173962157582602, + "grad_norm": 2.476333091612153, + "learning_rate": 3.973945776256094e-08, + "loss": 0.1118, + "step": 20661 + }, + { + "epoch": 2.917537418808246, + "grad_norm": 4.105454122562266, + "learning_rate": 3.9603802161005675e-08, + "loss": 0.1472, + "step": 20662 + }, + { + "epoch": 2.917678621858232, + "grad_norm": 3.9702008244447806, + "learning_rate": 3.9468378034756006e-08, + "loss": 0.1659, + "step": 20663 + }, + { + "epoch": 2.917819824908218, + "grad_norm": 2.6704580893739545, + "learning_rate": 3.933318538695941e-08, + "loss": 0.1313, + "step": 20664 + }, + { + "epoch": 2.917961027958204, + "grad_norm": 3.235185417130201, + "learning_rate": 3.919822422075892e-08, + "loss": 0.1334, + "step": 20665 + }, + { + "epoch": 2.9181022310081897, + "grad_norm": 2.8602905402509773, + "learning_rate": 3.906349453928981e-08, + "loss": 0.1215, + "step": 20666 + }, + { + "epoch": 2.9182434340581755, + "grad_norm": 2.910720535632143, + "learning_rate": 3.892899634568292e-08, + "loss": 0.1148, + "step": 20667 + }, + { + "epoch": 2.9183846371081614, + "grad_norm": 3.275019393866095, + "learning_rate": 3.8794729643064635e-08, + "loss": 0.1658, + "step": 20668 + }, + { + "epoch": 2.9185258401581473, + "grad_norm": 2.7619975483667036, + "learning_rate": 3.866069443455467e-08, + "loss": 0.1083, + "step": 20669 + }, + { + "epoch": 2.918667043208133, + "grad_norm": 3.665868116531876, + "learning_rate": 3.852689072326832e-08, + "loss": 0.2055, + "step": 20670 + }, + { + "epoch": 2.918808246258119, + "grad_norm": 3.6270764814892793, + "learning_rate": 3.839331851231421e-08, + "loss": 0.1548, + "step": 20671 + }, + { + "epoch": 2.918949449308105, + "grad_norm": 3.231434037687821, + "learning_rate": 3.8259977804797624e-08, + "loss": 0.1238, + "step": 20672 + }, + { + "epoch": 2.919090652358091, + "grad_norm": 3.0116236623376227, + "learning_rate": 3.81268686038172e-08, + "loss": 0.1157, + "step": 20673 + }, + { + "epoch": 2.9192318554080767, + "grad_norm": 3.2034714968187816, + "learning_rate": 3.799399091246603e-08, + "loss": 0.1764, + "step": 20674 + }, + { + "epoch": 2.9193730584580626, + "grad_norm": 2.1730914696243318, + "learning_rate": 3.786134473383385e-08, + "loss": 0.0982, + "step": 20675 + }, + { + "epoch": 2.9195142615080485, + "grad_norm": 2.7394912855776874, + "learning_rate": 3.772893007100042e-08, + "loss": 0.1043, + "step": 20676 + }, + { + "epoch": 2.9196554645580344, + "grad_norm": 2.9064309918986333, + "learning_rate": 3.759674692704329e-08, + "loss": 0.1529, + "step": 20677 + }, + { + "epoch": 2.9197966676080203, + "grad_norm": 3.1629995701856384, + "learning_rate": 3.7464795305036664e-08, + "loss": 0.1466, + "step": 20678 + }, + { + "epoch": 2.919937870658006, + "grad_norm": 3.6583183622244233, + "learning_rate": 3.733307520804475e-08, + "loss": 0.1486, + "step": 20679 + }, + { + "epoch": 2.920079073707992, + "grad_norm": 2.7883636266396845, + "learning_rate": 3.720158663913065e-08, + "loss": 0.1476, + "step": 20680 + }, + { + "epoch": 2.920220276757978, + "grad_norm": 3.678963589093205, + "learning_rate": 3.7070329601348595e-08, + "loss": 0.1674, + "step": 20681 + }, + { + "epoch": 2.920361479807964, + "grad_norm": 3.175279569782027, + "learning_rate": 3.693930409774948e-08, + "loss": 0.1507, + "step": 20682 + }, + { + "epoch": 2.9205026828579497, + "grad_norm": 3.5342030900986035, + "learning_rate": 3.680851013137754e-08, + "loss": 0.1295, + "step": 20683 + }, + { + "epoch": 2.9206438859079356, + "grad_norm": 2.8709879688393145, + "learning_rate": 3.6677947705273664e-08, + "loss": 0.1314, + "step": 20684 + }, + { + "epoch": 2.9207850889579214, + "grad_norm": 3.197324100402615, + "learning_rate": 3.6547616822472096e-08, + "loss": 0.1309, + "step": 20685 + }, + { + "epoch": 2.9209262920079073, + "grad_norm": 2.9835732558840347, + "learning_rate": 3.641751748600042e-08, + "loss": 0.1314, + "step": 20686 + }, + { + "epoch": 2.921067495057893, + "grad_norm": 2.5876950558789558, + "learning_rate": 3.628764969888288e-08, + "loss": 0.1126, + "step": 20687 + }, + { + "epoch": 2.921208698107879, + "grad_norm": 3.0394205686051556, + "learning_rate": 3.6158013464137056e-08, + "loss": 0.1478, + "step": 20688 + }, + { + "epoch": 2.921349901157865, + "grad_norm": 3.071610915454667, + "learning_rate": 3.60286087847761e-08, + "loss": 0.1247, + "step": 20689 + }, + { + "epoch": 2.921491104207851, + "grad_norm": 3.00361193045679, + "learning_rate": 3.589943566380649e-08, + "loss": 0.1513, + "step": 20690 + }, + { + "epoch": 2.9216323072578367, + "grad_norm": 3.0007849268330937, + "learning_rate": 3.577049410423139e-08, + "loss": 0.1575, + "step": 20691 + }, + { + "epoch": 2.9217735103078226, + "grad_norm": 3.1187967551886517, + "learning_rate": 3.5641784109047286e-08, + "loss": 0.1358, + "step": 20692 + }, + { + "epoch": 2.9219147133578085, + "grad_norm": 3.054698407647, + "learning_rate": 3.5513305681244005e-08, + "loss": 0.136, + "step": 20693 + }, + { + "epoch": 2.9220559164077944, + "grad_norm": 2.915835599753541, + "learning_rate": 3.538505882380916e-08, + "loss": 0.1364, + "step": 20694 + }, + { + "epoch": 2.9221971194577803, + "grad_norm": 3.1620959998214113, + "learning_rate": 3.525704353972037e-08, + "loss": 0.1336, + "step": 20695 + }, + { + "epoch": 2.922338322507766, + "grad_norm": 3.0924892481007427, + "learning_rate": 3.5129259831956366e-08, + "loss": 0.1444, + "step": 20696 + }, + { + "epoch": 2.922479525557752, + "grad_norm": 3.470002681050187, + "learning_rate": 3.5001707703483654e-08, + "loss": 0.1745, + "step": 20697 + }, + { + "epoch": 2.922620728607738, + "grad_norm": 2.9846891055410776, + "learning_rate": 3.4874387157268766e-08, + "loss": 0.1446, + "step": 20698 + }, + { + "epoch": 2.922761931657724, + "grad_norm": 2.7678246979430225, + "learning_rate": 3.474729819626821e-08, + "loss": 0.1088, + "step": 20699 + }, + { + "epoch": 2.9229031347077097, + "grad_norm": 3.1710246294727544, + "learning_rate": 3.4620440823438517e-08, + "loss": 0.1143, + "step": 20700 + }, + { + "epoch": 2.9230443377576956, + "grad_norm": 3.6941462057199694, + "learning_rate": 3.449381504172511e-08, + "loss": 0.1398, + "step": 20701 + }, + { + "epoch": 2.9231855408076814, + "grad_norm": 3.855436662236348, + "learning_rate": 3.436742085407119e-08, + "loss": 0.1729, + "step": 20702 + }, + { + "epoch": 2.9233267438576673, + "grad_norm": 3.4460999537157244, + "learning_rate": 3.42412582634144e-08, + "loss": 0.1375, + "step": 20703 + }, + { + "epoch": 2.923467946907653, + "grad_norm": 2.988793469569093, + "learning_rate": 3.411532727268796e-08, + "loss": 0.1449, + "step": 20704 + }, + { + "epoch": 2.923609149957639, + "grad_norm": 2.565248733656489, + "learning_rate": 3.39896278848173e-08, + "loss": 0.1066, + "step": 20705 + }, + { + "epoch": 2.923750353007625, + "grad_norm": 3.3259557448948174, + "learning_rate": 3.386416010272342e-08, + "loss": 0.1599, + "step": 20706 + }, + { + "epoch": 2.923891556057611, + "grad_norm": 3.325760222419784, + "learning_rate": 3.373892392932177e-08, + "loss": 0.158, + "step": 20707 + }, + { + "epoch": 2.9240327591075967, + "grad_norm": 3.4260832924187716, + "learning_rate": 3.361391936752445e-08, + "loss": 0.1645, + "step": 20708 + }, + { + "epoch": 2.9241739621575826, + "grad_norm": 3.041494824850197, + "learning_rate": 3.348914642023471e-08, + "loss": 0.1202, + "step": 20709 + }, + { + "epoch": 2.9243151652075685, + "grad_norm": 3.5002303042328693, + "learning_rate": 3.336460509035355e-08, + "loss": 0.1636, + "step": 20710 + }, + { + "epoch": 2.9244563682575544, + "grad_norm": 2.6280002463912866, + "learning_rate": 3.324029538077422e-08, + "loss": 0.1082, + "step": 20711 + }, + { + "epoch": 2.9245975713075403, + "grad_norm": 2.8471540734428586, + "learning_rate": 3.311621729438552e-08, + "loss": 0.1349, + "step": 20712 + }, + { + "epoch": 2.924738774357526, + "grad_norm": 2.836371985922127, + "learning_rate": 3.299237083407292e-08, + "loss": 0.1094, + "step": 20713 + }, + { + "epoch": 2.924879977407512, + "grad_norm": 2.7592170590034395, + "learning_rate": 3.2868756002712997e-08, + "loss": 0.1278, + "step": 20714 + }, + { + "epoch": 2.925021180457498, + "grad_norm": 3.4417930735856674, + "learning_rate": 3.274537280317791e-08, + "loss": 0.1548, + "step": 20715 + }, + { + "epoch": 2.925162383507484, + "grad_norm": 2.7308953201801556, + "learning_rate": 3.262222123833647e-08, + "loss": 0.1038, + "step": 20716 + }, + { + "epoch": 2.9253035865574697, + "grad_norm": 2.7316602019666765, + "learning_rate": 3.249930131105083e-08, + "loss": 0.1113, + "step": 20717 + }, + { + "epoch": 2.9254447896074556, + "grad_norm": 2.214370781847232, + "learning_rate": 3.2376613024175384e-08, + "loss": 0.0972, + "step": 20718 + }, + { + "epoch": 2.9255859926574415, + "grad_norm": 3.68726946261553, + "learning_rate": 3.2254156380562284e-08, + "loss": 0.1657, + "step": 20719 + }, + { + "epoch": 2.9257271957074273, + "grad_norm": 3.206053138256326, + "learning_rate": 3.2131931383059256e-08, + "loss": 0.1352, + "step": 20720 + }, + { + "epoch": 2.9258683987574132, + "grad_norm": 2.7041246542698967, + "learning_rate": 3.200993803450514e-08, + "loss": 0.1212, + "step": 20721 + }, + { + "epoch": 2.926009601807399, + "grad_norm": 4.0396321016783325, + "learning_rate": 3.1888176337734334e-08, + "loss": 0.1911, + "step": 20722 + }, + { + "epoch": 2.926150804857385, + "grad_norm": 3.6614859253957563, + "learning_rate": 3.176664629557902e-08, + "loss": 0.1632, + "step": 20723 + }, + { + "epoch": 2.926292007907371, + "grad_norm": 3.2530270875938125, + "learning_rate": 3.1645347910860266e-08, + "loss": 0.1532, + "step": 20724 + }, + { + "epoch": 2.9264332109573568, + "grad_norm": 3.9304211142910948, + "learning_rate": 3.152428118639916e-08, + "loss": 0.1747, + "step": 20725 + }, + { + "epoch": 2.9265744140073426, + "grad_norm": 3.9863289913120044, + "learning_rate": 3.140344612500901e-08, + "loss": 0.1595, + "step": 20726 + }, + { + "epoch": 2.9267156170573285, + "grad_norm": 3.4857989278888613, + "learning_rate": 3.1282842729497556e-08, + "loss": 0.1155, + "step": 20727 + }, + { + "epoch": 2.9268568201073144, + "grad_norm": 2.8658592154206417, + "learning_rate": 3.1162471002668113e-08, + "loss": 0.1171, + "step": 20728 + }, + { + "epoch": 2.9269980231573003, + "grad_norm": 4.260285034632423, + "learning_rate": 3.1042330947316235e-08, + "loss": 0.2423, + "step": 20729 + }, + { + "epoch": 2.927139226207286, + "grad_norm": 3.131936961444449, + "learning_rate": 3.092242256623634e-08, + "loss": 0.1283, + "step": 20730 + }, + { + "epoch": 2.927280429257272, + "grad_norm": 3.5823427070378586, + "learning_rate": 3.080274586221399e-08, + "loss": 0.1457, + "step": 20731 + }, + { + "epoch": 2.927421632307258, + "grad_norm": 3.048283654211916, + "learning_rate": 3.0683300838030285e-08, + "loss": 0.114, + "step": 20732 + }, + { + "epoch": 2.927562835357244, + "grad_norm": 3.5571135335614965, + "learning_rate": 3.056408749646189e-08, + "loss": 0.1377, + "step": 20733 + }, + { + "epoch": 2.9277040384072297, + "grad_norm": 3.338133328715287, + "learning_rate": 3.044510584027771e-08, + "loss": 0.1666, + "step": 20734 + }, + { + "epoch": 2.9278452414572156, + "grad_norm": 4.745238509575811, + "learning_rate": 3.032635587224442e-08, + "loss": 0.1735, + "step": 20735 + }, + { + "epoch": 2.9279864445072015, + "grad_norm": 3.770963985580543, + "learning_rate": 3.02078375951198e-08, + "loss": 0.1478, + "step": 20736 + }, + { + "epoch": 2.9281276475571874, + "grad_norm": 3.037137935565298, + "learning_rate": 3.008955101166056e-08, + "loss": 0.1166, + "step": 20737 + }, + { + "epoch": 2.9282688506071732, + "grad_norm": 2.5014160603580553, + "learning_rate": 2.997149612461447e-08, + "loss": 0.0992, + "step": 20738 + }, + { + "epoch": 2.928410053657159, + "grad_norm": 3.9436648003834134, + "learning_rate": 2.985367293672492e-08, + "loss": 0.1892, + "step": 20739 + }, + { + "epoch": 2.928551256707145, + "grad_norm": 4.399151555603122, + "learning_rate": 2.9736081450730813e-08, + "loss": 0.2066, + "step": 20740 + }, + { + "epoch": 2.928692459757131, + "grad_norm": 3.2549887831564885, + "learning_rate": 2.9618721669363302e-08, + "loss": 0.1314, + "step": 20741 + }, + { + "epoch": 2.9288336628071168, + "grad_norm": 2.9503089534110805, + "learning_rate": 2.950159359535132e-08, + "loss": 0.1133, + "step": 20742 + }, + { + "epoch": 2.9289748658571026, + "grad_norm": 2.9893571070488236, + "learning_rate": 2.9384697231416016e-08, + "loss": 0.1379, + "step": 20743 + }, + { + "epoch": 2.9291160689070885, + "grad_norm": 3.9061673127968777, + "learning_rate": 2.9268032580275218e-08, + "loss": 0.175, + "step": 20744 + }, + { + "epoch": 2.9292572719570744, + "grad_norm": 2.9599605293832982, + "learning_rate": 2.9151599644638984e-08, + "loss": 0.1169, + "step": 20745 + }, + { + "epoch": 2.9293984750070603, + "grad_norm": 2.8603940390651803, + "learning_rate": 2.9035398427212925e-08, + "loss": 0.1288, + "step": 20746 + }, + { + "epoch": 2.929539678057046, + "grad_norm": 3.8325177411621754, + "learning_rate": 2.8919428930698213e-08, + "loss": 0.1478, + "step": 20747 + }, + { + "epoch": 2.929680881107032, + "grad_norm": 3.0140631099091215, + "learning_rate": 2.880369115778936e-08, + "loss": 0.1334, + "step": 20748 + }, + { + "epoch": 2.929822084157018, + "grad_norm": 2.940696012982415, + "learning_rate": 2.8688185111177546e-08, + "loss": 0.1175, + "step": 20749 + }, + { + "epoch": 2.929963287207004, + "grad_norm": 3.149891532154961, + "learning_rate": 2.8572910793546183e-08, + "loss": 0.1458, + "step": 20750 + }, + { + "epoch": 2.9301044902569897, + "grad_norm": 3.4771600330852497, + "learning_rate": 2.8457868207573126e-08, + "loss": 0.1097, + "step": 20751 + }, + { + "epoch": 2.9302456933069756, + "grad_norm": 3.4219733941442403, + "learning_rate": 2.8343057355932902e-08, + "loss": 0.1362, + "step": 20752 + }, + { + "epoch": 2.9303868963569615, + "grad_norm": 3.518276664395193, + "learning_rate": 2.8228478241294487e-08, + "loss": 0.1813, + "step": 20753 + }, + { + "epoch": 2.9305280994069474, + "grad_norm": 3.2968734481728816, + "learning_rate": 2.8114130866319088e-08, + "loss": 0.1406, + "step": 20754 + }, + { + "epoch": 2.9306693024569332, + "grad_norm": 2.463559615728865, + "learning_rate": 2.800001523366458e-08, + "loss": 0.1286, + "step": 20755 + }, + { + "epoch": 2.930810505506919, + "grad_norm": 3.3645537975815065, + "learning_rate": 2.788613134598328e-08, + "loss": 0.1299, + "step": 20756 + }, + { + "epoch": 2.930951708556905, + "grad_norm": 3.7870126863352565, + "learning_rate": 2.7772479205921964e-08, + "loss": 0.1905, + "step": 20757 + }, + { + "epoch": 2.931092911606891, + "grad_norm": 2.7123416693710003, + "learning_rate": 2.7659058816121855e-08, + "loss": 0.1432, + "step": 20758 + }, + { + "epoch": 2.9312341146568768, + "grad_norm": 2.8392133576822967, + "learning_rate": 2.7545870179217503e-08, + "loss": 0.1418, + "step": 20759 + }, + { + "epoch": 2.9313753177068627, + "grad_norm": 3.318621013788601, + "learning_rate": 2.7432913297841256e-08, + "loss": 0.1475, + "step": 20760 + }, + { + "epoch": 2.9315165207568485, + "grad_norm": 3.605680501703827, + "learning_rate": 2.732018817461657e-08, + "loss": 0.192, + "step": 20761 + }, + { + "epoch": 2.9316577238068344, + "grad_norm": 3.172437720371396, + "learning_rate": 2.7207694812164677e-08, + "loss": 0.1274, + "step": 20762 + }, + { + "epoch": 2.9317989268568203, + "grad_norm": 3.6957012736735595, + "learning_rate": 2.7095433213097933e-08, + "loss": 0.1262, + "step": 20763 + }, + { + "epoch": 2.931940129906806, + "grad_norm": 3.5779722452478215, + "learning_rate": 2.6983403380026474e-08, + "loss": 0.1168, + "step": 20764 + }, + { + "epoch": 2.932081332956792, + "grad_norm": 3.3382984121007704, + "learning_rate": 2.6871605315554882e-08, + "loss": 0.1689, + "step": 20765 + }, + { + "epoch": 2.932222536006778, + "grad_norm": 2.8566557995709085, + "learning_rate": 2.676003902227886e-08, + "loss": 0.1383, + "step": 20766 + }, + { + "epoch": 2.932363739056764, + "grad_norm": 3.219517369005276, + "learning_rate": 2.6648704502792998e-08, + "loss": 0.1364, + "step": 20767 + }, + { + "epoch": 2.9325049421067497, + "grad_norm": 5.2435036441773715, + "learning_rate": 2.6537601759684118e-08, + "loss": 0.2208, + "step": 20768 + }, + { + "epoch": 2.9326461451567356, + "grad_norm": 3.254076522943835, + "learning_rate": 2.6426730795534594e-08, + "loss": 0.1591, + "step": 20769 + }, + { + "epoch": 2.9327873482067215, + "grad_norm": 2.616383097656784, + "learning_rate": 2.6316091612920146e-08, + "loss": 0.1197, + "step": 20770 + }, + { + "epoch": 2.9329285512567074, + "grad_norm": 3.4405313979722667, + "learning_rate": 2.6205684214412052e-08, + "loss": 0.158, + "step": 20771 + }, + { + "epoch": 2.9330697543066933, + "grad_norm": 2.7847378574641675, + "learning_rate": 2.6095508602577147e-08, + "loss": 0.0951, + "step": 20772 + }, + { + "epoch": 2.933210957356679, + "grad_norm": 2.710583778001825, + "learning_rate": 2.5985564779974492e-08, + "loss": 0.1454, + "step": 20773 + }, + { + "epoch": 2.933352160406665, + "grad_norm": 3.188162854134947, + "learning_rate": 2.5875852749160936e-08, + "loss": 0.1725, + "step": 20774 + }, + { + "epoch": 2.933493363456651, + "grad_norm": 3.1134741308208196, + "learning_rate": 2.5766372512684436e-08, + "loss": 0.1282, + "step": 20775 + }, + { + "epoch": 2.933634566506637, + "grad_norm": 3.0486279150946567, + "learning_rate": 2.5657124073089622e-08, + "loss": 0.1303, + "step": 20776 + }, + { + "epoch": 2.9337757695566227, + "grad_norm": 2.984271559681901, + "learning_rate": 2.554810743291558e-08, + "loss": 0.124, + "step": 20777 + }, + { + "epoch": 2.9339169726066086, + "grad_norm": 2.591107220173452, + "learning_rate": 2.5439322594696946e-08, + "loss": 0.1023, + "step": 20778 + }, + { + "epoch": 2.9340581756565944, + "grad_norm": 2.7031621622279283, + "learning_rate": 2.5330769560959477e-08, + "loss": 0.1113, + "step": 20779 + }, + { + "epoch": 2.9341993787065803, + "grad_norm": 3.952708399348134, + "learning_rate": 2.5222448334227822e-08, + "loss": 0.1727, + "step": 20780 + }, + { + "epoch": 2.934340581756566, + "grad_norm": 3.926308624944096, + "learning_rate": 2.5114358917018857e-08, + "loss": 0.1761, + "step": 20781 + }, + { + "epoch": 2.934481784806552, + "grad_norm": 3.822256228825777, + "learning_rate": 2.50065013118439e-08, + "loss": 0.1519, + "step": 20782 + }, + { + "epoch": 2.934622987856538, + "grad_norm": 3.3908326680839, + "learning_rate": 2.4898875521209842e-08, + "loss": 0.1205, + "step": 20783 + }, + { + "epoch": 2.934764190906524, + "grad_norm": 3.1866167394019715, + "learning_rate": 2.4791481547619123e-08, + "loss": 0.1505, + "step": 20784 + }, + { + "epoch": 2.9349053939565097, + "grad_norm": 2.5914952553931925, + "learning_rate": 2.4684319393565303e-08, + "loss": 0.0917, + "step": 20785 + }, + { + "epoch": 2.9350465970064956, + "grad_norm": 2.772943975611951, + "learning_rate": 2.4577389061539724e-08, + "loss": 0.1345, + "step": 20786 + }, + { + "epoch": 2.935187800056481, + "grad_norm": 3.294901325673289, + "learning_rate": 2.4470690554028175e-08, + "loss": 0.1438, + "step": 20787 + }, + { + "epoch": 2.935329003106467, + "grad_norm": 3.6826856904299894, + "learning_rate": 2.4364223873509785e-08, + "loss": 0.1319, + "step": 20788 + }, + { + "epoch": 2.935470206156453, + "grad_norm": 3.178624285318878, + "learning_rate": 2.425798902245924e-08, + "loss": 0.139, + "step": 20789 + }, + { + "epoch": 2.9356114092064387, + "grad_norm": 2.268061069568154, + "learning_rate": 2.4151986003343453e-08, + "loss": 0.1087, + "step": 20790 + }, + { + "epoch": 2.9357526122564246, + "grad_norm": 2.3688306421038874, + "learning_rate": 2.4046214818628234e-08, + "loss": 0.0939, + "step": 20791 + }, + { + "epoch": 2.9358938153064105, + "grad_norm": 2.891617853686542, + "learning_rate": 2.39406754707705e-08, + "loss": 0.1202, + "step": 20792 + }, + { + "epoch": 2.9360350183563964, + "grad_norm": 2.5410854851905125, + "learning_rate": 2.3835367962222744e-08, + "loss": 0.1177, + "step": 20793 + }, + { + "epoch": 2.9361762214063822, + "grad_norm": 3.217081686721352, + "learning_rate": 2.3730292295433e-08, + "loss": 0.1476, + "step": 20794 + }, + { + "epoch": 2.936317424456368, + "grad_norm": 3.928461664318603, + "learning_rate": 2.362544847284265e-08, + "loss": 0.14, + "step": 20795 + }, + { + "epoch": 2.936458627506354, + "grad_norm": 3.857188635862983, + "learning_rate": 2.352083649688863e-08, + "loss": 0.124, + "step": 20796 + }, + { + "epoch": 2.93659983055634, + "grad_norm": 3.582835451304525, + "learning_rate": 2.3416456370002337e-08, + "loss": 0.1484, + "step": 20797 + }, + { + "epoch": 2.9367410336063258, + "grad_norm": 2.758273603749329, + "learning_rate": 2.3312308094607382e-08, + "loss": 0.1182, + "step": 20798 + }, + { + "epoch": 2.9368822366563117, + "grad_norm": 3.641176472208826, + "learning_rate": 2.3208391673127383e-08, + "loss": 0.1748, + "step": 20799 + }, + { + "epoch": 2.9370234397062975, + "grad_norm": 3.536891627822034, + "learning_rate": 2.3104707107974857e-08, + "loss": 0.1521, + "step": 20800 + }, + { + "epoch": 2.9371646427562834, + "grad_norm": 3.444384909863173, + "learning_rate": 2.30012544015612e-08, + "loss": 0.164, + "step": 20801 + }, + { + "epoch": 2.9373058458062693, + "grad_norm": 3.181454131820245, + "learning_rate": 2.2898033556288946e-08, + "loss": 0.1446, + "step": 20802 + }, + { + "epoch": 2.937447048856255, + "grad_norm": 2.9297525219099816, + "learning_rate": 2.279504457455728e-08, + "loss": 0.1714, + "step": 20803 + }, + { + "epoch": 2.937588251906241, + "grad_norm": 3.0786078644924983, + "learning_rate": 2.2692287458760953e-08, + "loss": 0.1504, + "step": 20804 + }, + { + "epoch": 2.937729454956227, + "grad_norm": 2.8226802411412355, + "learning_rate": 2.258976221128695e-08, + "loss": 0.1028, + "step": 20805 + }, + { + "epoch": 2.937870658006213, + "grad_norm": 3.1255491047069297, + "learning_rate": 2.248746883451669e-08, + "loss": 0.1193, + "step": 20806 + }, + { + "epoch": 2.9380118610561987, + "grad_norm": 3.3759019348553623, + "learning_rate": 2.238540733082939e-08, + "loss": 0.1583, + "step": 20807 + }, + { + "epoch": 2.9381530641061846, + "grad_norm": 3.1394799695288085, + "learning_rate": 2.2283577702596482e-08, + "loss": 0.15, + "step": 20808 + }, + { + "epoch": 2.9382942671561705, + "grad_norm": 3.080033697115591, + "learning_rate": 2.2181979952183852e-08, + "loss": 0.1319, + "step": 20809 + }, + { + "epoch": 2.9384354702061564, + "grad_norm": 2.646659003409431, + "learning_rate": 2.2080614081954054e-08, + "loss": 0.1124, + "step": 20810 + }, + { + "epoch": 2.9385766732561422, + "grad_norm": 2.7532735925940774, + "learning_rate": 2.1979480094260763e-08, + "loss": 0.097, + "step": 20811 + }, + { + "epoch": 2.938717876306128, + "grad_norm": 2.8327961331223546, + "learning_rate": 2.187857799145432e-08, + "loss": 0.1046, + "step": 20812 + }, + { + "epoch": 2.938859079356114, + "grad_norm": 3.4852017056970164, + "learning_rate": 2.1777907775881735e-08, + "loss": 0.1559, + "step": 20813 + }, + { + "epoch": 2.9390002824061, + "grad_norm": 3.096529974575009, + "learning_rate": 2.167746944988114e-08, + "loss": 0.1454, + "step": 20814 + }, + { + "epoch": 2.939141485456086, + "grad_norm": 3.292291730196601, + "learning_rate": 2.1577263015786222e-08, + "loss": 0.1325, + "step": 20815 + }, + { + "epoch": 2.9392826885060717, + "grad_norm": 3.7611463170135586, + "learning_rate": 2.1477288475926227e-08, + "loss": 0.1872, + "step": 20816 + }, + { + "epoch": 2.9394238915560575, + "grad_norm": 3.589243375904884, + "learning_rate": 2.137754583262486e-08, + "loss": 0.1394, + "step": 20817 + }, + { + "epoch": 2.9395650946060434, + "grad_norm": 3.0733195817462162, + "learning_rate": 2.1278035088200255e-08, + "loss": 0.1301, + "step": 20818 + }, + { + "epoch": 2.9397062976560293, + "grad_norm": 3.9807362779359607, + "learning_rate": 2.1178756244965014e-08, + "loss": 0.2042, + "step": 20819 + }, + { + "epoch": 2.939847500706015, + "grad_norm": 3.1575601435676433, + "learning_rate": 2.1079709305226183e-08, + "loss": 0.1193, + "step": 20820 + }, + { + "epoch": 2.939988703756001, + "grad_norm": 3.321944566209505, + "learning_rate": 2.0980894271284136e-08, + "loss": 0.1326, + "step": 20821 + }, + { + "epoch": 2.940129906805987, + "grad_norm": 2.7340081362552136, + "learning_rate": 2.088231114543704e-08, + "loss": 0.1181, + "step": 20822 + }, + { + "epoch": 2.940271109855973, + "grad_norm": 3.0467824097379803, + "learning_rate": 2.0783959929975283e-08, + "loss": 0.1418, + "step": 20823 + }, + { + "epoch": 2.9404123129059587, + "grad_norm": 3.258151619280257, + "learning_rate": 2.0685840627184817e-08, + "loss": 0.128, + "step": 20824 + }, + { + "epoch": 2.9405535159559446, + "grad_norm": 3.3751396773628173, + "learning_rate": 2.0587953239344926e-08, + "loss": 0.1555, + "step": 20825 + }, + { + "epoch": 2.9406947190059305, + "grad_norm": 3.7667517781513, + "learning_rate": 2.049029776873268e-08, + "loss": 0.2015, + "step": 20826 + }, + { + "epoch": 2.9408359220559164, + "grad_norm": 2.9752751070893666, + "learning_rate": 2.0392874217615154e-08, + "loss": 0.095, + "step": 20827 + }, + { + "epoch": 2.9409771251059023, + "grad_norm": 3.5186717591036207, + "learning_rate": 2.0295682588257205e-08, + "loss": 0.1469, + "step": 20828 + }, + { + "epoch": 2.941118328155888, + "grad_norm": 3.0303472849423256, + "learning_rate": 2.0198722882918132e-08, + "loss": 0.1354, + "step": 20829 + }, + { + "epoch": 2.941259531205874, + "grad_norm": 2.9106499232948955, + "learning_rate": 2.010199510385058e-08, + "loss": 0.143, + "step": 20830 + }, + { + "epoch": 2.94140073425586, + "grad_norm": 3.2687039673298877, + "learning_rate": 2.000549925330275e-08, + "loss": 0.1453, + "step": 20831 + }, + { + "epoch": 2.941541937305846, + "grad_norm": 3.198641735710895, + "learning_rate": 1.9909235333517296e-08, + "loss": 0.1204, + "step": 20832 + }, + { + "epoch": 2.9416831403558317, + "grad_norm": 2.797373071666604, + "learning_rate": 1.9813203346730203e-08, + "loss": 0.1237, + "step": 20833 + }, + { + "epoch": 2.9418243434058176, + "grad_norm": 3.769523009343869, + "learning_rate": 1.9717403295175242e-08, + "loss": 0.1657, + "step": 20834 + }, + { + "epoch": 2.9419655464558034, + "grad_norm": 2.5729950204877805, + "learning_rate": 1.9621835181077296e-08, + "loss": 0.1051, + "step": 20835 + }, + { + "epoch": 2.9421067495057893, + "grad_norm": 4.0096278710561295, + "learning_rate": 1.952649900665793e-08, + "loss": 0.191, + "step": 20836 + }, + { + "epoch": 2.942247952555775, + "grad_norm": 3.3496934815222947, + "learning_rate": 1.9431394774132028e-08, + "loss": 0.1363, + "step": 20837 + }, + { + "epoch": 2.942389155605761, + "grad_norm": 3.612548586756759, + "learning_rate": 1.9336522485710053e-08, + "loss": 0.1419, + "step": 20838 + }, + { + "epoch": 2.942530358655747, + "grad_norm": 3.447937164167159, + "learning_rate": 1.9241882143596903e-08, + "loss": 0.1409, + "step": 20839 + }, + { + "epoch": 2.942671561705733, + "grad_norm": 3.4358054521185353, + "learning_rate": 1.914747374999304e-08, + "loss": 0.1601, + "step": 20840 + }, + { + "epoch": 2.9428127647557187, + "grad_norm": 2.4434554714948225, + "learning_rate": 1.9053297307091157e-08, + "loss": 0.0956, + "step": 20841 + }, + { + "epoch": 2.9429539678057046, + "grad_norm": 2.313203894329447, + "learning_rate": 1.89593528170795e-08, + "loss": 0.0836, + "step": 20842 + }, + { + "epoch": 2.9430951708556905, + "grad_norm": 3.7312536130921408, + "learning_rate": 1.8865640282142995e-08, + "loss": 0.1751, + "step": 20843 + }, + { + "epoch": 2.9432363739056764, + "grad_norm": 2.916352656508384, + "learning_rate": 1.877215970445767e-08, + "loss": 0.1127, + "step": 20844 + }, + { + "epoch": 2.9433775769556623, + "grad_norm": 2.7478853134814187, + "learning_rate": 1.8678911086197348e-08, + "loss": 0.1447, + "step": 20845 + }, + { + "epoch": 2.943518780005648, + "grad_norm": 3.3637710555863163, + "learning_rate": 1.8585894429528073e-08, + "loss": 0.1578, + "step": 20846 + }, + { + "epoch": 2.943659983055634, + "grad_norm": 3.2664600798477728, + "learning_rate": 1.8493109736612558e-08, + "loss": 0.1806, + "step": 20847 + }, + { + "epoch": 2.94380118610562, + "grad_norm": 3.137013713899287, + "learning_rate": 1.8400557009605746e-08, + "loss": 0.113, + "step": 20848 + }, + { + "epoch": 2.943942389155606, + "grad_norm": 2.9456304613209827, + "learning_rate": 1.830823625066036e-08, + "loss": 0.1362, + "step": 20849 + }, + { + "epoch": 2.9440835922055917, + "grad_norm": 2.506184917764803, + "learning_rate": 1.821614746191913e-08, + "loss": 0.0869, + "step": 20850 + }, + { + "epoch": 2.9442247952555776, + "grad_norm": 3.5990028623899573, + "learning_rate": 1.81242906455259e-08, + "loss": 0.1413, + "step": 20851 + }, + { + "epoch": 2.9443659983055634, + "grad_norm": 2.7972524680367616, + "learning_rate": 1.8032665803612294e-08, + "loss": 0.1146, + "step": 20852 + }, + { + "epoch": 2.9445072013555493, + "grad_norm": 2.315812344467535, + "learning_rate": 1.794127293830883e-08, + "loss": 0.1026, + "step": 20853 + }, + { + "epoch": 2.944648404405535, + "grad_norm": 3.1038507930266297, + "learning_rate": 1.7850112051738255e-08, + "loss": 0.1395, + "step": 20854 + }, + { + "epoch": 2.944789607455521, + "grad_norm": 3.4952038648883867, + "learning_rate": 1.7759183146021098e-08, + "loss": 0.1402, + "step": 20855 + }, + { + "epoch": 2.944930810505507, + "grad_norm": 3.279000003336461, + "learning_rate": 1.7668486223269e-08, + "loss": 0.1333, + "step": 20856 + }, + { + "epoch": 2.945072013555493, + "grad_norm": 3.084904745799255, + "learning_rate": 1.7578021285590274e-08, + "loss": 0.1335, + "step": 20857 + }, + { + "epoch": 2.9452132166054787, + "grad_norm": 2.992898681457165, + "learning_rate": 1.7487788335087686e-08, + "loss": 0.1547, + "step": 20858 + }, + { + "epoch": 2.9453544196554646, + "grad_norm": 2.4268520297089453, + "learning_rate": 1.7397787373858442e-08, + "loss": 0.1021, + "step": 20859 + }, + { + "epoch": 2.9454956227054505, + "grad_norm": 3.921965799966041, + "learning_rate": 1.7308018403991988e-08, + "loss": 0.1478, + "step": 20860 + }, + { + "epoch": 2.9456368257554364, + "grad_norm": 3.4429000397224665, + "learning_rate": 1.721848142757665e-08, + "loss": 0.1176, + "step": 20861 + }, + { + "epoch": 2.9457780288054223, + "grad_norm": 2.9609772043201783, + "learning_rate": 1.7129176446692986e-08, + "loss": 0.1011, + "step": 20862 + }, + { + "epoch": 2.945919231855408, + "grad_norm": 3.2668807164191627, + "learning_rate": 1.704010346341489e-08, + "loss": 0.1696, + "step": 20863 + }, + { + "epoch": 2.946060434905394, + "grad_norm": 3.3291415592027853, + "learning_rate": 1.6951262479815155e-08, + "loss": 0.1531, + "step": 20864 + }, + { + "epoch": 2.94620163795538, + "grad_norm": 3.80077216276676, + "learning_rate": 1.686265349795546e-08, + "loss": 0.1613, + "step": 20865 + }, + { + "epoch": 2.946342841005366, + "grad_norm": 2.9402486294825683, + "learning_rate": 1.6774276519896383e-08, + "loss": 0.1373, + "step": 20866 + }, + { + "epoch": 2.9464840440553517, + "grad_norm": 3.6470327679308916, + "learning_rate": 1.668613154769183e-08, + "loss": 0.1495, + "step": 20867 + }, + { + "epoch": 2.9466252471053376, + "grad_norm": 2.5987681240669, + "learning_rate": 1.6598218583390168e-08, + "loss": 0.1308, + "step": 20868 + }, + { + "epoch": 2.9467664501553235, + "grad_norm": 3.5234425628350166, + "learning_rate": 1.6510537629034208e-08, + "loss": 0.1945, + "step": 20869 + }, + { + "epoch": 2.9469076532053093, + "grad_norm": 3.1796722069975774, + "learning_rate": 1.6423088686662313e-08, + "loss": 0.1362, + "step": 20870 + }, + { + "epoch": 2.9470488562552952, + "grad_norm": 4.027731174556413, + "learning_rate": 1.63358717583062e-08, + "loss": 0.1629, + "step": 20871 + }, + { + "epoch": 2.947190059305281, + "grad_norm": 3.8269986872090604, + "learning_rate": 1.624888684599202e-08, + "loss": 0.1549, + "step": 20872 + }, + { + "epoch": 2.947331262355267, + "grad_norm": 3.6234108516456875, + "learning_rate": 1.6162133951742596e-08, + "loss": 0.161, + "step": 20873 + }, + { + "epoch": 2.947472465405253, + "grad_norm": 2.9202614128479016, + "learning_rate": 1.6075613077574103e-08, + "loss": 0.1285, + "step": 20874 + }, + { + "epoch": 2.9476136684552388, + "grad_norm": 3.284953817062396, + "learning_rate": 1.598932422549604e-08, + "loss": 0.1598, + "step": 20875 + }, + { + "epoch": 2.9477548715052246, + "grad_norm": 3.9186934076941258, + "learning_rate": 1.5903267397514576e-08, + "loss": 0.1668, + "step": 20876 + }, + { + "epoch": 2.9478960745552105, + "grad_norm": 3.1555250052329105, + "learning_rate": 1.5817442595629228e-08, + "loss": 0.1319, + "step": 20877 + }, + { + "epoch": 2.9480372776051964, + "grad_norm": 2.501259278964831, + "learning_rate": 1.5731849821833955e-08, + "loss": 0.1232, + "step": 20878 + }, + { + "epoch": 2.9481784806551823, + "grad_norm": 3.4120601913602044, + "learning_rate": 1.5646489078119387e-08, + "loss": 0.1432, + "step": 20879 + }, + { + "epoch": 2.948319683705168, + "grad_norm": 3.424634254147939, + "learning_rate": 1.556136036646838e-08, + "loss": 0.1313, + "step": 20880 + }, + { + "epoch": 2.948460886755154, + "grad_norm": 2.8009613495682624, + "learning_rate": 1.5476463688859356e-08, + "loss": 0.1152, + "step": 20881 + }, + { + "epoch": 2.94860208980514, + "grad_norm": 3.020895799748451, + "learning_rate": 1.5391799047266287e-08, + "loss": 0.1615, + "step": 20882 + }, + { + "epoch": 2.948743292855126, + "grad_norm": 3.439009135289018, + "learning_rate": 1.530736644365427e-08, + "loss": 0.1548, + "step": 20883 + }, + { + "epoch": 2.9488844959051117, + "grad_norm": 3.7007266992295005, + "learning_rate": 1.522316587998729e-08, + "loss": 0.1213, + "step": 20884 + }, + { + "epoch": 2.9490256989550976, + "grad_norm": 4.13813436384253, + "learning_rate": 1.5139197358222668e-08, + "loss": 0.2245, + "step": 20885 + }, + { + "epoch": 2.9491669020050835, + "grad_norm": 3.057575122095157, + "learning_rate": 1.5055460880311068e-08, + "loss": 0.1597, + "step": 20886 + }, + { + "epoch": 2.9493081050550694, + "grad_norm": 3.4989824142155, + "learning_rate": 1.49719564481976e-08, + "loss": 0.1324, + "step": 20887 + }, + { + "epoch": 2.9494493081050552, + "grad_norm": 3.855893413690058, + "learning_rate": 1.4888684063824044e-08, + "loss": 0.1419, + "step": 20888 + }, + { + "epoch": 2.9495905111550407, + "grad_norm": 3.0457680316268414, + "learning_rate": 1.4805643729124408e-08, + "loss": 0.1368, + "step": 20889 + }, + { + "epoch": 2.9497317142050266, + "grad_norm": 2.435071140875976, + "learning_rate": 1.4722835446030481e-08, + "loss": 0.1057, + "step": 20890 + }, + { + "epoch": 2.9498729172550124, + "grad_norm": 3.185032546782885, + "learning_rate": 1.464025921646406e-08, + "loss": 0.1293, + "step": 20891 + }, + { + "epoch": 2.9500141203049983, + "grad_norm": 3.306850261169892, + "learning_rate": 1.4557915042346937e-08, + "loss": 0.1336, + "step": 20892 + }, + { + "epoch": 2.950155323354984, + "grad_norm": 3.3474712984701878, + "learning_rate": 1.447580292559092e-08, + "loss": 0.1735, + "step": 20893 + }, + { + "epoch": 2.95029652640497, + "grad_norm": 3.6664451191902647, + "learning_rate": 1.4393922868105591e-08, + "loss": 0.1462, + "step": 20894 + }, + { + "epoch": 2.950437729454956, + "grad_norm": 3.2735761219995143, + "learning_rate": 1.4312274871792763e-08, + "loss": 0.1255, + "step": 20895 + }, + { + "epoch": 2.950578932504942, + "grad_norm": 3.1394314327220463, + "learning_rate": 1.4230858938549808e-08, + "loss": 0.1374, + "step": 20896 + }, + { + "epoch": 2.9507201355549277, + "grad_norm": 2.9706231605602422, + "learning_rate": 1.4149675070269653e-08, + "loss": 0.164, + "step": 20897 + }, + { + "epoch": 2.9508613386049136, + "grad_norm": 2.917511909952137, + "learning_rate": 1.4068723268837459e-08, + "loss": 0.1377, + "step": 20898 + }, + { + "epoch": 2.9510025416548995, + "grad_norm": 3.4381218576849113, + "learning_rate": 1.3988003536137273e-08, + "loss": 0.1484, + "step": 20899 + }, + { + "epoch": 2.9511437447048854, + "grad_norm": 3.2422735335112516, + "learning_rate": 1.3907515874042044e-08, + "loss": 0.1288, + "step": 20900 + }, + { + "epoch": 2.9512849477548713, + "grad_norm": 2.7846083691438324, + "learning_rate": 1.3827260284423604e-08, + "loss": 0.1007, + "step": 20901 + }, + { + "epoch": 2.951426150804857, + "grad_norm": 2.6141712434729447, + "learning_rate": 1.3747236769147133e-08, + "loss": 0.118, + "step": 20902 + }, + { + "epoch": 2.951567353854843, + "grad_norm": 3.192281116888545, + "learning_rate": 1.366744533007225e-08, + "loss": 0.1359, + "step": 20903 + }, + { + "epoch": 2.951708556904829, + "grad_norm": 2.5669603554333316, + "learning_rate": 1.3587885969051917e-08, + "loss": 0.1363, + "step": 20904 + }, + { + "epoch": 2.951849759954815, + "grad_norm": 4.427866181204975, + "learning_rate": 1.350855868793799e-08, + "loss": 0.1925, + "step": 20905 + }, + { + "epoch": 2.9519909630048007, + "grad_norm": 2.565464631903426, + "learning_rate": 1.3429463488571216e-08, + "loss": 0.1145, + "step": 20906 + }, + { + "epoch": 2.9521321660547866, + "grad_norm": 3.1719569338103115, + "learning_rate": 1.3350600372791234e-08, + "loss": 0.1474, + "step": 20907 + }, + { + "epoch": 2.9522733691047724, + "grad_norm": 3.0280094087104543, + "learning_rate": 1.3271969342431023e-08, + "loss": 0.1519, + "step": 20908 + }, + { + "epoch": 2.9524145721547583, + "grad_norm": 3.539706646074939, + "learning_rate": 1.3193570399316902e-08, + "loss": 0.2107, + "step": 20909 + }, + { + "epoch": 2.952555775204744, + "grad_norm": 3.575091544901411, + "learning_rate": 1.3115403545270744e-08, + "loss": 0.1377, + "step": 20910 + }, + { + "epoch": 2.95269697825473, + "grad_norm": 3.4694779669667373, + "learning_rate": 1.3037468782109986e-08, + "loss": 0.1592, + "step": 20911 + }, + { + "epoch": 2.952838181304716, + "grad_norm": 3.324585501213045, + "learning_rate": 1.29597661116454e-08, + "loss": 0.1723, + "step": 20912 + }, + { + "epoch": 2.952979384354702, + "grad_norm": 3.7274612407735046, + "learning_rate": 1.288229553568221e-08, + "loss": 0.199, + "step": 20913 + }, + { + "epoch": 2.9531205874046877, + "grad_norm": 3.386342526587411, + "learning_rate": 1.2805057056022307e-08, + "loss": 0.1356, + "step": 20914 + }, + { + "epoch": 2.9532617904546736, + "grad_norm": 2.859239229103449, + "learning_rate": 1.2728050674459814e-08, + "loss": 0.1276, + "step": 20915 + }, + { + "epoch": 2.9534029935046595, + "grad_norm": 3.382259907885206, + "learning_rate": 1.2651276392783297e-08, + "loss": 0.1651, + "step": 20916 + }, + { + "epoch": 2.9535441965546454, + "grad_norm": 3.2308894305501044, + "learning_rate": 1.2574734212779105e-08, + "loss": 0.124, + "step": 20917 + }, + { + "epoch": 2.9536853996046313, + "grad_norm": 3.50807596465905, + "learning_rate": 1.2498424136223597e-08, + "loss": 0.1603, + "step": 20918 + }, + { + "epoch": 2.953826602654617, + "grad_norm": 3.8764045845226427, + "learning_rate": 1.2422346164892018e-08, + "loss": 0.1738, + "step": 20919 + }, + { + "epoch": 2.953967805704603, + "grad_norm": 2.967259658853862, + "learning_rate": 1.2346500300551844e-08, + "loss": 0.123, + "step": 20920 + }, + { + "epoch": 2.954109008754589, + "grad_norm": 2.7684775757952464, + "learning_rate": 1.227088654496611e-08, + "loss": 0.1197, + "step": 20921 + }, + { + "epoch": 2.954250211804575, + "grad_norm": 4.133150322753982, + "learning_rate": 1.2195504899890076e-08, + "loss": 0.1811, + "step": 20922 + }, + { + "epoch": 2.9543914148545607, + "grad_norm": 3.1322301932909955, + "learning_rate": 1.2120355367079007e-08, + "loss": 0.1194, + "step": 20923 + }, + { + "epoch": 2.9545326179045466, + "grad_norm": 3.065874403059459, + "learning_rate": 1.2045437948275952e-08, + "loss": 0.1825, + "step": 20924 + }, + { + "epoch": 2.9546738209545325, + "grad_norm": 3.4732700956114315, + "learning_rate": 1.197075264522396e-08, + "loss": 0.1311, + "step": 20925 + }, + { + "epoch": 2.9548150240045183, + "grad_norm": 2.627620326962049, + "learning_rate": 1.1896299459658311e-08, + "loss": 0.0914, + "step": 20926 + }, + { + "epoch": 2.9549562270545042, + "grad_norm": 2.7751812151786233, + "learning_rate": 1.1822078393309844e-08, + "loss": 0.0985, + "step": 20927 + }, + { + "epoch": 2.95509743010449, + "grad_norm": 3.394453796661725, + "learning_rate": 1.1748089447901623e-08, + "loss": 0.2011, + "step": 20928 + }, + { + "epoch": 2.955238633154476, + "grad_norm": 3.200775885712225, + "learning_rate": 1.1674332625154494e-08, + "loss": 0.1338, + "step": 20929 + }, + { + "epoch": 2.955379836204462, + "grad_norm": 3.338893544431498, + "learning_rate": 1.1600807926782642e-08, + "loss": 0.1339, + "step": 20930 + }, + { + "epoch": 2.9555210392544478, + "grad_norm": 4.018132346422408, + "learning_rate": 1.152751535449359e-08, + "loss": 0.1388, + "step": 20931 + }, + { + "epoch": 2.9556622423044336, + "grad_norm": 2.6722062048004656, + "learning_rate": 1.145445490999153e-08, + "loss": 0.1203, + "step": 20932 + }, + { + "epoch": 2.9558034453544195, + "grad_norm": 2.951709143765773, + "learning_rate": 1.1381626594975103e-08, + "loss": 0.0877, + "step": 20933 + }, + { + "epoch": 2.9559446484044054, + "grad_norm": 3.3025445750709794, + "learning_rate": 1.130903041113518e-08, + "loss": 0.139, + "step": 20934 + }, + { + "epoch": 2.9560858514543913, + "grad_norm": 3.5946964043945098, + "learning_rate": 1.1236666360159299e-08, + "loss": 0.1584, + "step": 20935 + }, + { + "epoch": 2.956227054504377, + "grad_norm": 2.5610504318441287, + "learning_rate": 1.1164534443730557e-08, + "loss": 0.0978, + "step": 20936 + }, + { + "epoch": 2.956368257554363, + "grad_norm": 3.692911897300744, + "learning_rate": 1.1092634663523171e-08, + "loss": 0.1365, + "step": 20937 + }, + { + "epoch": 2.956509460604349, + "grad_norm": 3.187921788572486, + "learning_rate": 1.1020967021210249e-08, + "loss": 0.1421, + "step": 20938 + }, + { + "epoch": 2.956650663654335, + "grad_norm": 3.496858424771091, + "learning_rate": 1.0949531518454904e-08, + "loss": 0.1912, + "step": 20939 + }, + { + "epoch": 2.9567918667043207, + "grad_norm": 3.167092149933974, + "learning_rate": 1.0878328156919139e-08, + "loss": 0.1522, + "step": 20940 + }, + { + "epoch": 2.9569330697543066, + "grad_norm": 3.1919593841485296, + "learning_rate": 1.0807356938256074e-08, + "loss": 0.1589, + "step": 20941 + }, + { + "epoch": 2.9570742728042925, + "grad_norm": 3.479331699096202, + "learning_rate": 1.0736617864117727e-08, + "loss": 0.1433, + "step": 20942 + }, + { + "epoch": 2.9572154758542784, + "grad_norm": 3.6290364438021863, + "learning_rate": 1.0666110936145002e-08, + "loss": 0.1454, + "step": 20943 + }, + { + "epoch": 2.9573566789042642, + "grad_norm": 3.7750765923061675, + "learning_rate": 1.0595836155978811e-08, + "loss": 0.1834, + "step": 20944 + }, + { + "epoch": 2.95749788195425, + "grad_norm": 3.124862124433934, + "learning_rate": 1.0525793525250072e-08, + "loss": 0.1292, + "step": 20945 + }, + { + "epoch": 2.957639085004236, + "grad_norm": 2.4536237906543814, + "learning_rate": 1.0455983045588591e-08, + "loss": 0.1277, + "step": 20946 + }, + { + "epoch": 2.957780288054222, + "grad_norm": 3.2828696618730837, + "learning_rate": 1.0386404718616406e-08, + "loss": 0.1387, + "step": 20947 + }, + { + "epoch": 2.9579214911042078, + "grad_norm": 3.6300171237004752, + "learning_rate": 1.0317058545948888e-08, + "loss": 0.1914, + "step": 20948 + }, + { + "epoch": 2.9580626941541937, + "grad_norm": 3.6384366386314038, + "learning_rate": 1.0247944529199193e-08, + "loss": 0.1461, + "step": 20949 + }, + { + "epoch": 2.9582038972041795, + "grad_norm": 4.67957418413978, + "learning_rate": 1.0179062669972705e-08, + "loss": 0.1885, + "step": 20950 + }, + { + "epoch": 2.9583451002541654, + "grad_norm": 2.7183991668721834, + "learning_rate": 1.0110412969871475e-08, + "loss": 0.1327, + "step": 20951 + }, + { + "epoch": 2.9584863033041513, + "grad_norm": 3.055314771943912, + "learning_rate": 1.0041995430488671e-08, + "loss": 0.1323, + "step": 20952 + }, + { + "epoch": 2.958627506354137, + "grad_norm": 2.942627860393065, + "learning_rate": 9.973810053416356e-09, + "loss": 0.1219, + "step": 20953 + }, + { + "epoch": 2.958768709404123, + "grad_norm": 3.0529522882345583, + "learning_rate": 9.905856840238815e-09, + "loss": 0.1319, + "step": 20954 + }, + { + "epoch": 2.958909912454109, + "grad_norm": 2.783210766361708, + "learning_rate": 9.838135792533676e-09, + "loss": 0.1419, + "step": 20955 + }, + { + "epoch": 2.959051115504095, + "grad_norm": 2.8611864191240746, + "learning_rate": 9.770646911876347e-09, + "loss": 0.1164, + "step": 20956 + }, + { + "epoch": 2.9591923185540807, + "grad_norm": 3.777205700060491, + "learning_rate": 9.703390199834461e-09, + "loss": 0.135, + "step": 20957 + }, + { + "epoch": 2.9593335216040666, + "grad_norm": 3.269026498811678, + "learning_rate": 9.636365657971215e-09, + "loss": 0.1673, + "step": 20958 + }, + { + "epoch": 2.9594747246540525, + "grad_norm": 2.485896563978982, + "learning_rate": 9.569573287845357e-09, + "loss": 0.1108, + "step": 20959 + }, + { + "epoch": 2.9596159277040384, + "grad_norm": 3.1275251110341507, + "learning_rate": 9.503013091006763e-09, + "loss": 0.1184, + "step": 20960 + }, + { + "epoch": 2.9597571307540242, + "grad_norm": 3.274138137800996, + "learning_rate": 9.436685069004192e-09, + "loss": 0.1611, + "step": 20961 + }, + { + "epoch": 2.95989833380401, + "grad_norm": 3.443405447376654, + "learning_rate": 9.370589223378635e-09, + "loss": 0.1544, + "step": 20962 + }, + { + "epoch": 2.960039536853996, + "grad_norm": 3.0748457951091575, + "learning_rate": 9.304725555665528e-09, + "loss": 0.1338, + "step": 20963 + }, + { + "epoch": 2.960180739903982, + "grad_norm": 3.7556867007562444, + "learning_rate": 9.239094067396982e-09, + "loss": 0.1654, + "step": 20964 + }, + { + "epoch": 2.9603219429539678, + "grad_norm": 2.9073318374364776, + "learning_rate": 9.173694760096219e-09, + "loss": 0.1232, + "step": 20965 + }, + { + "epoch": 2.9604631460039537, + "grad_norm": 3.109219157025226, + "learning_rate": 9.108527635284248e-09, + "loss": 0.1385, + "step": 20966 + }, + { + "epoch": 2.9606043490539395, + "grad_norm": 2.8004784197050863, + "learning_rate": 9.043592694475412e-09, + "loss": 0.1295, + "step": 20967 + }, + { + "epoch": 2.9607455521039254, + "grad_norm": 2.9274664219014173, + "learning_rate": 8.978889939178503e-09, + "loss": 0.1478, + "step": 20968 + }, + { + "epoch": 2.9608867551539113, + "grad_norm": 3.1895038946071277, + "learning_rate": 8.914419370897876e-09, + "loss": 0.13, + "step": 20969 + }, + { + "epoch": 2.961027958203897, + "grad_norm": 3.372245409808537, + "learning_rate": 8.850180991131219e-09, + "loss": 0.1103, + "step": 20970 + }, + { + "epoch": 2.961169161253883, + "grad_norm": 3.4135469954778737, + "learning_rate": 8.786174801370673e-09, + "loss": 0.1647, + "step": 20971 + }, + { + "epoch": 2.961310364303869, + "grad_norm": 3.4334545812809387, + "learning_rate": 8.722400803106157e-09, + "loss": 0.1682, + "step": 20972 + }, + { + "epoch": 2.961451567353855, + "grad_norm": 2.8223747300517874, + "learning_rate": 8.658858997816488e-09, + "loss": 0.1364, + "step": 20973 + }, + { + "epoch": 2.9615927704038407, + "grad_norm": 4.014651242224271, + "learning_rate": 8.595549386981595e-09, + "loss": 0.1786, + "step": 20974 + }, + { + "epoch": 2.9617339734538266, + "grad_norm": 2.930614623484727, + "learning_rate": 8.53247197206919e-09, + "loss": 0.1343, + "step": 20975 + }, + { + "epoch": 2.9618751765038125, + "grad_norm": 3.9461326274396904, + "learning_rate": 8.469626754549209e-09, + "loss": 0.2161, + "step": 20976 + }, + { + "epoch": 2.9620163795537984, + "grad_norm": 2.930159604387823, + "learning_rate": 8.407013735878267e-09, + "loss": 0.1272, + "step": 20977 + }, + { + "epoch": 2.9621575826037843, + "grad_norm": 3.6321178927510305, + "learning_rate": 8.344632917515194e-09, + "loss": 0.1635, + "step": 20978 + }, + { + "epoch": 2.96229878565377, + "grad_norm": 3.26693423482731, + "learning_rate": 8.282484300906613e-09, + "loss": 0.1289, + "step": 20979 + }, + { + "epoch": 2.962439988703756, + "grad_norm": 2.8588647364914195, + "learning_rate": 8.220567887498033e-09, + "loss": 0.0958, + "step": 20980 + }, + { + "epoch": 2.962581191753742, + "grad_norm": 2.869305698897458, + "learning_rate": 8.158883678728303e-09, + "loss": 0.1353, + "step": 20981 + }, + { + "epoch": 2.962722394803728, + "grad_norm": 2.8776957220445754, + "learning_rate": 8.09743167603072e-09, + "loss": 0.1239, + "step": 20982 + }, + { + "epoch": 2.9628635978537137, + "grad_norm": 3.1754993459158776, + "learning_rate": 8.036211880834144e-09, + "loss": 0.1182, + "step": 20983 + }, + { + "epoch": 2.9630048009036996, + "grad_norm": 3.5422919290700525, + "learning_rate": 7.975224294560769e-09, + "loss": 0.1364, + "step": 20984 + }, + { + "epoch": 2.9631460039536854, + "grad_norm": 2.948514609484278, + "learning_rate": 7.914468918628348e-09, + "loss": 0.1126, + "step": 20985 + }, + { + "epoch": 2.9632872070036713, + "grad_norm": 3.8391716517794556, + "learning_rate": 7.853945754447977e-09, + "loss": 0.1898, + "step": 20986 + }, + { + "epoch": 2.963428410053657, + "grad_norm": 2.7152258391253667, + "learning_rate": 7.793654803426309e-09, + "loss": 0.1309, + "step": 20987 + }, + { + "epoch": 2.963569613103643, + "grad_norm": 2.927256147875384, + "learning_rate": 7.733596066965555e-09, + "loss": 0.1163, + "step": 20988 + }, + { + "epoch": 2.963710816153629, + "grad_norm": 2.9628600210019416, + "learning_rate": 7.673769546460153e-09, + "loss": 0.1321, + "step": 20989 + }, + { + "epoch": 2.963852019203615, + "grad_norm": 2.8048862223948423, + "learning_rate": 7.614175243301213e-09, + "loss": 0.1322, + "step": 20990 + }, + { + "epoch": 2.9639932222536007, + "grad_norm": 2.345982602760121, + "learning_rate": 7.554813158873186e-09, + "loss": 0.1028, + "step": 20991 + }, + { + "epoch": 2.9641344253035866, + "grad_norm": 3.110268873537656, + "learning_rate": 7.495683294556078e-09, + "loss": 0.148, + "step": 20992 + }, + { + "epoch": 2.9642756283535725, + "grad_norm": 2.8176698707425216, + "learning_rate": 7.436785651724343e-09, + "loss": 0.1146, + "step": 20993 + }, + { + "epoch": 2.9644168314035584, + "grad_norm": 3.1147915172981606, + "learning_rate": 7.378120231745778e-09, + "loss": 0.1445, + "step": 20994 + }, + { + "epoch": 2.9645580344535443, + "grad_norm": 4.279855369330541, + "learning_rate": 7.319687035983735e-09, + "loss": 0.1946, + "step": 20995 + }, + { + "epoch": 2.96469923750353, + "grad_norm": 2.896818949598069, + "learning_rate": 7.26148606579713e-09, + "loss": 0.1172, + "step": 20996 + }, + { + "epoch": 2.964840440553516, + "grad_norm": 2.9577200511844866, + "learning_rate": 7.203517322538211e-09, + "loss": 0.1302, + "step": 20997 + }, + { + "epoch": 2.964981643603502, + "grad_norm": 3.2391628617275887, + "learning_rate": 7.145780807553681e-09, + "loss": 0.1411, + "step": 20998 + }, + { + "epoch": 2.965122846653488, + "grad_norm": 3.1520440350106766, + "learning_rate": 7.0882765221858e-09, + "loss": 0.134, + "step": 20999 + }, + { + "epoch": 2.9652640497034737, + "grad_norm": 3.390775876141292, + "learning_rate": 7.031004467771274e-09, + "loss": 0.1648, + "step": 21000 + }, + { + "epoch": 2.9654052527534596, + "grad_norm": 3.0235267782748534, + "learning_rate": 6.973964645640152e-09, + "loss": 0.1264, + "step": 21001 + }, + { + "epoch": 2.9655464558034454, + "grad_norm": 3.219361656633308, + "learning_rate": 6.91715705711804e-09, + "loss": 0.1436, + "step": 21002 + }, + { + "epoch": 2.9656876588534313, + "grad_norm": 3.1221555150131217, + "learning_rate": 6.860581703526104e-09, + "loss": 0.1568, + "step": 21003 + }, + { + "epoch": 2.965828861903417, + "grad_norm": 3.409120810950975, + "learning_rate": 6.804238586177736e-09, + "loss": 0.1383, + "step": 21004 + }, + { + "epoch": 2.965970064953403, + "grad_norm": 3.0107659771206094, + "learning_rate": 6.748127706384111e-09, + "loss": 0.1291, + "step": 21005 + }, + { + "epoch": 2.966111268003389, + "grad_norm": 3.3365191404253434, + "learning_rate": 6.692249065447521e-09, + "loss": 0.1471, + "step": 21006 + }, + { + "epoch": 2.966252471053375, + "grad_norm": 3.191440889233095, + "learning_rate": 6.636602664668035e-09, + "loss": 0.1224, + "step": 21007 + }, + { + "epoch": 2.9663936741033607, + "grad_norm": 3.0344741004497213, + "learning_rate": 6.5811885053368444e-09, + "loss": 0.1666, + "step": 21008 + }, + { + "epoch": 2.9665348771533466, + "grad_norm": 2.6352788398214595, + "learning_rate": 6.526006588744027e-09, + "loss": 0.0776, + "step": 21009 + }, + { + "epoch": 2.9666760802033325, + "grad_norm": 3.822208840567407, + "learning_rate": 6.471056916170782e-09, + "loss": 0.1409, + "step": 21010 + }, + { + "epoch": 2.9668172832533184, + "grad_norm": 3.153845501046757, + "learning_rate": 6.416339488893864e-09, + "loss": 0.1121, + "step": 21011 + }, + { + "epoch": 2.9669584863033043, + "grad_norm": 4.389716641536935, + "learning_rate": 6.361854308185589e-09, + "loss": 0.1773, + "step": 21012 + }, + { + "epoch": 2.96709968935329, + "grad_norm": 3.0115845511978487, + "learning_rate": 6.307601375312722e-09, + "loss": 0.1454, + "step": 21013 + }, + { + "epoch": 2.967240892403276, + "grad_norm": 2.8532419260155804, + "learning_rate": 6.253580691534255e-09, + "loss": 0.12, + "step": 21014 + }, + { + "epoch": 2.967382095453262, + "grad_norm": 2.7633431529123023, + "learning_rate": 6.199792258106962e-09, + "loss": 0.1505, + "step": 21015 + }, + { + "epoch": 2.967523298503248, + "grad_norm": 2.9669673198935524, + "learning_rate": 6.146236076279843e-09, + "loss": 0.1325, + "step": 21016 + }, + { + "epoch": 2.9676645015532337, + "grad_norm": 3.4762966648418288, + "learning_rate": 6.0929121472996785e-09, + "loss": 0.1553, + "step": 21017 + }, + { + "epoch": 2.9678057046032196, + "grad_norm": 3.1925919631151105, + "learning_rate": 6.039820472403257e-09, + "loss": 0.1671, + "step": 21018 + }, + { + "epoch": 2.9679469076532055, + "grad_norm": 3.3063965822323897, + "learning_rate": 5.986961052825146e-09, + "loss": 0.1477, + "step": 21019 + }, + { + "epoch": 2.9680881107031913, + "grad_norm": 3.851778152619592, + "learning_rate": 5.934333889794364e-09, + "loss": 0.1696, + "step": 21020 + }, + { + "epoch": 2.9682293137531772, + "grad_norm": 3.0275365503824263, + "learning_rate": 5.881938984533264e-09, + "loss": 0.1249, + "step": 21021 + }, + { + "epoch": 2.968370516803163, + "grad_norm": 3.9395511172534263, + "learning_rate": 5.8297763382597625e-09, + "loss": 0.1583, + "step": 21022 + }, + { + "epoch": 2.968511719853149, + "grad_norm": 3.1316371878544444, + "learning_rate": 5.777845952186223e-09, + "loss": 0.143, + "step": 21023 + }, + { + "epoch": 2.968652922903135, + "grad_norm": 2.7669193176657396, + "learning_rate": 5.726147827519457e-09, + "loss": 0.1267, + "step": 21024 + }, + { + "epoch": 2.9687941259531208, + "grad_norm": 3.3651175504250945, + "learning_rate": 5.674681965460727e-09, + "loss": 0.1292, + "step": 21025 + }, + { + "epoch": 2.9689353290031066, + "grad_norm": 2.954779052251237, + "learning_rate": 5.623448367205741e-09, + "loss": 0.1644, + "step": 21026 + }, + { + "epoch": 2.9690765320530925, + "grad_norm": 3.915213771424255, + "learning_rate": 5.5724470339468815e-09, + "loss": 0.1481, + "step": 21027 + }, + { + "epoch": 2.9692177351030784, + "grad_norm": 3.2810421365250226, + "learning_rate": 5.521677966866534e-09, + "loss": 0.1499, + "step": 21028 + }, + { + "epoch": 2.9693589381530643, + "grad_norm": 3.562055691743463, + "learning_rate": 5.471141167147087e-09, + "loss": 0.1563, + "step": 21029 + }, + { + "epoch": 2.96950014120305, + "grad_norm": 2.9352896283052177, + "learning_rate": 5.4208366359620455e-09, + "loss": 0.1299, + "step": 21030 + }, + { + "epoch": 2.969641344253036, + "grad_norm": 2.567186035594047, + "learning_rate": 5.370764374480475e-09, + "loss": 0.0946, + "step": 21031 + }, + { + "epoch": 2.969782547303022, + "grad_norm": 3.403408823164045, + "learning_rate": 5.3209243838647784e-09, + "loss": 0.1416, + "step": 21032 + }, + { + "epoch": 2.969923750353008, + "grad_norm": 2.372018652193145, + "learning_rate": 5.271316665275139e-09, + "loss": 0.1044, + "step": 21033 + }, + { + "epoch": 2.9700649534029937, + "grad_norm": 3.5854331070162604, + "learning_rate": 5.221941219863969e-09, + "loss": 0.1798, + "step": 21034 + }, + { + "epoch": 2.9702061564529796, + "grad_norm": 3.0709747911060443, + "learning_rate": 5.172798048779237e-09, + "loss": 0.1476, + "step": 21035 + }, + { + "epoch": 2.9703473595029655, + "grad_norm": 3.1385426535132357, + "learning_rate": 5.123887153161145e-09, + "loss": 0.1289, + "step": 21036 + }, + { + "epoch": 2.9704885625529514, + "grad_norm": 2.78171843382014, + "learning_rate": 5.075208534147669e-09, + "loss": 0.1261, + "step": 21037 + }, + { + "epoch": 2.9706297656029372, + "grad_norm": 2.8368198044646786, + "learning_rate": 5.026762192870127e-09, + "loss": 0.1267, + "step": 21038 + }, + { + "epoch": 2.970770968652923, + "grad_norm": 2.7312542141403426, + "learning_rate": 4.9785481304531755e-09, + "loss": 0.1283, + "step": 21039 + }, + { + "epoch": 2.970912171702909, + "grad_norm": 2.9421763362159306, + "learning_rate": 4.93056634801925e-09, + "loss": 0.1321, + "step": 21040 + }, + { + "epoch": 2.971053374752895, + "grad_norm": 4.228816865880618, + "learning_rate": 4.882816846681904e-09, + "loss": 0.1799, + "step": 21041 + }, + { + "epoch": 2.9711945778028808, + "grad_norm": 2.6729053331570425, + "learning_rate": 4.83529962755247e-09, + "loss": 0.1309, + "step": 21042 + }, + { + "epoch": 2.9713357808528666, + "grad_norm": 2.75361075539676, + "learning_rate": 4.78801469173229e-09, + "loss": 0.1394, + "step": 21043 + }, + { + "epoch": 2.9714769839028525, + "grad_norm": 3.14082367980495, + "learning_rate": 4.740962040323815e-09, + "loss": 0.1167, + "step": 21044 + }, + { + "epoch": 2.9716181869528384, + "grad_norm": 3.2894315443992515, + "learning_rate": 4.694141674417285e-09, + "loss": 0.1402, + "step": 21045 + }, + { + "epoch": 2.9717593900028243, + "grad_norm": 3.9349673646756753, + "learning_rate": 4.647553595102938e-09, + "loss": 0.1645, + "step": 21046 + }, + { + "epoch": 2.97190059305281, + "grad_norm": 3.672937943481367, + "learning_rate": 4.601197803463242e-09, + "loss": 0.1462, + "step": 21047 + }, + { + "epoch": 2.972041796102796, + "grad_norm": 3.296551551324094, + "learning_rate": 4.555074300574003e-09, + "loss": 0.1636, + "step": 21048 + }, + { + "epoch": 2.972182999152782, + "grad_norm": 2.809555824825626, + "learning_rate": 4.5091830875088065e-09, + "loss": 0.1154, + "step": 21049 + }, + { + "epoch": 2.972324202202768, + "grad_norm": 3.677021917473877, + "learning_rate": 4.463524165333466e-09, + "loss": 0.1592, + "step": 21050 + }, + { + "epoch": 2.9724654052527537, + "grad_norm": 3.000358821077733, + "learning_rate": 4.418097535108246e-09, + "loss": 0.141, + "step": 21051 + }, + { + "epoch": 2.9726066083027396, + "grad_norm": 4.976978817166408, + "learning_rate": 4.372903197891188e-09, + "loss": 0.1496, + "step": 21052 + }, + { + "epoch": 2.9727478113527255, + "grad_norm": 3.225628744055724, + "learning_rate": 4.327941154730342e-09, + "loss": 0.1508, + "step": 21053 + }, + { + "epoch": 2.9728890144027114, + "grad_norm": 3.2535841996822783, + "learning_rate": 4.283211406670429e-09, + "loss": 0.1521, + "step": 21054 + }, + { + "epoch": 2.9730302174526972, + "grad_norm": 3.438221282418488, + "learning_rate": 4.238713954752838e-09, + "loss": 0.1422, + "step": 21055 + }, + { + "epoch": 2.973171420502683, + "grad_norm": 2.9365113390660817, + "learning_rate": 4.194448800011186e-09, + "loss": 0.1253, + "step": 21056 + }, + { + "epoch": 2.973312623552669, + "grad_norm": 3.24073373641248, + "learning_rate": 4.15041594347243e-09, + "loss": 0.1129, + "step": 21057 + }, + { + "epoch": 2.973453826602655, + "grad_norm": 2.889166141985397, + "learning_rate": 4.1066153861624155e-09, + "loss": 0.1286, + "step": 21058 + }, + { + "epoch": 2.9735950296526403, + "grad_norm": 2.878492829553914, + "learning_rate": 4.063047129096998e-09, + "loss": 0.1165, + "step": 21059 + }, + { + "epoch": 2.973736232702626, + "grad_norm": 4.188067370799378, + "learning_rate": 4.019711173289809e-09, + "loss": 0.2055, + "step": 21060 + }, + { + "epoch": 2.973877435752612, + "grad_norm": 3.5985988997527913, + "learning_rate": 3.976607519746712e-09, + "loss": 0.1289, + "step": 21061 + }, + { + "epoch": 2.974018638802598, + "grad_norm": 3.7009330604687656, + "learning_rate": 3.933736169471347e-09, + "loss": 0.117, + "step": 21062 + }, + { + "epoch": 2.974159841852584, + "grad_norm": 2.2304069444430707, + "learning_rate": 3.891097123458476e-09, + "loss": 0.0897, + "step": 21063 + }, + { + "epoch": 2.9743010449025697, + "grad_norm": 4.053806122203301, + "learning_rate": 3.8486903826995266e-09, + "loss": 0.155, + "step": 21064 + }, + { + "epoch": 2.9744422479525556, + "grad_norm": 3.3111002416821447, + "learning_rate": 3.806515948180378e-09, + "loss": 0.1769, + "step": 21065 + }, + { + "epoch": 2.9745834510025415, + "grad_norm": 3.2506154676597427, + "learning_rate": 3.764573820880246e-09, + "loss": 0.1625, + "step": 21066 + }, + { + "epoch": 2.9747246540525274, + "grad_norm": 2.5605092057957872, + "learning_rate": 3.7228640017750172e-09, + "loss": 0.0988, + "step": 21067 + }, + { + "epoch": 2.9748658571025133, + "grad_norm": 2.888765786394192, + "learning_rate": 3.6813864918328057e-09, + "loss": 0.1058, + "step": 21068 + }, + { + "epoch": 2.975007060152499, + "grad_norm": 3.32867913699476, + "learning_rate": 3.6401412920183955e-09, + "loss": 0.1416, + "step": 21069 + }, + { + "epoch": 2.975148263202485, + "grad_norm": 3.118885428620935, + "learning_rate": 3.5991284032899087e-09, + "loss": 0.1346, + "step": 21070 + }, + { + "epoch": 2.975289466252471, + "grad_norm": 3.1699936544855154, + "learning_rate": 3.558347826599917e-09, + "loss": 0.1228, + "step": 21071 + }, + { + "epoch": 2.975430669302457, + "grad_norm": 3.1865535699609984, + "learning_rate": 3.5177995628976613e-09, + "loss": 0.1215, + "step": 21072 + }, + { + "epoch": 2.9755718723524427, + "grad_norm": 3.105781313668679, + "learning_rate": 3.4774836131246103e-09, + "loss": 0.1481, + "step": 21073 + }, + { + "epoch": 2.9757130754024286, + "grad_norm": 3.4706337869050174, + "learning_rate": 3.437399978216682e-09, + "loss": 0.1798, + "step": 21074 + }, + { + "epoch": 2.9758542784524145, + "grad_norm": 2.740235095286872, + "learning_rate": 3.3975486591075746e-09, + "loss": 0.112, + "step": 21075 + }, + { + "epoch": 2.9759954815024003, + "grad_norm": 3.1589986946627198, + "learning_rate": 3.357929656722103e-09, + "loss": 0.1235, + "step": 21076 + }, + { + "epoch": 2.9761366845523862, + "grad_norm": 2.9260076655295553, + "learning_rate": 3.318542971980643e-09, + "loss": 0.1068, + "step": 21077 + }, + { + "epoch": 2.976277887602372, + "grad_norm": 3.0146099607281975, + "learning_rate": 3.2793886057991277e-09, + "loss": 0.1215, + "step": 21078 + }, + { + "epoch": 2.976419090652358, + "grad_norm": 2.5100639859106417, + "learning_rate": 3.24046655908683e-09, + "loss": 0.1068, + "step": 21079 + }, + { + "epoch": 2.976560293702344, + "grad_norm": 2.6233960554489646, + "learning_rate": 3.201776832749692e-09, + "loss": 0.0988, + "step": 21080 + }, + { + "epoch": 2.9767014967523298, + "grad_norm": 2.8405040234156966, + "learning_rate": 3.163319427685885e-09, + "loss": 0.094, + "step": 21081 + }, + { + "epoch": 2.9768426998023156, + "grad_norm": 3.127971042032498, + "learning_rate": 3.125094344789137e-09, + "loss": 0.1662, + "step": 21082 + }, + { + "epoch": 2.9769839028523015, + "grad_norm": 2.72878225235653, + "learning_rate": 3.0871015849476272e-09, + "loss": 0.0945, + "step": 21083 + }, + { + "epoch": 2.9771251059022874, + "grad_norm": 2.224175064313002, + "learning_rate": 3.049341149045093e-09, + "loss": 0.0856, + "step": 21084 + }, + { + "epoch": 2.9772663089522733, + "grad_norm": 3.0064869120432043, + "learning_rate": 3.0118130379575005e-09, + "loss": 0.1234, + "step": 21085 + }, + { + "epoch": 2.977407512002259, + "grad_norm": 3.1951170089734093, + "learning_rate": 2.974517252558595e-09, + "loss": 0.1209, + "step": 21086 + }, + { + "epoch": 2.977548715052245, + "grad_norm": 3.4891742898341462, + "learning_rate": 2.937453793714351e-09, + "loss": 0.1711, + "step": 21087 + }, + { + "epoch": 2.977689918102231, + "grad_norm": 3.048334834554927, + "learning_rate": 2.9006226622874114e-09, + "loss": 0.1423, + "step": 21088 + }, + { + "epoch": 2.977831121152217, + "grad_norm": 3.4221071258428037, + "learning_rate": 2.8640238591315373e-09, + "loss": 0.1402, + "step": 21089 + }, + { + "epoch": 2.9779723242022027, + "grad_norm": 2.3014685420301157, + "learning_rate": 2.8276573850982703e-09, + "loss": 0.1177, + "step": 21090 + }, + { + "epoch": 2.9781135272521886, + "grad_norm": 5.052485989789503, + "learning_rate": 2.79152324103249e-09, + "loss": 0.1685, + "step": 21091 + }, + { + "epoch": 2.9782547303021745, + "grad_norm": 4.052973782969394, + "learning_rate": 2.755621427774635e-09, + "loss": 0.1787, + "step": 21092 + }, + { + "epoch": 2.9783959333521604, + "grad_norm": 2.6202924982325873, + "learning_rate": 2.7199519461595937e-09, + "loss": 0.0968, + "step": 21093 + }, + { + "epoch": 2.9785371364021462, + "grad_norm": 2.8629976831012844, + "learning_rate": 2.6845147970144816e-09, + "loss": 0.1305, + "step": 21094 + }, + { + "epoch": 2.978678339452132, + "grad_norm": 3.6665539702549275, + "learning_rate": 2.649309981163084e-09, + "loss": 0.1674, + "step": 21095 + }, + { + "epoch": 2.978819542502118, + "grad_norm": 3.8962937354647647, + "learning_rate": 2.6143374994247463e-09, + "loss": 0.1655, + "step": 21096 + }, + { + "epoch": 2.978960745552104, + "grad_norm": 2.831690331922944, + "learning_rate": 2.579597352612151e-09, + "loss": 0.1089, + "step": 21097 + }, + { + "epoch": 2.9791019486020898, + "grad_norm": 2.9452148502652142, + "learning_rate": 2.5450895415324306e-09, + "loss": 0.1272, + "step": 21098 + }, + { + "epoch": 2.9792431516520756, + "grad_norm": 3.0433848312705774, + "learning_rate": 2.510814066986056e-09, + "loss": 0.1315, + "step": 21099 + }, + { + "epoch": 2.9793843547020615, + "grad_norm": 3.5255456367200573, + "learning_rate": 2.4767709297712772e-09, + "loss": 0.1774, + "step": 21100 + }, + { + "epoch": 2.9795255577520474, + "grad_norm": 3.436782858784968, + "learning_rate": 2.4429601306785734e-09, + "loss": 0.1577, + "step": 21101 + }, + { + "epoch": 2.9796667608020333, + "grad_norm": 3.0814672461288004, + "learning_rate": 2.4093816704950924e-09, + "loss": 0.1356, + "step": 21102 + }, + { + "epoch": 2.979807963852019, + "grad_norm": 4.383456488385386, + "learning_rate": 2.37603554999799e-09, + "loss": 0.1895, + "step": 21103 + }, + { + "epoch": 2.979949166902005, + "grad_norm": 3.409497772401768, + "learning_rate": 2.3429217699655337e-09, + "loss": 0.1448, + "step": 21104 + }, + { + "epoch": 2.980090369951991, + "grad_norm": 3.521574078287625, + "learning_rate": 2.3100403311659967e-09, + "loss": 0.1691, + "step": 21105 + }, + { + "epoch": 2.980231573001977, + "grad_norm": 3.495531140943913, + "learning_rate": 2.277391234363213e-09, + "loss": 0.1715, + "step": 21106 + }, + { + "epoch": 2.9803727760519627, + "grad_norm": 3.6714688749552957, + "learning_rate": 2.244974480315465e-09, + "loss": 0.1517, + "step": 21107 + }, + { + "epoch": 2.9805139791019486, + "grad_norm": 3.338478865874934, + "learning_rate": 2.2127900697777038e-09, + "loss": 0.1509, + "step": 21108 + }, + { + "epoch": 2.9806551821519345, + "grad_norm": 3.6371271216189123, + "learning_rate": 2.1808380034959997e-09, + "loss": 0.1448, + "step": 21109 + }, + { + "epoch": 2.9807963852019204, + "grad_norm": 4.227345400460378, + "learning_rate": 2.1491182822153124e-09, + "loss": 0.1784, + "step": 21110 + }, + { + "epoch": 2.9809375882519062, + "grad_norm": 3.397280636148819, + "learning_rate": 2.117630906670609e-09, + "loss": 0.1531, + "step": 21111 + }, + { + "epoch": 2.981078791301892, + "grad_norm": 2.5294953311501436, + "learning_rate": 2.086375877593527e-09, + "loss": 0.1153, + "step": 21112 + }, + { + "epoch": 2.981219994351878, + "grad_norm": 3.054876120908973, + "learning_rate": 2.0553531957123727e-09, + "loss": 0.1289, + "step": 21113 + }, + { + "epoch": 2.981361197401864, + "grad_norm": 3.078202445487692, + "learning_rate": 2.0245628617465706e-09, + "loss": 0.1338, + "step": 21114 + }, + { + "epoch": 2.9815024004518498, + "grad_norm": 2.1669573568182887, + "learning_rate": 1.9940048764122143e-09, + "loss": 0.0836, + "step": 21115 + }, + { + "epoch": 2.9816436035018357, + "grad_norm": 3.7449990755583586, + "learning_rate": 1.963679240419847e-09, + "loss": 0.1636, + "step": 21116 + }, + { + "epoch": 2.9817848065518215, + "grad_norm": 2.3625193269504403, + "learning_rate": 1.9335859544733494e-09, + "loss": 0.0925, + "step": 21117 + }, + { + "epoch": 2.9819260096018074, + "grad_norm": 3.0549409637560037, + "learning_rate": 1.9037250192732728e-09, + "loss": 0.118, + "step": 21118 + }, + { + "epoch": 2.9820672126517933, + "grad_norm": 3.1049678219054218, + "learning_rate": 1.8740964355112856e-09, + "loss": 0.1621, + "step": 21119 + }, + { + "epoch": 2.982208415701779, + "grad_norm": 3.7924889764368954, + "learning_rate": 1.8447002038779471e-09, + "loss": 0.182, + "step": 21120 + }, + { + "epoch": 2.982349618751765, + "grad_norm": 2.897342613312911, + "learning_rate": 1.8155363250571544e-09, + "loss": 0.1326, + "step": 21121 + }, + { + "epoch": 2.982490821801751, + "grad_norm": 3.3585873024921993, + "learning_rate": 1.7866047997239234e-09, + "loss": 0.1578, + "step": 21122 + }, + { + "epoch": 2.982632024851737, + "grad_norm": 3.120244303035706, + "learning_rate": 1.757905628552159e-09, + "loss": 0.1313, + "step": 21123 + }, + { + "epoch": 2.9827732279017227, + "grad_norm": 2.772743037595081, + "learning_rate": 1.7294388122102157e-09, + "loss": 0.1247, + "step": 21124 + }, + { + "epoch": 2.9829144309517086, + "grad_norm": 3.054165423606782, + "learning_rate": 1.7012043513564559e-09, + "loss": 0.1221, + "step": 21125 + }, + { + "epoch": 2.9830556340016945, + "grad_norm": 2.9533676475735366, + "learning_rate": 1.6732022466503516e-09, + "loss": 0.1306, + "step": 21126 + }, + { + "epoch": 2.9831968370516804, + "grad_norm": 3.1956215631648517, + "learning_rate": 1.6454324987391635e-09, + "loss": 0.1428, + "step": 21127 + }, + { + "epoch": 2.9833380401016663, + "grad_norm": 2.622750148317922, + "learning_rate": 1.6178951082712613e-09, + "loss": 0.1104, + "step": 21128 + }, + { + "epoch": 2.983479243151652, + "grad_norm": 3.4886743088415524, + "learning_rate": 1.5905900758861336e-09, + "loss": 0.1751, + "step": 21129 + }, + { + "epoch": 2.983620446201638, + "grad_norm": 2.8352584820410742, + "learning_rate": 1.5635174022166077e-09, + "loss": 0.1304, + "step": 21130 + }, + { + "epoch": 2.983761649251624, + "grad_norm": 3.0115357331675927, + "learning_rate": 1.5366770878932903e-09, + "loss": 0.1377, + "step": 21131 + }, + { + "epoch": 2.98390285230161, + "grad_norm": 2.5115279881909482, + "learning_rate": 1.5100691335401262e-09, + "loss": 0.1036, + "step": 21132 + }, + { + "epoch": 2.9840440553515957, + "grad_norm": 3.0314406693483025, + "learning_rate": 1.4836935397744e-09, + "loss": 0.0977, + "step": 21133 + }, + { + "epoch": 2.9841852584015816, + "grad_norm": 3.652863479113, + "learning_rate": 1.4575503072100649e-09, + "loss": 0.1233, + "step": 21134 + }, + { + "epoch": 2.9843264614515674, + "grad_norm": 3.3373424720308718, + "learning_rate": 1.4316394364533027e-09, + "loss": 0.1312, + "step": 21135 + }, + { + "epoch": 2.9844676645015533, + "grad_norm": 3.304181899976067, + "learning_rate": 1.4059609281080743e-09, + "loss": 0.1677, + "step": 21136 + }, + { + "epoch": 2.984608867551539, + "grad_norm": 2.9154801031986555, + "learning_rate": 1.3805147827694599e-09, + "loss": 0.1271, + "step": 21137 + }, + { + "epoch": 2.984750070601525, + "grad_norm": 3.2550673438798507, + "learning_rate": 1.355301001030318e-09, + "loss": 0.1319, + "step": 21138 + }, + { + "epoch": 2.984891273651511, + "grad_norm": 2.849257536520862, + "learning_rate": 1.3303195834757366e-09, + "loss": 0.1204, + "step": 21139 + }, + { + "epoch": 2.985032476701497, + "grad_norm": 3.238168205683666, + "learning_rate": 1.305570530686362e-09, + "loss": 0.1294, + "step": 21140 + }, + { + "epoch": 2.9851736797514827, + "grad_norm": 2.832533835269369, + "learning_rate": 1.2810538432372898e-09, + "loss": 0.1378, + "step": 21141 + }, + { + "epoch": 2.9853148828014686, + "grad_norm": 3.540595304676716, + "learning_rate": 1.256769521699175e-09, + "loss": 0.1601, + "step": 21142 + }, + { + "epoch": 2.9854560858514545, + "grad_norm": 2.6431349966950606, + "learning_rate": 1.2327175666349e-09, + "loss": 0.0831, + "step": 21143 + }, + { + "epoch": 2.9855972889014404, + "grad_norm": 3.88829567403951, + "learning_rate": 1.2088979786040179e-09, + "loss": 0.1712, + "step": 21144 + }, + { + "epoch": 2.9857384919514263, + "grad_norm": 3.4441098148055107, + "learning_rate": 1.1853107581605294e-09, + "loss": 0.1599, + "step": 21145 + }, + { + "epoch": 2.985879695001412, + "grad_norm": 2.8017997825614303, + "learning_rate": 1.1619559058517748e-09, + "loss": 0.1425, + "step": 21146 + }, + { + "epoch": 2.986020898051398, + "grad_norm": 4.011926025480318, + "learning_rate": 1.1388334222217633e-09, + "loss": 0.1702, + "step": 21147 + }, + { + "epoch": 2.986162101101384, + "grad_norm": 3.018036377814512, + "learning_rate": 1.1159433078067327e-09, + "loss": 0.1316, + "step": 21148 + }, + { + "epoch": 2.98630330415137, + "grad_norm": 3.216344570894812, + "learning_rate": 1.0932855631384797e-09, + "loss": 0.1772, + "step": 21149 + }, + { + "epoch": 2.9864445072013557, + "grad_norm": 2.722246572315768, + "learning_rate": 1.0708601887454706e-09, + "loss": 0.1259, + "step": 21150 + }, + { + "epoch": 2.9865857102513416, + "grad_norm": 3.7183543215382557, + "learning_rate": 1.0486671851461794e-09, + "loss": 0.1375, + "step": 21151 + }, + { + "epoch": 2.9867269133013274, + "grad_norm": 2.870385681131172, + "learning_rate": 1.02670655285797e-09, + "loss": 0.1505, + "step": 21152 + }, + { + "epoch": 2.9868681163513133, + "grad_norm": 3.3498521284143203, + "learning_rate": 1.004978292390435e-09, + "loss": 0.1224, + "step": 21153 + }, + { + "epoch": 2.987009319401299, + "grad_norm": 3.217192814941655, + "learning_rate": 9.83482404249836e-10, + "loss": 0.1341, + "step": 21154 + }, + { + "epoch": 2.987150522451285, + "grad_norm": 3.125034627936791, + "learning_rate": 9.62218888934663e-10, + "loss": 0.1407, + "step": 21155 + }, + { + "epoch": 2.987291725501271, + "grad_norm": 2.759093585643996, + "learning_rate": 9.411877469389652e-10, + "loss": 0.1187, + "step": 21156 + }, + { + "epoch": 2.987432928551257, + "grad_norm": 3.3029977592846644, + "learning_rate": 9.20388978751241e-10, + "loss": 0.1506, + "step": 21157 + }, + { + "epoch": 2.9875741316012427, + "grad_norm": 2.944636840080118, + "learning_rate": 8.998225848566577e-10, + "loss": 0.162, + "step": 21158 + }, + { + "epoch": 2.9877153346512286, + "grad_norm": 2.9440498603984704, + "learning_rate": 8.794885657315011e-10, + "loss": 0.1241, + "step": 21159 + }, + { + "epoch": 2.9878565377012145, + "grad_norm": 3.358942362266349, + "learning_rate": 8.593869218487261e-10, + "loss": 0.1742, + "step": 21160 + }, + { + "epoch": 2.9879977407512, + "grad_norm": 3.592256949866937, + "learning_rate": 8.395176536746263e-10, + "loss": 0.1676, + "step": 21161 + }, + { + "epoch": 2.988138943801186, + "grad_norm": 3.363488430018918, + "learning_rate": 8.198807616732752e-10, + "loss": 0.1312, + "step": 21162 + }, + { + "epoch": 2.9882801468511717, + "grad_norm": 3.9130253572060956, + "learning_rate": 8.004762462987536e-10, + "loss": 0.1834, + "step": 21163 + }, + { + "epoch": 2.9884213499011576, + "grad_norm": 2.754768880174701, + "learning_rate": 7.813041080029226e-10, + "loss": 0.1216, + "step": 21164 + }, + { + "epoch": 2.9885625529511435, + "grad_norm": 3.4143643785401747, + "learning_rate": 7.623643472309817e-10, + "loss": 0.1476, + "step": 21165 + }, + { + "epoch": 2.9887037560011294, + "grad_norm": 3.545533543053335, + "learning_rate": 7.43656964423689e-10, + "loss": 0.1363, + "step": 21166 + }, + { + "epoch": 2.9888449590511152, + "grad_norm": 2.739064658654357, + "learning_rate": 7.251819600151422e-10, + "loss": 0.1542, + "step": 21167 + }, + { + "epoch": 2.988986162101101, + "grad_norm": 2.839532311737879, + "learning_rate": 7.069393344361075e-10, + "loss": 0.1165, + "step": 21168 + }, + { + "epoch": 2.989127365151087, + "grad_norm": 3.527188385730754, + "learning_rate": 6.889290881084699e-10, + "loss": 0.1761, + "step": 21169 + }, + { + "epoch": 2.989268568201073, + "grad_norm": 2.348909904801069, + "learning_rate": 6.711512214518934e-10, + "loss": 0.1035, + "step": 21170 + }, + { + "epoch": 2.989409771251059, + "grad_norm": 2.6624329810704435, + "learning_rate": 6.536057348793811e-10, + "loss": 0.1141, + "step": 21171 + }, + { + "epoch": 2.9895509743010447, + "grad_norm": 3.598588927650186, + "learning_rate": 6.36292628798385e-10, + "loss": 0.1782, + "step": 21172 + }, + { + "epoch": 2.9896921773510305, + "grad_norm": 2.9683948139247054, + "learning_rate": 6.19211903611916e-10, + "loss": 0.1433, + "step": 21173 + }, + { + "epoch": 2.9898333804010164, + "grad_norm": 3.2741962867233823, + "learning_rate": 6.023635597163236e-10, + "loss": 0.1528, + "step": 21174 + }, + { + "epoch": 2.9899745834510023, + "grad_norm": 3.9889610858261255, + "learning_rate": 5.857475975035165e-10, + "loss": 0.1567, + "step": 21175 + }, + { + "epoch": 2.990115786500988, + "grad_norm": 3.97499141797581, + "learning_rate": 5.693640173598525e-10, + "loss": 0.1725, + "step": 21176 + }, + { + "epoch": 2.990256989550974, + "grad_norm": 2.8821019858930574, + "learning_rate": 5.532128196650277e-10, + "loss": 0.1447, + "step": 21177 + }, + { + "epoch": 2.99039819260096, + "grad_norm": 3.347830996279111, + "learning_rate": 5.372940047965181e-10, + "loss": 0.1532, + "step": 21178 + }, + { + "epoch": 2.990539395650946, + "grad_norm": 3.218164190064572, + "learning_rate": 5.216075731218073e-10, + "loss": 0.1461, + "step": 21179 + }, + { + "epoch": 2.9906805987009317, + "grad_norm": 3.543413364851795, + "learning_rate": 5.061535250061589e-10, + "loss": 0.1603, + "step": 21180 + }, + { + "epoch": 2.9908218017509176, + "grad_norm": 3.4391258838684697, + "learning_rate": 4.90931860809285e-10, + "loss": 0.158, + "step": 21181 + }, + { + "epoch": 2.9909630048009035, + "grad_norm": 3.853763735772975, + "learning_rate": 4.759425808853468e-10, + "loss": 0.1836, + "step": 21182 + }, + { + "epoch": 2.9911042078508894, + "grad_norm": 3.261740455148176, + "learning_rate": 4.6118568558184416e-10, + "loss": 0.1705, + "step": 21183 + }, + { + "epoch": 2.9912454109008753, + "grad_norm": 2.524731214061247, + "learning_rate": 4.466611752418359e-10, + "loss": 0.1262, + "step": 21184 + }, + { + "epoch": 2.991386613950861, + "grad_norm": 2.961045452925279, + "learning_rate": 4.323690502017197e-10, + "loss": 0.1162, + "step": 21185 + }, + { + "epoch": 2.991527817000847, + "grad_norm": 3.608405115846408, + "learning_rate": 4.183093107967828e-10, + "loss": 0.1989, + "step": 21186 + }, + { + "epoch": 2.991669020050833, + "grad_norm": 3.6698333364642752, + "learning_rate": 4.044819573501002e-10, + "loss": 0.1596, + "step": 21187 + }, + { + "epoch": 2.991810223100819, + "grad_norm": 2.5951753431454163, + "learning_rate": 3.9088699018585695e-10, + "loss": 0.1159, + "step": 21188 + }, + { + "epoch": 2.9919514261508047, + "grad_norm": 2.7999160175995588, + "learning_rate": 3.7752440961935646e-10, + "loss": 0.1189, + "step": 21189 + }, + { + "epoch": 2.9920926292007906, + "grad_norm": 3.2784412296945376, + "learning_rate": 3.6439421595924065e-10, + "loss": 0.1602, + "step": 21190 + }, + { + "epoch": 2.9922338322507764, + "grad_norm": 2.8073575839091536, + "learning_rate": 3.514964095130413e-10, + "loss": 0.1105, + "step": 21191 + }, + { + "epoch": 2.9923750353007623, + "grad_norm": 3.1496331674521416, + "learning_rate": 3.388309905794085e-10, + "loss": 0.1398, + "step": 21192 + }, + { + "epoch": 2.992516238350748, + "grad_norm": 2.886957132950518, + "learning_rate": 3.263979594525513e-10, + "loss": 0.1288, + "step": 21193 + }, + { + "epoch": 2.992657441400734, + "grad_norm": 3.2426044611686424, + "learning_rate": 3.1419731642223785e-10, + "loss": 0.1463, + "step": 21194 + }, + { + "epoch": 2.99279864445072, + "grad_norm": 3.4230329906680272, + "learning_rate": 3.022290617715751e-10, + "loss": 0.1411, + "step": 21195 + }, + { + "epoch": 2.992939847500706, + "grad_norm": 3.9497303817317397, + "learning_rate": 2.9049319577811873e-10, + "loss": 0.1464, + "step": 21196 + }, + { + "epoch": 2.9930810505506917, + "grad_norm": 2.872521890581678, + "learning_rate": 2.789897187149837e-10, + "loss": 0.1363, + "step": 21197 + }, + { + "epoch": 2.9932222536006776, + "grad_norm": 3.342145491270167, + "learning_rate": 2.677186308497337e-10, + "loss": 0.106, + "step": 21198 + }, + { + "epoch": 2.9933634566506635, + "grad_norm": 2.8513249741824014, + "learning_rate": 2.5667993244327117e-10, + "loss": 0.1162, + "step": 21199 + }, + { + "epoch": 2.9935046597006494, + "grad_norm": 2.4461003914573967, + "learning_rate": 2.45873623754278e-10, + "loss": 0.1045, + "step": 21200 + }, + { + "epoch": 2.9936458627506353, + "grad_norm": 2.6283048423563433, + "learning_rate": 2.352997050325545e-10, + "loss": 0.0858, + "step": 21201 + }, + { + "epoch": 2.993787065800621, + "grad_norm": 3.5032680108407592, + "learning_rate": 2.2495817652345987e-10, + "loss": 0.17, + "step": 21202 + }, + { + "epoch": 2.993928268850607, + "grad_norm": 2.8068712309916033, + "learning_rate": 2.1484903846791249e-10, + "loss": 0.1174, + "step": 21203 + }, + { + "epoch": 2.994069471900593, + "grad_norm": 3.1647878159157123, + "learning_rate": 2.0497229110016948e-10, + "loss": 0.1225, + "step": 21204 + }, + { + "epoch": 2.994210674950579, + "grad_norm": 3.6600308908665533, + "learning_rate": 1.9532793465115717e-10, + "loss": 0.1748, + "step": 21205 + }, + { + "epoch": 2.9943518780005647, + "grad_norm": 4.068971957180614, + "learning_rate": 1.8591596934292022e-10, + "loss": 0.1282, + "step": 21206 + }, + { + "epoch": 2.9944930810505506, + "grad_norm": 3.4939923260106984, + "learning_rate": 1.7673639539639298e-10, + "loss": 0.1379, + "step": 21207 + }, + { + "epoch": 2.9946342841005364, + "grad_norm": 2.662586259248584, + "learning_rate": 1.6778921302362805e-10, + "loss": 0.1135, + "step": 21208 + }, + { + "epoch": 2.9947754871505223, + "grad_norm": 2.7633733998962624, + "learning_rate": 1.5907442243334737e-10, + "loss": 0.1476, + "step": 21209 + }, + { + "epoch": 2.994916690200508, + "grad_norm": 2.6972341571498624, + "learning_rate": 1.505920238265013e-10, + "loss": 0.115, + "step": 21210 + }, + { + "epoch": 2.995057893250494, + "grad_norm": 4.002315734873148, + "learning_rate": 1.4234201740292996e-10, + "loss": 0.141, + "step": 21211 + }, + { + "epoch": 2.99519909630048, + "grad_norm": 2.7485028377414737, + "learning_rate": 1.343244033513713e-10, + "loss": 0.1146, + "step": 21212 + }, + { + "epoch": 2.995340299350466, + "grad_norm": 3.6629749286156157, + "learning_rate": 1.26539181859453e-10, + "loss": 0.1772, + "step": 21213 + }, + { + "epoch": 2.9954815024004517, + "grad_norm": 3.8394837439769156, + "learning_rate": 1.1898635310925167e-10, + "loss": 0.1848, + "step": 21214 + }, + { + "epoch": 2.9956227054504376, + "grad_norm": 2.9268400303010154, + "learning_rate": 1.1166591727396203e-10, + "loss": 0.1246, + "step": 21215 + }, + { + "epoch": 2.9957639085004235, + "grad_norm": 2.983780546292802, + "learning_rate": 1.045778745256687e-10, + "loss": 0.1369, + "step": 21216 + }, + { + "epoch": 2.9959051115504094, + "grad_norm": 2.863031170850118, + "learning_rate": 9.772222502868467e-11, + "loss": 0.1428, + "step": 21217 + }, + { + "epoch": 2.9960463146003953, + "grad_norm": 3.441383046772891, + "learning_rate": 9.109896894066161e-11, + "loss": 0.1689, + "step": 21218 + }, + { + "epoch": 2.996187517650381, + "grad_norm": 3.614117090739518, + "learning_rate": 8.470810641814098e-11, + "loss": 0.171, + "step": 21219 + }, + { + "epoch": 2.996328720700367, + "grad_norm": 3.3628339118364683, + "learning_rate": 7.85496376076722e-11, + "loss": 0.1397, + "step": 21220 + }, + { + "epoch": 2.996469923750353, + "grad_norm": 3.498444297002625, + "learning_rate": 7.262356265358428e-11, + "loss": 0.1429, + "step": 21221 + }, + { + "epoch": 2.996611126800339, + "grad_norm": 2.7747380806840063, + "learning_rate": 6.692988169243463e-11, + "loss": 0.1371, + "step": 21222 + }, + { + "epoch": 2.9967523298503247, + "grad_norm": 3.5198455762033953, + "learning_rate": 6.146859485745004e-11, + "loss": 0.1575, + "step": 21223 + }, + { + "epoch": 2.9968935329003106, + "grad_norm": 3.8969432157436565, + "learning_rate": 5.623970227630615e-11, + "loss": 0.1183, + "step": 21224 + }, + { + "epoch": 2.9970347359502965, + "grad_norm": 2.7465664965288337, + "learning_rate": 5.124320406890704e-11, + "loss": 0.1289, + "step": 21225 + }, + { + "epoch": 2.9971759390002823, + "grad_norm": 3.7654282928742804, + "learning_rate": 4.64791003507159e-11, + "loss": 0.1436, + "step": 21226 + }, + { + "epoch": 2.9973171420502682, + "grad_norm": 2.8271615052594785, + "learning_rate": 4.1947391234975485e-11, + "loss": 0.1164, + "step": 21227 + }, + { + "epoch": 2.997458345100254, + "grad_norm": 3.694541271159491, + "learning_rate": 3.764807682604676e-11, + "loss": 0.156, + "step": 21228 + }, + { + "epoch": 2.99759954815024, + "grad_norm": 3.0891591260523046, + "learning_rate": 3.358115722273958e-11, + "loss": 0.1155, + "step": 21229 + }, + { + "epoch": 2.997740751200226, + "grad_norm": 3.5969610125661844, + "learning_rate": 2.9746632520533116e-11, + "loss": 0.1573, + "step": 21230 + }, + { + "epoch": 2.9978819542502118, + "grad_norm": 3.635837060906462, + "learning_rate": 2.6144502808245208e-11, + "loss": 0.1391, + "step": 21231 + }, + { + "epoch": 2.9980231573001976, + "grad_norm": 3.02853436563727, + "learning_rate": 2.2774768170252813e-11, + "loss": 0.1228, + "step": 21232 + }, + { + "epoch": 2.9981643603501835, + "grad_norm": 3.281446540367409, + "learning_rate": 1.963742868316132e-11, + "loss": 0.1561, + "step": 21233 + }, + { + "epoch": 2.9983055634001694, + "grad_norm": 2.7518352719091537, + "learning_rate": 1.673248442246589e-11, + "loss": 0.1302, + "step": 21234 + }, + { + "epoch": 2.9984467664501553, + "grad_norm": 3.114354127384235, + "learning_rate": 1.405993545255946e-11, + "loss": 0.1409, + "step": 21235 + }, + { + "epoch": 2.998587969500141, + "grad_norm": 2.9323082282173982, + "learning_rate": 1.1619781838945188e-11, + "loss": 0.1485, + "step": 21236 + }, + { + "epoch": 2.998729172550127, + "grad_norm": 3.2013813218348774, + "learning_rate": 9.412023636024003e-12, + "loss": 0.1403, + "step": 21237 + }, + { + "epoch": 2.998870375600113, + "grad_norm": 2.954438412189926, + "learning_rate": 7.436660894866165e-12, + "loss": 0.1135, + "step": 21238 + }, + { + "epoch": 2.999011578650099, + "grad_norm": 3.3089000963642805, + "learning_rate": 5.693693663211264e-12, + "loss": 0.1359, + "step": 21239 + }, + { + "epoch": 2.9991527817000847, + "grad_norm": 3.0678087293266443, + "learning_rate": 4.1831219799171044e-12, + "loss": 0.1393, + "step": 21240 + }, + { + "epoch": 2.9992939847500706, + "grad_norm": 2.3132531390417244, + "learning_rate": 2.904945881621046e-12, + "loss": 0.1006, + "step": 21241 + }, + { + "epoch": 2.9994351878000565, + "grad_norm": 2.7932270418459773, + "learning_rate": 1.859165397188889e-12, + "loss": 0.1659, + "step": 21242 + }, + { + "epoch": 2.9995763908500424, + "grad_norm": 2.857614043883779, + "learning_rate": 1.0457805499353157e-12, + "loss": 0.1342, + "step": 21243 + }, + { + "epoch": 2.9997175939000282, + "grad_norm": 2.8310409274034054, + "learning_rate": 4.647913598443409e-13, + "loss": 0.0953, + "step": 21244 + }, + { + "epoch": 2.999858796950014, + "grad_norm": 3.174553736792835, + "learning_rate": 1.16197840238641e-13, + "loss": 0.1432, + "step": 21245 + }, + { + "epoch": 3.0, + "grad_norm": 2.2926056070486545, + "learning_rate": 0.0, + "loss": 0.0818, + "step": 21246 + }, { "epoch": 3.0, - "step": 14307, - "total_flos": 5.786700751120105e+18, - "train_loss": 1.0319068685825985, - "train_runtime": 97307.4105, - "train_samples_per_second": 9.41, - "train_steps_per_second": 0.147 + "step": 21246, + "total_flos": 190632695685120.0, + "train_loss": 0.5977588871135013, + "train_runtime": 93801.157, + "train_samples_per_second": 1.812, + "train_steps_per_second": 0.227 } ], "logging_steps": 1.0, - "max_steps": 14307, + "max_steps": 21246, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, @@ -100184,8 +148757,8 @@ "attributes": {} } }, - "total_flos": 5.786700751120105e+18, - "train_batch_size": 16, + "total_flos": 190632695685120.0, + "train_batch_size": 2, "trial_name": null, "trial_params": null }